diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/defconfig | 19 | ||||
-rw-r--r-- | arch/x86_64/ia32/Makefile | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 4 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/e820.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/io_apic.c | 35 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 16 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce_amd.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/mpparse.c | 12 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-dma.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-gart.c | 18 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-nommu.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/pmtimer.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 4 | ||||
-rw-r--r-- | arch/x86_64/kernel/ptrace.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 55 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 38 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 2 | ||||
-rw-r--r-- | arch/x86_64/mm/srat.c | 19 |
22 files changed, 199 insertions, 71 deletions
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 3c45ec2..69db0c0 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.17-rc1 -# Mon Apr 3 16:11:14 2006 +# Linux kernel version: 2.6.17-rc1-git11 +# Sun Apr 16 07:22:36 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -57,6 +57,7 @@ CONFIG_FUTEX=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y +CONFIG_DOUBLEFAULT=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 # CONFIG_SLOB is not set @@ -121,6 +122,7 @@ CONFIG_PREEMPT_VOLUNTARY=y CONFIG_PREEMPT_BKL=y CONFIG_NUMA=y CONFIG_K8_NUMA=y +CONFIG_NODES_SHIFT=6 CONFIG_X86_64_ACPI_NUMA=y CONFIG_NUMA_EMU=y CONFIG_ARCH_DISCONTIGMEM_ENABLE=y @@ -544,7 +546,6 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_INIA100 is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set -# CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set # CONFIG_SCSI_QLA_FC is not set # CONFIG_SCSI_LPFC is not set @@ -1045,9 +1046,7 @@ CONFIG_USB_HIDINPUT=y # CONFIG_USB_ACECAD is not set # CONFIG_USB_KBTAB is not set # CONFIG_USB_POWERMATE is not set -# CONFIG_USB_MTOUCH is not set -# CONFIG_USB_ITMTOUCH is not set -# CONFIG_USB_EGALAX is not set +# CONFIG_USB_TOUCHSCREEN is not set # CONFIG_USB_YEALINK is not set # CONFIG_USB_XPAD is not set # CONFIG_USB_ATI_REMOTE is not set @@ -1118,6 +1117,14 @@ CONFIG_USB_MON=y # CONFIG_NEW_LEDS is not set # +# LED drivers +# + +# +# LED Triggers +# + +# # InfiniBand support # # CONFIG_INFINIBAND is not set diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index 929e6b0..e9263b4 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile @@ -27,5 +27,5 @@ $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE $(call if_changed,syscall) -AFLAGS_vsyscall-sysenter.o = -m32 -AFLAGS_vsyscall-syscall.o = -m32 +AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 +AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index e776139..926c474 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -339,7 +339,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, struct mm_struct *mm = current->mm; int i, ret; - stack_base = IA32_STACK_TOP - MAX_ARG_PAGES * PAGE_SIZE; + stack_base = stack_top - MAX_ARG_PAGES * PAGE_SIZE; mm->arg_start = bprm->p + stack_base; bprm->p += stack_base; @@ -357,7 +357,7 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, { mpnt->vm_mm = mm; mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; - mpnt->vm_end = IA32_STACK_TOP; + mpnt->vm_end = stack_top; if (executable_stack == EXSTACK_ENABLE_X) mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC; else if (executable_stack == EXSTACK_DISABLE_X) diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 5a98026..5a92fed 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -694,4 +694,6 @@ ia32_sys_call_table: .quad compat_sys_get_robust_list .quad sys_splice .quad sys_sync_file_range + .quad sys_tee + .quad compat_sys_vmsplice ia32_syscall_end: diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index a098a11..059c883 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - dmi_scan.o pci-dma.o pci-nommu.o + pci-dma.o pci-nommu.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -49,5 +49,3 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o i8237-y += ../../i386/kernel/i8237.o msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o -dmi_scan-y += ../../i386/kernel/dmi_scan.o - diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 62776c0..1ef6028f 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -76,6 +76,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) *addrp = __pa_symbol(&_end); return 1; } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { + *addrp = ebda_addr + ebda_size; + return 1; + } + /* XXX ramdisk image here? */ return 0; } @@ -143,7 +149,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi addr = start; if (addr > ei->addr + ei->size) continue; - while (bad_addr(&addr, size) && addr+size < ei->addr + ei->size) + while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) ; last = addr + size; if (last > ei->addr + ei->size) diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index c946e4f..586b34c 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -281,12 +281,7 @@ tracesys: ja 1f movq %r10,%rcx /* fixup for C */ call *sys_call_table(,%rax,8) - movq %rax,RAX-ARGOFFSET(%rsp) -1: SAVE_REST - movq %rsp,%rdi - call syscall_trace_leave - RESTORE_TOP_OF_STACK %rbx - RESTORE_REST +1: movq %rax,RAX-ARGOFFSET(%rsp) /* Use IRET because user could have changed frame */ jmp int_ret_from_sys_call CFI_ENDPROC diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 77b4c60..9cc7031 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -271,6 +271,18 @@ __setup("enable_8254_timer", setup_enable_8254_timer); #include <linux/pci_ids.h> #include <linux/pci.h> + +#ifdef CONFIG_ACPI + +static int nvidia_hpet_detected __initdata; + +static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) +{ + nvidia_hpet_detected = 1; + return 0; +} +#endif + /* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC off. Check for an Nvidia or VIA PCI bridge and turn it off. Use pci direct infrastructure because this runs before the PCI subsystem. @@ -317,11 +329,19 @@ void __init check_ioapic(void) return; case PCI_VENDOR_ID_NVIDIA: #ifdef CONFIG_ACPI - /* All timer overrides on Nvidia - seem to be wrong. Skip them. */ - acpi_skip_timer_override = 1; - printk(KERN_INFO - "Nvidia board detected. Ignoring ACPI timer override.\n"); + /* + * All timer overrides on Nvidia are + * wrong unless HPET is enabled. + */ + nvidia_hpet_detected = 0; + acpi_table_parse(ACPI_HPET, + nvidia_hpet_check); + if (nvidia_hpet_detected == 0) { + acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board " + "detected. Ignoring ACPI " + "timer override.\n"); + } #endif /* RED-PEN skip them on mptables too? */ return; @@ -1777,6 +1797,8 @@ static inline void unlock_ExtINT_logic(void) spin_unlock_irqrestore(&ioapic_lock, flags); } +int timer_uses_ioapic_pin_0; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -1814,6 +1836,9 @@ static inline void check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; + if (pin1 == 0) + timer_uses_ioapic_pin_0 = 1; + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index accbff3..fa1d19c 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -53,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); /* * returns non-zero if opcode modifies the interrupt flag. */ -static inline int is_IF_modifier(kprobe_opcode_t *insn) +static __always_inline int is_IF_modifier(kprobe_opcode_t *insn) { switch (*insn) { case 0xfa: /* cli */ @@ -84,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) * If it does, return the address of the 32-bit displacement word. * If not, return null. */ -static inline s32 *is_riprel(u8 *insn) +static s32 __kprobes *is_riprel(u8 *insn) { #define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ @@ -229,7 +229,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) mutex_unlock(&kprobe_mutex); } -static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); kcb->prev_kprobe.status = kcb->kprobe_status; @@ -237,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; } -static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; kcb->kprobe_status = kcb->prev_kprobe.status; @@ -245,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { __get_cpu_var(current_kprobe) = p; @@ -514,13 +514,13 @@ static void __kprobes resume_execution(struct kprobe *p, *tos = orig_rip + (*tos - copy_rip); break; case 0xff: - if ((*insn & 0x30) == 0x10) { + if ((insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ /* Fix return addr; rip is correct. */ next_rip = regs->rip; *tos = orig_rip + (*tos - copy_rip); - } else if (((*insn & 0x31) == 0x20) || /* jmp near, absolute indirect */ - ((*insn & 0x31) == 0x21)) { /* jmp far, absolute indirect */ + } else if (((insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ + ((insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ /* rip is correct. */ next_rip = regs->rip; } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 6f0790e..c69fc43 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -629,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) #endif /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -static __cpuinit int +static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d3ad7d8..d13b241 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu) #endif /* get notified when a cpu comes on/off */ -static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, +static int threshold_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { /* cpu was unsigned int to begin with */ diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b17cf3e..083da7e 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -968,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - if (gsi > 15) + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index af035ed..a9275c95 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -54,6 +54,10 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) else #endif node = numa_node_id(); + + if (node < first_node(node_online_map)) + node = first_node(node_online_map); + page = alloc_pages_node(node, gfp, order); return page ? page_address(page) : NULL; } diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index a6c01e1..82a7c9b 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -112,10 +112,6 @@ static unsigned long alloc_iommu(int size) static void free_iommu(unsigned long offset, int size) { unsigned long flags; - if (size == 1) { - clear_bit(offset, iommu_gart_bitmap); - return; - } spin_lock_irqsave(&iommu_bitmap_lock, flags); __clear_bit_string(iommu_gart_bitmap, offset, size); spin_unlock_irqrestore(&iommu_bitmap_lock, flags); @@ -635,14 +631,20 @@ static int __init pci_iommu_init(void) printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " - "but IOMMU not compiled in.\n" - KERN_ERR "WARNING 32bit PCI may malfunction.\n" - KERN_ERR "You might want to enable " - "CONFIG_GART_IOMMU\n"); + "but IOMMU not available.\n" + KERN_ERR "WARNING 32bit PCI may malfunction.\n"); } return -1; } + i = 0; + for_all_nb(dev) + i++; + if (i > MAX_NB) { + printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); + return -1; + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 44adcc2..1f6ecc6 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c @@ -12,9 +12,10 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { if (hwdev && bus + size > *hwdev->dma_mask) { - printk(KERN_ERR - "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", - name, (long long)bus, size, (long long)*hwdev->dma_mask); + if (*hwdev->dma_mask >= 0xffffffffULL) + printk(KERN_ERR + "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", + name, (long long)bus, size, (long long)*hwdev->dma_mask); return 0; } return 1; diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index b0444a4..bf421ed 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c @@ -68,7 +68,7 @@ int pmtimer_mark_offset(void) offset_delay = delta % (USEC_PER_SEC / HZ); rdtscll(tsc); - vxtime.last_tsc = tsc - offset_delay * cpu_khz; + vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000; /* don't calculate delay for first run, or if we've got less then a tick */ diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 1c44b53..fb903e6 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -575,8 +575,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + /* This must be here to ensure both math_state_restore() and - kernel_fpu_begin() work consistently. */ + kernel_fpu_begin() work consistently. + And the AMD workaround requires it to be after DS reload. */ unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index da8e790..2d50024 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -600,12 +600,12 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { if (test_thread_flag(TIF_IA32)) { - audit_syscall_entry(current, AUDIT_ARCH_I386, + audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_rax, regs->rbx, regs->rcx, regs->rdx, regs->rsi); } else { - audit_syscall_entry(current, AUDIT_ARCH_X86_64, + audit_syscall_entry(AUDIT_ARCH_X86_64, regs->orig_rax, regs->rdi, regs->rsi, regs->rdx, regs->r10); @@ -616,7 +616,7 @@ asmlinkage void syscall_trace_enter(struct pt_regs *regs) asmlinkage void syscall_trace_leave(struct pt_regs *regs) { if (unlikely(current->audit_context)) - audit_syscall_exit(current, AUDITSC_RESULT(regs->rax), regs->rax); + audit_syscall_exit(AUDITSC_RESULT(regs->rax), regs->rax); if ((test_thread_flag(TIF_SYSCALL_TRACE) || test_thread_flag(TIF_SINGLESTEP)) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index c50b067..655b919 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -571,17 +571,28 @@ static inline void copy_edd(void) #endif #define EBDA_ADDR_POINTER 0x40E -static void __init reserve_ebda_region(void) + +unsigned __initdata ebda_addr; +unsigned __initdata ebda_size; + +static void discover_ebda(void) { - unsigned int addr; - /** + /* * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E */ - addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); - addr <<= 4; - if (addr) - reserve_bootmem_generic(addr, PAGE_SIZE); + ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; + ebda_addr <<= 4; + + ebda_size = *(unsigned short *)(unsigned long)ebda_addr; + + /* Round EBDA up to pages */ + if (ebda_size == 0) + ebda_size = 1; + ebda_size <<= 10; + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); + if (ebda_size > 64*1024) + ebda_size = 64*1024; } void __init setup_arch(char **cmdline_p) @@ -627,6 +638,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); + discover_ebda(); + init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); dmi_scan_machine(); @@ -669,7 +682,8 @@ void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(0, PAGE_SIZE); /* reserve ebda region */ - reserve_ebda_region(); + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); #ifdef CONFIG_SMP /* @@ -930,6 +944,10 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + /* Enable workaround for FXSAVE leak */ + if (c->x86 >= 6) + set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability); + r = get_model_name(c); if (!r) { switch (c->x86) { @@ -1033,7 +1051,7 @@ static void srat_detect_node(void) for now. */ node = apicid_to_node[hard_smp_processor_id()]; if (node == NUMA_NO_NODE) - node = 0; + node = first_node(node_online_map); numa_set_node(cpu, node); if (acpi_numa > 0) @@ -1422,3 +1440,22 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; +#ifdef CONFIG_INPUT_PCSPKR +#include <linux/platform_device.h> +static __init int add_pcspkr(void) +{ + struct platform_device *pd; + int ret; + + pd = platform_device_alloc("pcspkr", -1); + if (!pd) + return -ENOMEM; + + ret = platform_device_add(pd); + if (ret) + platform_device_put(pd); + + return ret; +} +device_initcall(add_pcspkr); +#endif diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 6bda322..cea335e 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -30,6 +30,7 @@ #include <linux/moduleparam.h> #include <linux/nmi.h> #include <linux/kprobes.h> +#include <linux/kexec.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -101,6 +102,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->eflags & X86_EFLAGS_IF) local_irq_disable(); + /* Make sure to not schedule here because we could be running + on an exception stack. */ preempt_enable_no_resched(); } @@ -384,6 +387,7 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; +static unsigned int die_nest_count; unsigned __kprobes long oops_begin(void) { @@ -398,6 +402,7 @@ unsigned __kprobes long oops_begin(void) else spin_lock(&die_lock); } + die_nest_count++; die_owner = cpu; console_verbose(); bust_spinlocks(1); @@ -408,7 +413,13 @@ void __kprobes oops_end(unsigned long flags) { die_owner = -1; bust_spinlocks(0); - spin_unlock_irqrestore(&die_lock, flags); + die_nest_count--; + if (die_nest_count) + /* We still own the lock */ + local_irq_restore(flags); + else + /* Nest count reaches zero, release the lock. */ + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) panic("Oops"); } @@ -433,6 +444,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err) printk(KERN_ALERT "RIP "); printk_address(regs->rip); printk(" RSP <%016lx>\n", regs->rsp); + if (kexec_should_crash(current)) + crash_kexec(regs); } void die(const char * str, struct pt_regs * regs, long err) @@ -455,10 +468,14 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) */ printk(str, safe_smp_processor_id()); show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); if (panic_on_timeout || panic_on_oops) panic("nmi watchdog"); printk("console shuts up ...\n"); oops_end(flags); + nmi_exit(); + local_irq_enable(); do_exit(SIGSEGV); } @@ -468,8 +485,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { struct task_struct *tsk = current; - conditional_sti(regs); - tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -506,6 +521,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ } @@ -520,6 +536,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } @@ -533,7 +550,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR(18, SIGSEGV, "reserved", reserved) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) + +/* Runs on IST stack */ +asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); +} asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) { @@ -667,8 +694,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - return; + preempt_conditional_cli(regs); } /* Help handler running on IST stack to switch back to user stack diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index cc02573..b2fac14 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -188,11 +188,13 @@ void __init setup_node_zones(int nodeid) memory. */ memmapsize = sizeof(struct page) * (end_pfn-start_pfn); limit = end_pfn << PAGE_SHIFT; +#ifdef CONFIG_FLAT_NODE_MEM_MAP NODE_DATA(nodeid)->node_mem_map = __alloc_bootmem_core(NODE_DATA(nodeid)->bdata, memmapsize, SMP_CACHE_BYTES, round_down(limit - memmapsize, PAGE_SIZE), limit); +#endif size_zones(zones, holes, start_pfn, end_pfn); free_area_init_node(nodeid, NODE_DATA(nodeid), zones, diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 15ae9fc..474df22 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -34,7 +34,10 @@ static nodemask_t nodes_found __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES] __initdata; static int found_add_area __initdata; -int hotadd_percent __initdata = 10; +int hotadd_percent __initdata = 0; +#ifndef RESERVE_HOTADD +#define hotadd_percent 0 /* Ignore all settings */ +#endif static u8 pxm2node[256] = { [0 ... 255] = 0xff }; /* Too small nodes confuse the VM badly. Usually they result @@ -103,6 +106,7 @@ static __init void bad_srat(void) int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; + found_add_area = 0; for (i = 0; i < MAX_LOCAL_APIC; i++) apicid_to_node[i] = NUMA_NO_NODE; for (i = 0; i < MAX_NUMNODES; i++) @@ -154,7 +158,8 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) int pxm, node; if (srat_disabled()) return; - if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat(); + if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { + bad_srat(); return; } if (pa->flags.enabled == 0) @@ -191,15 +196,17 @@ static int hotadd_enough_memory(struct bootnode *nd) allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE; allowed = (allowed / 100) * hotadd_percent; if (allocated + mem > allowed) { + unsigned long range; /* Give them at least part of their hotadd memory upto hotadd_percent It would be better to spread the limit out over multiple hotplug areas, but that is too complicated right now */ if (allocated >= allowed) return 0; - pages = (allowed - allocated + mem) / sizeof(struct page); + range = allowed - allocated; + pages = (range / PAGE_SIZE); mem = pages * sizeof(struct page); - nd->end = nd->start + pages*PAGE_SIZE; + nd->end = nd->start + range; } /* Not completely fool proof, but a good sanity check */ addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem); @@ -392,8 +399,10 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) /* First clean up the node list */ for (i = 0; i < MAX_NUMNODES; i++) { cutoff_node(i, start, end); - if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) + if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { unparse_node(i); + node_set_offline(i); + } } if (acpi_numa <= 0) |