summaryrefslogtreecommitdiffstats
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r--arch/x86_64/kernel/Makefile4
-rw-r--r--arch/x86_64/kernel/aperture.c2
-rw-r--r--arch/x86_64/kernel/e820.c36
-rw-r--r--arch/x86_64/kernel/entry.S28
-rw-r--r--arch/x86_64/kernel/kprobes.c10
-rw-r--r--arch/x86_64/kernel/mce.c10
-rw-r--r--arch/x86_64/kernel/mce_amd.c2
-rw-r--r--arch/x86_64/kernel/nmi.c7
-rw-r--r--arch/x86_64/kernel/pci-dma.c2
-rw-r--r--arch/x86_64/kernel/pci-gart.c4
-rw-r--r--arch/x86_64/kernel/process.c14
-rw-r--r--arch/x86_64/kernel/ptrace.c5
-rw-r--r--arch/x86_64/kernel/setup.c8
-rw-r--r--arch/x86_64/kernel/time.c4
-rw-r--r--arch/x86_64/kernel/traps.c5
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86_64/kernel/x8664_ksyms.c3
17 files changed, 97 insertions, 49 deletions
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index a098a11..059c883 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
- dmi_scan.o pci-dma.o pci-nommu.o
+ pci-dma.o pci-nommu.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -49,5 +49,3 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
i8237-y += ../../i386/kernel/i8237.o
msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o
-dmi_scan-y += ../../i386/kernel/dmi_scan.o
-
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index fffd6b0..70b9d21 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -80,7 +80,7 @@ static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size)
printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
return 0;
}
- if (e820_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
+ if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
return 0;
}
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 293cd71..62776c0 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -80,7 +80,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
return 0;
}
-int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
+/*
+ * This function checks if any part of the range <start,end> is mapped
+ * with type.
+ */
+int __meminit
+e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
@@ -94,6 +99,35 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
return 0;
}
+/*
+ * This function checks if the entire range <start,end> is mapped with type.
+ *
+ * Note: this function only works correct if the e820 table is sorted and
+ * not-overlapping, which is the case
+ */
+int __init e820_all_mapped(unsigned long start, unsigned long end, unsigned type)
+{
+ int i;
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ if (type && ei->type != type)
+ continue;
+ /* is the region (part) in overlap with the current region ?*/
+ if (ei->addr >= end || ei->addr + ei->size <= start)
+ continue;
+
+ /* if the region is at the beginning of <start,end> we move
+ * start to the end of the region since it's ok until there
+ */
+ if (ei->addr <= start)
+ start = ei->addr + ei->size;
+ /* if start is now at or beyond end, we're done, full coverage */
+ if (start >= end)
+ return 1; /* we're done */
+ }
+ return 0;
+}
+
/*
* Find a free area in a specific range.
*/
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 8538bfe..c946e4f 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -180,6 +180,10 @@ rff_trace:
*
* XXX if we had a free scratch register we could save the RSP into the stack frame
* and report it properly in ps. Unfortunately we haven't.
+ *
+ * When user can change the frames always force IRET. That is because
+ * it deals with uncanonical addresses better. SYSRET has trouble
+ * with them due to bugs in both AMD and Intel CPUs.
*/
ENTRY(system_call)
@@ -254,7 +258,10 @@ sysret_signal:
xorl %esi,%esi # oldset -> arg2
call ptregscall_common
1: movl $_TIF_NEED_RESCHED,%edi
- jmp sysret_check
+ /* Use IRET because user could have changed frame. This
+ works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
+ cli
+ jmp int_with_check
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -280,7 +287,8 @@ tracesys:
call syscall_trace_leave
RESTORE_TOP_OF_STACK %rbx
RESTORE_REST
- jmp ret_from_sys_call
+ /* Use IRET because user could have changed frame */
+ jmp int_ret_from_sys_call
CFI_ENDPROC
/*
@@ -408,25 +416,9 @@ ENTRY(stub_execve)
CFI_ADJUST_CFA_OFFSET -8
CFI_REGISTER rip, r11
SAVE_REST
- movq %r11, %r15
- CFI_REGISTER rip, r15
FIXUP_TOP_OF_STACK %r11
call sys_execve
- GET_THREAD_INFO(%rcx)
- bt $TIF_IA32,threadinfo_flags(%rcx)
- CFI_REMEMBER_STATE
- jc exec_32bit
RESTORE_TOP_OF_STACK %r11
- movq %r15, %r11
- CFI_REGISTER rip, r11
- RESTORE_REST
- pushq %r11
- CFI_ADJUST_CFA_OFFSET 8
- CFI_REL_OFFSET rip, 0
- ret
-
-exec_32bit:
- CFI_RESTORE_STATE
movq %rax,RAX(%rsp)
RESTORE_REST
jmp int_ret_from_sys_call
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index accbff3..1eaa5da 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -53,7 +53,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
-static inline int is_IF_modifier(kprobe_opcode_t *insn)
+static __always_inline int is_IF_modifier(kprobe_opcode_t *insn)
{
switch (*insn) {
case 0xfa: /* cli */
@@ -84,7 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
* If it does, return the address of the 32-bit displacement word.
* If not, return null.
*/
-static inline s32 *is_riprel(u8 *insn)
+static s32 __kprobes *is_riprel(u8 *insn)
{
#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
@@ -229,7 +229,7 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
mutex_unlock(&kprobe_mutex);
}
-static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kcb->prev_kprobe.kp = kprobe_running();
kcb->prev_kprobe.status = kcb->kprobe_status;
@@ -237,7 +237,7 @@ static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
}
-static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
kcb->kprobe_status = kcb->prev_kprobe.status;
@@ -245,7 +245,7 @@ static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
}
-static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
__get_cpu_var(current_kprobe) = p;
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 10b3e34..c69fc43 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -29,6 +29,8 @@
#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6
+atomic_t mce_entry;
+
static int mce_dont_init;
/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
@@ -172,10 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
int i;
int panicm_found = 0;
+ atomic_inc(&mce_entry);
+
if (regs)
notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
if (!banks)
- return;
+ goto out2;
memset(&m, 0, sizeof(struct mce));
m.cpu = safe_smp_processor_id();
@@ -266,6 +270,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
out:
/* Last thing done in the machine check exception to clear state. */
wrmsrl(MSR_IA32_MCG_STATUS, 0);
+ out2:
+ atomic_dec(&mce_entry);
}
/*
@@ -623,7 +629,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
#endif
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int
+static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d3ad7d8..d13b241 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -482,7 +482,7 @@ static void threshold_remove_device(unsigned int cpu)
#endif
/* get notified when a cpu comes on/off */
-static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb,
+static int threshold_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
/* cpu was unsigned int to begin with */
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index d9e4067..4e6357f 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -34,6 +34,7 @@
#include <asm/proto.h>
#include <asm/kdebug.h>
#include <asm/local.h>
+#include <asm/mce.h>
/*
* lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -480,6 +481,12 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
__get_cpu_var(nmi_touch) = 0;
touched = 1;
}
+#ifdef CONFIG_X86_MCE
+ /* Could check oops_in_progress here too, but it's safer
+ not too */
+ if (atomic_read(&mce_entry) > 0)
+ touched = 1;
+#endif
if (!touched && __get_cpu_var(last_irq_sum) == sum) {
/*
* Ayiee, looks like this CPU is stuck ...
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 03c9eee..af035ed 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -48,9 +48,11 @@ dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
{
struct page *page;
int node;
+#ifdef CONFIG_PCI
if (dev->bus == &pci_bus_type)
node = pcibus_to_node(to_pci_dev(dev)->bus);
else
+#endif
node = numa_node_id();
page = alloc_pages_node(node, gfp, order);
return page ? page_address(page) : NULL;
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index a6c01e1..9d3d76c 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -112,10 +112,6 @@ static unsigned long alloc_iommu(int size)
static void free_iommu(unsigned long offset, int size)
{
unsigned long flags;
- if (size == 1) {
- clear_bit(offset, iommu_gart_bitmap);
- return;
- }
spin_lock_irqsave(&iommu_bitmap_lock, flags);
__clear_bit_string(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 70dd8e5..fb903e6 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -575,8 +575,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
write_pda(pcurrent, next_p);
+
/* This must be here to ensure both math_state_restore() and
- kernel_fpu_begin() work consistently. */
+ kernel_fpu_begin() work consistently.
+ And the AMD workaround requires it to be after DS reload. */
unlazy_fpu(prev_p);
write_pda(kernelstack,
task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
@@ -781,10 +783,16 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
}
case ARCH_GET_GS: {
unsigned long base;
+ unsigned gsindex;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
- else if (doit)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else if (doit) {
+ asm("movl %%gs,%0" : "=r" (gsindex));
+ if (gsindex)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gs;
+ }
else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index d44b2c1..da8e790 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -274,11 +274,6 @@ static int putreg(struct task_struct *child,
return -EIO;
value &= 0xffff;
break;
- case offsetof(struct user_regs_struct, rip):
- /* Check if the new RIP address is canonical */
- if (value >= TASK_SIZE_OF(child))
- return -EIO;
- break;
}
put_stack_long(child, regno - sizeof(struct pt_regs), value);
return 0;
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0856ad4..759070c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -353,8 +353,10 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (fullarg(from, "enable_timer_pin_1"))
disable_timer_pin_1 = -1;
- if (fullarg(from, "nolapic") || fullarg(from, "disableapic"))
+ if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+ clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
disable_apic = 1;
+ }
if (fullarg(from, "noapic"))
skip_ioapic_setup = 1;
@@ -928,6 +930,10 @@ static int __init init_amd(struct cpuinfo_x86 *c)
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ /* Enable workaround for FXSAVE leak */
+ if (c->x86 >= 6)
+ set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
+
r = get_model_name(c);
if (!r) {
switch (c->x86) {
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index ef8bc46..7392570 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -726,7 +726,7 @@ static __init int late_hpet_init(void)
unsigned int ntimer;
if (!vxtime.hpet_address)
- return -1;
+ return 0;
memset(&hd, 0, sizeof (hd));
@@ -917,6 +917,8 @@ void __init time_init(void)
vxtime.hpet_address = 0;
if (hpet_use_timer) {
+ /* set tick_nsec to use the proper rate for HPET */
+ tick_nsec = TICK_NSEC_HPET;
cpu_khz = hpet_calibrate_tsc();
timename = "HPET";
#ifdef CONFIG_X86_PM_TIMER
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 6bda322..2700b13 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -30,6 +30,7 @@
#include <linux/moduleparam.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>
+#include <linux/kexec.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -433,6 +434,8 @@ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk(KERN_ALERT "RIP ");
printk_address(regs->rip);
printk(" RSP <%016lx>\n", regs->rsp);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
}
void die(const char * str, struct pt_regs * regs, long err)
@@ -455,6 +458,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
*/
printk(str, safe_smp_processor_id());
show_registers(regs);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
if (panic_on_timeout || panic_on_oops)
panic("nmi watchdog");
printk("console shuts up ...\n");
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 39ff070..b81f473 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -65,7 +65,7 @@ SECTIONS
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
*(.data.cacheline_aligned)
}
- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+ . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
.data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
*(.data.read_mostly)
}
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index d78f460..1def21c 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -112,7 +112,6 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
#undef memcpy
#undef memset
#undef memmove
-#undef strlen
extern void * memset(void *,int,__kernel_size_t);
extern size_t strlen(const char *);
@@ -121,8 +120,6 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
OpenPOWER on IntegriCloud