From b8f05c8803fce899d79ca66f8d7f348cf15fb40e Mon Sep 17 00:00:00 2001
From: Juergen Gross
Date: Fri, 27 Feb 2015 15:45:29 +0100
Subject: x86/xen: correct bug in p2m list initialization

Commit 054954eb051f35e74b75a566a96fe756015352c8 ("xen: switch to linear
virtual mapped sparse p2m list") introduced an error. During
initialization of the p2m list, a p2m identity area mapped by a
complete identity pmd entry sometimes has to be split up into smaller
chunks, when a non-identity pfn is introduced in this area.

If this non-identity pfn is not at index 0 of a p2m page, the new p2m
page needed is initialized with wrong identity entries, as the identity
pfns don't start with the value corresponding to index 0, but with the
initial non-identity pfn. This results in weird, incorrect mappings.

Correct the wrong initialization by starting with the correct pfn.

Cc: stable@vger.kernel.org # 3.19
Reported-by: Stefan Bader
Signed-off-by: Juergen Gross
Tested-by: Stefan Bader
Signed-off-by: David Vrabel
---
 arch/x86/xen/p2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 740ae30..9f93af5 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -563,7 +563,7 @@ static bool alloc_p2m(unsigned long pfn)
         if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
             p2m_init(p2m);
         else
-            p2m_init_identity(p2m, pfn);
+            p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));
 
         spin_lock_irqsave(&p2m_update_lock, flags);
--
cgit v1.1
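To see the index arithmetic, here is a minimal userspace sketch (not
kernel code; the P2M_PER_PAGE value and the plain-integer entries are
simplifying assumptions): entry i of a p2m page describes base_pfn + i,
so the base pfn handed to the initializer must be rounded down to a
P2M_PER_PAGE boundary, which is exactly what the new mask does.

  #include <stdio.h>

  #define P2M_PER_PAGE 512    /* assumed: 4K page / 8-byte entries */

  static unsigned long p2m_page[P2M_PER_PAGE];

  /* Entry i of a p2m page must describe base_pfn + i. */
  static void p2m_init_identity_sketch(unsigned long *p2m,
                                       unsigned long base_pfn)
  {
      unsigned int i;

      for (i = 0; i < P2M_PER_PAGE; i++)
          p2m[i] = base_pfn + i;    /* identity mapping */
  }

  int main(void)
  {
      unsigned long pfn = 0x12345;    /* non-identity pfn, index != 0 */

      /* Buggy: passing pfn directly would make entry 0 describe
       * 0x12345 instead of 0x12200. Fixed: round down first. */
      p2m_init_identity_sketch(p2m_page, pfn & ~(P2M_PER_PAGE - 1UL));

      printf("entry 0 -> pfn %#lx\n", p2m_page[0]);    /* 0x12200 */
      return 0;
  }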
From e893286918d2cde3a94850d8f7101cd1039e0c62 Mon Sep 17 00:00:00 2001
From: Jiri Slaby
Date: Thu, 5 Mar 2015 09:13:31 +0100
Subject: x86/vdso: Fix the build on GCC5

On gcc5 the kernel does not link:

  ld: .eh_frame_hdr table[4] FDE at 0000000000000648 overlaps table[5]
  FDE at 0000000000000670.

This is because prior GCC versions always emitted NOPs for ALIGN
directives, but gcc5 started omitting them.

.LSTARTFDEDLSI1 says:

    /* HACK: The dwarf2 unwind routines will subtract 1 from the
       return address to get an address in the middle of the presumed
       call instruction. Since we didn't get here via a call, we need
       to include the nop before the real start to make up for it. */
    .long .LSTART_sigreturn-1-.    /* PC-relative start address */

But commit 69d0627a7f6e ("x86 vDSO: reorder vdso32 code") from 2.6.25
replaced

    .org __kernel_vsyscall+32,0x90

by ALIGN right before __kernel_sigreturn. Of course, ALIGN need not
generate any NOP in there. In particular, gcc5 collapses
vclock_gettime.o and int80.o together with no NOPs generated for the
ALIGN.

So fix this by adding at least a single NOP at that point, and let
ALIGN pad the rest with more NOPs as needed.

Kudos for reporting and diagnosing should go to Richard.

Reported-by: Richard Biener
Signed-off-by: Jiri Slaby
Acked-by: Andy Lutomirski
Cc:
Cc: Borislav Petkov
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1425543211-12542-1-git-send-email-jslaby@suse.cz
Signed-off-by: Ingo Molnar
---
 arch/x86/vdso/vdso32/sigreturn.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index 31776d0..d7ec4e2 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -17,6 +17,7 @@
     .text
     .globl __kernel_sigreturn
     .type __kernel_sigreturn,@function
+    nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
     ALIGN
 __kernel_sigreturn:
 .LSTART_sigreturn:
--
cgit v1.1

From 394838c96013ba414a24ffe7a2a593a9154daadf Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Mon, 9 Mar 2015 17:42:31 -0700
Subject: x86/asm/entry/32: Fix user_mode() misuses

The one in do_debug() is probably harmless, but better safe than sorry.

Signed-off-by: Andy Lutomirski
Cc:
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/d67deaa9df5458363623001f252d1aee3215d014.1425948056.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/traps.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e..4ff5d16 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -384,7 +384,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
         goto exit;
     conditional_sti(regs);
 
-    if (!user_mode(regs))
+    if (!user_mode_vm(regs))
         die("bounds", regs, error_code);
 
     if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -637,7 +637,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
      * then it's very likely the result of an icebp/int01 trap.
      * User wants a sigtrap for that.
      */
-    if (!dr6 && user_mode(regs))
+    if (!dr6 && user_mode_vm(regs))
         user_icebp = 1;
 
     /* Catch kmemcheck conditions first of all! */
--
cgit v1.1

From dc9be0fac70a2ad86e31a81372bb0bdfb6945353 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Thu, 5 Mar 2015 11:54:46 +0100
Subject: kvm: move advertising of KVM_CAP_IRQFD to common code

POWER supports irqfds but forgot to advertise them. Some userspace does
not check for the capability, but others check it---thus they work on
x86 and s390 but not POWER.

To avoid other architectures making the same mistake in the future, let
common code handle KVM_CAP_IRQFD the same way as
KVM_CAP_IRQFD_RESAMPLE.

Reported-and-tested-by: Greg Kurz
Cc: stable@vger.kernel.org
Fixes: 297e21053a52f060944e9f0de4c64fad9bcd72fc
Signed-off-by: Paolo Bonzini
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70b..32bf19e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2744,7 +2744,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
     case KVM_CAP_USER_NMI:
     case KVM_CAP_REINJECT_CONTROL:
     case KVM_CAP_IRQ_INJECT_STATUS:
-    case KVM_CAP_IRQFD:
     case KVM_CAP_IOEVENTFD:
     case KVM_CAP_IOEVENTFD_NO_LENGTH:
     case KVM_CAP_PIT2:
--
cgit v1.1
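The capability is probed from userspace via KVM_CHECK_EXTENSION; a
minimal sketch of the check that, per the commit message, some
userspace skips:

  #include <fcntl.h>
  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  int main(void)
  {
      int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
      int ret;

      if (kvm < 0) {
          perror("open /dev/kvm");
          return 1;
      }

      /* A return value > 0 means the capability is advertised. */
      ret = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_IRQFD);
      printf("KVM_CAP_IRQFD: %s\n", ret > 0 ? "available" : "not advertised");
      return 0;
  }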
From 7486341a98f26857f383aec88ffa10950087c3a1 Mon Sep 17 00:00:00 2001
From: "Li, Aubrey"
Date: Wed, 11 Mar 2015 16:09:00 +0800
Subject: x86/platform, acpi: Bypass legacy PIC and PIT in ACPI hardware
 reduced mode

On a platform in ACPI Hardware-reduced mode, the legacy PIC and PIT
may not be initialized even though they may be present in silicon.
Touching these legacy components causes unexpected results on the
system.

On the Bay Trail-T (ASUS T100) platform, touching these legacy
components keeps the platform from reaching its low-power hardware
idle state (S0ix) during system suspend. So we should bypass them in
ACPI hardware-reduced mode.

Suggested-by: Arjan van de Ven
Signed-off-by: Li Aubrey
Cc:
Cc: Alan Cox
Cc: H. Peter Anvin
Cc: Len Brown
Cc: Rafael J. Wysocki
Cc: Rafael J. Wysocki
Link: http://lkml.kernel.org/r/54FFF81C.20703@linux.intel.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/acpi/boot.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3d525c6..803b684 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 }
 
 /*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+    if (acpi_gbl_reduced_hardware) {
+        /*
+         * Override x86_init functions and bypass legacy pic
+         * in Hardware-reduced ACPI mode
+         */
+        x86_init.timers.timer_init    = x86_init_noop;
+        x86_init.irqs.pre_vector_init = x86_init_noop;
+        legacy_pic                    = &null_legacy_pic;
+    }
+}
+
+/*
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact linux-acpi@vger.kernel.org
  */
@@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void)
      */
     early_acpi_process_madt();
 
+    /*
+     * Hardware-reduced ACPI mode initialization:
+     */
+    acpi_reduced_hw_init();
+
     return 0;
 }
--
cgit v1.1
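For context, acpi_gbl_reduced_hardware is ACPICA's cached view of the
FADT "Hardware Reduced" flag. Roughly how it is derived, as a sketch
(trimmed struct; flag value as defined by ACPI 5.0; treat the details
as an approximation, not ACPICA's exact code):

  #include <stdbool.h>
  #include <stdint.h>

  #define ACPI_FADT_HW_REDUCED  (1u << 20)  /* FADT flags bit, ACPI 5.0 */

  struct fadt_lite {            /* trimmed, illustrative */
      uint8_t  revision;
      uint32_t flags;
  };

  /* Roughly what ACPICA does when parsing the FADT: the flag is only
   * meaningful on revision 5+ tables. */
  static bool is_hw_reduced(const struct fadt_lite *fadt)
  {
      return fadt->revision >= 5 && (fadt->flags & ACPI_FADT_HW_REDUCED);
  }

  int main(void)
  {
      struct fadt_lite fadt = { .revision = 5,
                                .flags = ACPI_FADT_HW_REDUCED };
      return !is_hw_reduced(&fadt);   /* exits 0 on a reduced-hw table */
  }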
From c8a470cab030bae8f9e6e5cfff72b047b7c627a7 Mon Sep 17 00:00:00 2001
From: Daniel J Blueman
Date: Thu, 12 Mar 2015 16:55:13 +0100
Subject: x86/apic/numachip: Fix sibling map with NumaChip

On NumaChip systems, the physical processor ID assignment wasn't
accounting for the number of nodes in AMD multi-module processors,
giving an incorrect sibling map:

  $ cd /sys/devices/system/cpu/cpu29/topology
  $ grep . *
  core_id:5
  core_siblings:00000000,ff000000
  core_siblings_list:24-31
  physical_package_id:3
  thread_siblings:00000000,30000000
  thread_siblings_list:28-29

This fixes it:

  $ cd /sys/devices/system/cpu/cpu29/topology
  $ grep . *
  core_id:5
  core_siblings:00000000,ffff0000
  core_siblings_list:16-31
  physical_package_id:1
  thread_siblings:00000000,30000000
  thread_siblings_list:28-29

Signed-off-by: Daniel J Blueman
Signed-off-by: Borislav Petkov
Cc:
Cc: H. Peter Anvin
Cc: Steffen Persvold
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426135950-10110-1-git-send-email-daniel@numascale.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/apic/apic_numachip.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c2fd21f..017149c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
 static unsigned int get_apic_id(unsigned long x)
 {
     unsigned long value;
-    unsigned int id;
+    unsigned int id = (x >> 24) & 0xff;
 
-    rdmsrl(MSR_FAM10H_NODE_ID, value);
-    id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+    if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+        rdmsrl(MSR_FAM10H_NODE_ID, value);
+        id |= (value << 2) & 0xff00;
+    }
 
     return id;
 }
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
 
 static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 {
-    if (c->phys_proc_id != node) {
-        c->phys_proc_id = node;
-        per_cpu(cpu_llc_id, smp_processor_id()) = node;
+    u64 val;
+    u32 nodes = 1;
+
+    this_cpu_write(cpu_llc_id, node);
+
+    /* Account for nodes per socket in multi-core-module processors */
+    if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+        rdmsrl(MSR_FAM10H_NODE_ID, val);
+        nodes = ((val >> 3) & 7) + 1;
     }
+
+    c->phys_proc_id = node / nodes;
 }
 
 static int __init numachip_system_init(void)
--
cgit v1.1
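The decode works out as follows; a standalone sketch (the MSR field
layout is inferred from the patch itself, not re-verified against AMD
documentation, and the MSR value is hypothetical):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      uint64_t node_id_msr = 0x0b; /* hypothetical MSR_FAM10H_NODE_ID */
      uint32_t node = 3;           /* NUMA node this CPU lives on */

      /* Bits [5:3]: nodes per socket, minus one (as used above). */
      uint32_t nodes = ((node_id_msr >> 3) & 7) + 1;

      /* Two nodes per socket -> node 3 belongs to package 1. */
      printf("nodes=%u physical_package_id=%u\n", nodes, node / nodes);
      return 0;
  }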
From c1a6bff28cbff796bf6e7db5cf42ec9244911be2 Mon Sep 17 00:00:00 2001
From: Petr Matousek
Date: Wed, 11 Mar 2015 12:16:09 +0100
Subject: kvm: x86: i8259: return initialized data on invalid-size read

If data is read from the PIC with an invalid access size, the returned
data stays uninitialized even though success is returned.

Fix this by always initializing the data.

Signed-off-by: Petr Matousek
Reported-by: Nadav Amit
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/i8259.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cc31f7c..9541ba3 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
         return -EOPNOTSUPP;
 
     if (len != 1) {
+        memset(val, 0, len);
         pr_pic_unimpl("non byte read\n");
         return 0;
     }
--
cgit v1.1

From ccfe8c3f7e52ae83155cb038753f4c75b774ca8a Mon Sep 17 00:00:00 2001
From: Stephan Mueller
Date: Thu, 12 Mar 2015 09:17:51 +0100
Subject: crypto: aesni - fix memory usage in GCM decryption

The kernel crypto API logic requires the caller to provide the length
of (ciphertext || authentication tag) as cryptlen for the AEAD
decryption operation. Thus, the cipher implementation must calculate
the size of the plaintext output itself and cannot simply use cryptlen.

The RFC4106 GCM decryption operation tries to overwrite cryptlen memory
in req->dst. As the destination buffer for decryption only needs to
hold the plaintext memory but cryptlen references the input buffer
holding (ciphertext || authentication tag), the assumption of the
destination buffer length in the RFC4106 GCM operation leads to a too
large size. This patch simply uses the already calculated plaintext
size.

In addition, this patch fixes the offset calculation of the AAD buffer
pointer: as mentioned before, cryptlen already includes the size of the
tag. Thus, the tag does not need to be added. With the addition, the
AAD would be written beyond the already allocated buffer.

Note, this fixes a kernel crash that can be triggered from user space
via AF_ALG(aead) -- simply use the libkcapi test application from [1]
and update it to use rfc4106-gcm-aes.

Using [1], the changes were tested using CAVS vectors to demonstrate
that the crypto operation still delivers the right results.

[1] http://www.chronox.de/libkcapi.html

CC: Tadeusz Struk
Cc: stable@vger.kernel.org
Signed-off-by: Stephan Mueller
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/aesni-intel_glue.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 947c6bf..54f60ab 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1155,7 +1155,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
         src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
         if (!src)
             return -ENOMEM;
-        assoc = (src + req->cryptlen + auth_tag_len);
+        assoc = (src + req->cryptlen);
         scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
         scatterwalk_map_and_copy(assoc, req->assoc, 0, req->assoclen, 0);
@@ -1180,7 +1180,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
         scatterwalk_done(&src_sg_walk, 0, 0);
         scatterwalk_done(&assoc_sg_walk, 0, 0);
     } else {
-        scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
+        scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1);
         kfree(src);
     }
     return retval;
--
cgit v1.1
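The length bookkeeping the commit message describes, as a standalone
sketch (illustrative values; tempCipherLen mirrors the driver's
variable name, and a 16-byte ICV is assumed):

  #include <assert.h>
  #include <stddef.h>
  #include <stdio.h>

  int main(void)
  {
      size_t auth_tag_len = 16;                 /* RFC4106 GCM ICV size */
      size_t cryptlen = 1024 + auth_tag_len;    /* ciphertext || tag */

      /* Plaintext written to req->dst: strip the tag from cryptlen. */
      size_t tempCipherLen = cryptlen - auth_tag_len;
      assert(tempCipherLen == 1024);

      /* Offset of the AAD copy in the bounce buffer: cryptlen already
       * covers the tag, so adding auth_tag_len again would point past
       * the kmalloc(cryptlen + assoclen) allocation. */
      size_t assoc_offset = cryptlen;

      printf("plaintext=%zu aad offset=%zu\n", tempCipherLen, assoc_offset);
      return 0;
  }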
From a7c80ebcac3068b1c3cb27d538d29558c30010c8 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 13 Mar 2015 09:53:09 +0100
Subject: x86/fpu: Avoid math_state_restore() without used_math() in
 __restore_xstate_sig()

math_state_restore() assumes it is called with irqs disabled, but this
is not true if the caller is __restore_xstate_sig().

This means that if ia32_fxstate == T and __copy_from_user() fails,
__restore_xstate_sig() returns with irqs disabled too.

This triggers:

  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:41
   dump_stack
   ___might_sleep
   ? _raw_spin_unlock_irqrestore
   __might_sleep
   down_read
   ? _raw_spin_unlock_irqrestore
   print_vma_addr
   signal_fault
   sys32_rt_sigreturn

Change __restore_xstate_sig() to call set_used_math()
unconditionally. This avoids enabling and disabling interrupts in
math_state_restore(). If copy_from_user() fails, we can simply do
fpu_finit() by hand.

[ Note: this is only the first step. math_state_restore() should not
  check used_math(), it should set this flag. While init_fpu() should
  simply die. ]

Signed-off-by: Oleg Nesterov
Signed-off-by: Borislav Petkov
Cc:
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: Fenghua Yu
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Pekka Riikonen
Cc: Quentin Casasnovas
Cc: Rik van Riel
Cc: Suresh Siddha
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/20150307153844.GB25954@redhat.com
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/xsave.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 34f66e5..cdc6cf9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
          * thread's fpu state, reconstruct fxstate from the fsave
          * header. Sanitize the copied state etc.
          */
-        struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+        struct fpu *fpu = &tsk->thread.fpu;
         struct user_i387_ia32_struct env;
         int err = 0;
 
@@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
          */
         drop_fpu(tsk);
 
-        if (__copy_from_user(xsave, buf_fx, state_size) ||
+        if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
             __copy_from_user(&env, buf, sizeof(env))) {
+            fpu_finit(fpu);
             err = -1;
         } else {
             sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-            set_used_math();
         }
 
+        set_used_math();
         if (use_eager_fpu()) {
             preempt_disable();
             math_state_restore();
--
cgit v1.1

From f4c3686386393c120710dd34df2a74183ab805fd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Fri, 13 Mar 2015 09:53:10 +0100
Subject: x86/fpu: Drop_fpu() should not assume that tsk equals current

drop_fpu() does clear_used_math() and usually this is correct because
tsk == current.

However, switch_fpu_finish()->restore_fpu_checking() is called before
__switch_to() updates the "current_task" variable. If it fails, we will
wrongly clear the PF_USED_MATH flag of the previous task.

So use clear_stopped_child_used_math() instead.

Signed-off-by: Oleg Nesterov
Signed-off-by: Borislav Petkov
Reviewed-by: Rik van Riel
Cc:
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Dave Hansen
Cc: Fenghua Yu
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Pekka Riikonen
Cc: Quentin Casasnovas
Cc: Suresh Siddha
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/20150309171041.GB11388@redhat.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/fpu-internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 0dbc082..72ba21a 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -370,7 +370,7 @@ static inline void drop_fpu(struct task_struct *tsk)
     preempt_disable();
     tsk->thread.fpu_counter = 0;
     __drop_fpu(tsk);
-    clear_used_math();
+    clear_stopped_child_used_math(tsk);
     preempt_enable();
 }
--
cgit v1.1
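A toy model of the flag helpers involved (plain C with a trimmed task
struct; the PF_USED_MATH value matches kernels of this era, everything
else is illustrative):

  #include <stdio.h>

  #define PF_USED_MATH 0x00002000u

  struct task { unsigned int flags; };

  static struct task prev = { PF_USED_MATH };
  static struct task next = { PF_USED_MATH };
  /* __switch_to() has not updated "current" yet: */
  static struct task *current = &prev;

  static void clear_stopped_child_used_math(struct task *tsk)
  {
      tsk->flags &= ~PF_USED_MATH;    /* acts on the task passed in */
  }

  static void clear_used_math(void)
  {
      clear_stopped_child_used_math(current);    /* always "current" */
  }

  int main(void)
  {
      /* restore_fpu_checking() failed for "next"; its FPU state must
       * be dropped. Before the fix, drop_fpu(&next) did: */
      clear_used_math();    /* wrongly clears prev's flag */

      printf("prev used_math=%d next used_math=%d\n",
             !!(prev.flags & PF_USED_MATH), !!(next.flags & PF_USED_MATH));

      /* The fix names the task explicitly: */
      clear_stopped_child_used_math(&next);
      return 0;
  }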
From 670125bda1d86edfadf81dc56a87582ac7fbd47b Mon Sep 17 00:00:00 2001
From: Wincy Van
Date: Wed, 4 Mar 2015 14:31:56 +0800
Subject: KVM: VMX: Set msr bitmap correctly if vcpu is in guest mode

In commit 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR
bitmap"), we set MSR_BITMAP in prepare_vmcs02() if we should use
hardware. This is not enough, since the field will be modified by the
following vmx_set_efer().

Fix this by setting vmx_msr_bitmap_nested in vmx_set_msr_bitmap() if
the vcpu is in guest mode.

Signed-off-by: Wincy Van
Signed-off-by: Marcelo Tosatti
---
 arch/x86/kvm/vmx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b20b4..10a481b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 {
     unsigned long *msr_bitmap;
 
-    if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+    if (is_guest_mode(vcpu))
+        msr_bitmap = vmx_msr_bitmap_nested;
+    else if (irqchip_in_kernel(vcpu->kvm) &&
+             apic_x2apic_mode(vcpu->arch.apic)) {
         if (is_long_mode(vcpu))
             msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
         else
@@ -9218,9 +9221,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
     }
 
     if (cpu_has_vmx_msr_bitmap() &&
-        exec_control & CPU_BASED_USE_MSR_BITMAPS &&
-        nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
-        vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+        exec_control & CPU_BASED_USE_MSR_BITMAPS) {
+        nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
+        /* MSR_BITMAP will be set by following vmx_set_efer. */
     } else
         exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
--
cgit v1.1
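A toy model of the ordering problem (illustrative only; the enum
stands in for the bitmap physical address written to the VMCS field):

  #include <stdbool.h>
  #include <stdio.h>

  enum bitmap { LEGACY, NESTED };
  static enum bitmap msr_bitmap_field;    /* stands in for MSR_BITMAP */

  /* After the fix the guest-mode case is decided here, so the value
   * survives however late this recomputation runs. Before the fix,
   * this function ignored guest_mode and would clobber the NESTED
   * value written earlier by prepare_vmcs02(). */
  static void set_msr_bitmap(bool guest_mode)
  {
      msr_bitmap_field = guest_mode ? NESTED : LEGACY;
  }

  int main(void)
  {
      msr_bitmap_field = NESTED;    /* prepare_vmcs02(): set for L2 ... */
      set_msr_bitmap(true);         /* ... later redone via vmx_set_efer() */

      printf("MSR_BITMAP = %s\n",
             msr_bitmap_field == NESTED ? "nested" : "legacy");
      return 0;
  }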
From 69797dafe35541bfff1989c0b37c66ed785faf0e Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Mon, 16 Mar 2015 11:06:28 +0100
Subject: Revert "x86/mm/ASLR: Propagate base load address calculation"

This reverts commit:

  f47233c2d34f ("x86/mm/ASLR: Propagate base load address calculation")

The main reason for the revert is that the new boot flag does not work
at all currently, and in order to make it work, we need non-trivial
changes to the x86 boot code which we didn't manage to get done in
time for merging.

And even if we did, they would've been too risky, so instead of rushing
things and breaking booting 4.1 on boxes left and right, we will be
very strict and conservative and will take our time with this to fix
and test it properly.

Reported-by: Yinghai Lu
Signed-off-by: Borislav Petkov
Cc: Ard Biesheuvel
Cc: Baoquan He
Cc: H. Peter Anvin
Cc: Josh Triplett
Cc: Junjie Mao
Cc: Kees Cook
Cc: Linus Torvalds
Cc: Matt Fleming
Link: http://lkml.kernel.org/r/20150316100628.GD22995@pd.tnic
Signed-off-by: Ingo Molnar
---
 arch/x86/boot/compressed/aslr.c       | 34 +---------------------------------
 arch/x86/boot/compressed/misc.c       |  3 +--
 arch/x86/boot/compressed/misc.h       |  6 ++----
 arch/x86/include/asm/page_types.h     |  2 --
 arch/x86/include/uapi/asm/bootparam.h |  1 -
 arch/x86/kernel/module.c              | 10 +++++++++-
 arch/x86/kernel/setup.c               | 22 ++++------------------
 7 files changed, 17 insertions(+), 61 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 7083c16..bb13763 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,13 +14,6 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
     LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
-struct kaslr_setup_data {
-    __u64 next;
-    __u32 type;
-    __u32 len;
-    __u8 data[1];
-} kaslr_setup_data;
-
 #define I8254_PORT_CONTROL    0x43
 #define I8254_PORT_COUNTER0   0x40
 #define I8254_CMD_READBACK    0xC0
@@ -302,29 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum,
     return slots_fetch_random();
 }
 
-static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
-{
-    struct setup_data *data;
-
-    kaslr_setup_data.type = SETUP_KASLR;
-    kaslr_setup_data.len = 1;
-    kaslr_setup_data.next = 0;
-    kaslr_setup_data.data[0] = enabled;
-
-    data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
-    while (data && data->next)
-        data = (struct setup_data *)(unsigned long)data->next;
-
-    if (data)
-        data->next = (unsigned long)&kaslr_setup_data;
-    else
-        params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
-
-}
-
-unsigned char *choose_kernel_location(struct boot_params *params,
-                                      unsigned char *input,
+unsigned char *choose_kernel_location(unsigned char *input,
                                       unsigned long input_size,
                                       unsigned char *output,
                                       unsigned long output_size)
@@ -335,17 +306,14 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 #ifdef CONFIG_HIBERNATION
     if (!cmdline_find_option_bool("kaslr")) {
         debug_putstr("KASLR disabled by default...\n");
-        add_kaslr_setup_data(params, 0);
         goto out;
     }
 #else
     if (cmdline_find_option_bool("nokaslr")) {
         debug_putstr("KASLR disabled by cmdline...\n");
-        add_kaslr_setup_data(params, 0);
         goto out;
     }
 #endif
-    add_kaslr_setup_data(params, 1);
 
     /* Record the various known unsafe memory ranges. */
     mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5903089..a950864 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -401,8 +401,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
      * the entire decompressed kernel plus relocation table, or the
      * entire decompressed kernel plus .bss and .brk sections.
      */
-    output = choose_kernel_location(real_mode, input_data, input_len,
-                                    output,
+    output = choose_kernel_location(input_data, input_len, output,
                                     output_len > run_size ? output_len
                                                           : run_size);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index ee3576b..04477d6 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,8 +57,7 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *params,
-                                      unsigned char *input,
+unsigned char *choose_kernel_location(unsigned char *input,
                                       unsigned long input_size,
                                       unsigned char *output,
                                       unsigned long output_size);
@@ -66,8 +65,7 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(struct boot_params *params,
-                                      unsigned char *input,
+unsigned char *choose_kernel_location(unsigned char *input,
                                       unsigned long input_size,
                                       unsigned char *output,
                                       unsigned long output_size)
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 95e11f7..f97fbe3 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,8 +51,6 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-extern bool kaslr_enabled;
-
 static inline phys_addr_t get_max_mapped(void)
 {
     return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 44e6dd7..225b098 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,7 +7,6 @@
 #define SETUP_DTB    2
 #define SETUP_PCI    3
 #define SETUP_EFI    4
-#define SETUP_KASLR  5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK    0x07FF
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 9bbb9b3..d1ac80b 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -47,13 +47,21 @@ do {							\
 
 #ifdef CONFIG_RANDOMIZE_BASE
 static unsigned long module_load_offset;
+static int randomize_modules = 1;
 
 /* Mutex protects the module_load_offset. */
 static DEFINE_MUTEX(module_kaslr_mutex);
 
+static int __init parse_nokaslr(char *p)
+{
+    randomize_modules = 0;
+    return 0;
+}
+early_param("nokaslr", parse_nokaslr);
+
 static unsigned long int get_module_load_offset(void)
 {
-    if (kaslr_enabled) {
+    if (randomize_modules) {
         mutex_lock(&module_kaslr_mutex);
         /*
          * Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 98dc931..0a2421c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,8 +122,6 @@
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
-bool __read_mostly kaslr_enabled = false;
-
 #ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
@@ -427,11 +425,6 @@ static void __init reserve_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
-{
-    kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
-}
-
 static void __init parse_setup_data(void)
 {
     struct setup_data *data;
@@ -457,9 +450,6 @@ static void __init parse_setup_data(void)
         case SETUP_EFI:
             parse_efi_setup(pa_data, data_len);
             break;
-        case SETUP_KASLR:
-            parse_kaslr_setup(pa_data, data_len);
-            break;
         default:
             break;
         }
@@ -842,14 +832,10 @@ static void __init trim_low_memory_range(void)
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-    if (kaslr_enabled)
-        pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
-             (unsigned long)&_text - __START_KERNEL,
-             __START_KERNEL,
-             __START_KERNEL_map,
-             MODULES_VADDR-1);
-    else
-        pr_emerg("Kernel Offset: disabled\n");
+    pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
+         "(relocation range: 0x%lx-0x%lx)\n",
+         (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
+         __START_KERNEL_map, MODULES_VADDR-1);
 
     return 0;
 }
--
cgit v1.1