From 13d2b4d11d69a92574a55bfd985cfb0ca77aebdc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 24 Jan 2013 13:11:10 +0000 Subject: x86/xen: don't assume %ds is usable in xen_iret for 32-bit PVOPS. This fixes CVE-2013-0228 / XSA-42 Drew Jones while working on CVE-2013-0190 found that that unprivileged guest user in 32bit PV guest can use to crash the > guest with the panic like this: ------------- general protection fault: 0000 [#1] SMP last sysfs file: /sys/devices/vbd-51712/block/xvda/dev Modules linked in: sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 xen_netfront ext4 mbcache jbd2 xen_blkfront dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 1250, comm: r Not tainted 2.6.32-356.el6.i686 #1 EIP: 0061:[] EFLAGS: 00010086 CPU: 0 EIP is at xen_iret+0x12/0x2b EAX: eb8d0000 EBX: 00000001 ECX: 08049860 EDX: 00000010 ESI: 00000000 EDI: 003d0f00 EBP: b77f8388 ESP: eb8d1fe0 DS: 0000 ES: 007b FS: 0000 GS: 00e0 SS: 0069 Process r (pid: 1250, ti=eb8d0000 task=c2953550 task.ti=eb8d0000) Stack: 00000000 0027f416 00000073 00000206 b77f8364 0000007b 00000000 00000000 Call Trace: Code: c3 8b 44 24 18 81 4c 24 38 00 02 00 00 8d 64 24 30 e9 03 00 00 00 8d 76 00 f7 44 24 08 00 00 02 80 75 33 50 b8 00 e0 ff ff 21 e0 <8b> 40 10 8b 04 85 a0 f6 ab c0 8b 80 0c b0 b3 c0 f6 44 24 0d 02 EIP: [] xen_iret+0x12/0x2b SS:ESP 0069:eb8d1fe0 general protection fault: 0000 [#2] ---[ end trace ab0d29a492dcd330 ]--- Kernel panic - not syncing: Fatal exception Pid: 1250, comm: r Tainted: G D --------------- 2.6.32-356.el6.i686 #1 Call Trace: [] ? panic+0x6e/0x122 [] ? oops_end+0xbc/0xd0 [] ? do_general_protection+0x0/0x210 [] ? error_code+0x73/ ------------- Petr says: " I've analysed the bug and I think that xen_iret() cannot cope with mangled DS, in this case zeroed out (null selector/descriptor) by either xen_failsafe_callback() or RESTORE_REGS because the corresponding LDT entry was invalidated by the reproducer. " Jan took a look at the preliminary patch and came up a fix that solves this problem: "This code gets called after all registers other than those handled by IRET got already restored, hence a null selector in %ds or a non-null one that got loaded from a code or read-only data descriptor would cause a kernel mode fault (with the potential of crashing the kernel as a whole, if panic_on_oops is set)." The way to fix this is to realize that the we can only relay on the registers that IRET restores. The two that are guaranteed are the %cs and %ss as they are always fixed GDT selectors. Also they are inaccessible from user mode - so they cannot be altered. This is the approach taken in this patch. Another alternative option suggested by Jan would be to relay on the subtle realization that using the %ebp or %esp relative references uses the %ss segment. In which case we could switch from using %eax to %ebp and would not need the %ss over-rides. That would also require one extra instruction to compensate for the one place where the register is used as scaled index. However Andrew pointed out that is too subtle and if further work was to be done in this code-path it could escape folks attention and lead to accidents. Reviewed-by: Petr Matousek Reported-by: Petr Matousek Reviewed-by: Andrew Cooper Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-asm_32.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index f9643fc..33ca6e4 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -89,11 +89,11 @@ ENTRY(xen_iret) */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov xen_vcpu(%eax), %eax + movl %ss:TI_cpu(%eax), %eax + movl %ss:__per_cpu_offset(,%eax,4), %eax + mov %ss:xen_vcpu(%eax), %eax #else - movl xen_vcpu, %eax + movl %ss:xen_vcpu, %eax #endif /* check IF state we're restoring */ @@ -106,11 +106,11 @@ ENTRY(xen_iret) * resuming the code, so we don't have to be worried about * being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) + setz %ss:XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) + cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) /* * If there's something pending, mask events again so we can @@ -118,7 +118,7 @@ xen_iret_start_crit: * touch XEN_vcpu_info_mask. */ jne 1f - movb $1, XEN_vcpu_info_mask(%eax) + movb $1, %ss:XEN_vcpu_info_mask(%eax) 1: popl %eax -- cgit v1.1 From 4f8c85272c5e7ea1f2fe15d866835bc6f8fc996f Mon Sep 17 00:00:00 2001 From: Cyril Roelandt Date: Tue, 12 Feb 2013 05:01:53 +0100 Subject: xen: remove redundant NULL check before unregister_and_remove_pcpu(). unregister_and_remove_pcpu on a NULL pointer is a no-op, so the NULL check in sync_pcpu can be removed. Signed-off-by: Cyril Roelandt Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/pcpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 067fcfa..5a27a45 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -278,8 +278,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) * Only those at cpu present map has its sys interface. */ if (info->flags & XEN_PCPU_FLAGS_INVALID) { - if (pcpu) - unregister_and_remove_pcpu(pcpu); + unregister_and_remove_pcpu(pcpu); return 0; } -- cgit v1.1 From 5eb65be2d9a1f7c5e2b95aede16e7eab1cdb67e2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Feb 2013 21:29:27 -0500 Subject: Revert "xen/PVonHVM: fix compile warning in init_hvm_pv_info" This reverts commit a7be94ac8d69c037d08f0fd94b45a593f1d45176. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e566..5fb3ec1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1579,7 +1579,7 @@ static void __init xen_hvm_init_shared_info(void) static void __init init_hvm_pv_info(void) { - uint32_t ecx, edx, pages, msr, base; + uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); -- cgit v1.1 From e9daff24a266307943457086533041bd971d0ef9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Feb 2013 21:29:31 -0500 Subject: Revert "xen PVonHVM: use E820_Reserved area for shared_info" This reverts commit 9d02b43dee0d7fb18dfb13a00915550b1a3daa9f. We are doing this b/c on 32-bit PVonHVM with older hypervisors (Xen 4.1) it ends up bothing up the start_info. This is bad b/c we use it for the time keeping, and the timekeeping code loops forever - as the version field never changes. Olaf says to revert it, so lets do that. Acked-by: Olaf Hering Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 75 ++++++++++++++---------------------------------- arch/x86/xen/suspend.c | 2 +- arch/x86/xen/xen-ops.h | 2 +- 3 files changed, 24 insertions(+), 55 deletions(-) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5fb3ec1..e0140923 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void) #endif } -#ifdef CONFIG_XEN_PVHVM -#define HVM_SHARED_INFO_ADDR 0xFE700000UL -static struct shared_info *xen_hvm_shared_info; -static unsigned long xen_hvm_sip_phys; -static int xen_major, xen_minor; - -static void xen_hvm_connect_shared_info(unsigned long pfn) +void __ref xen_hvm_init_shared_info(void) { + int cpu; struct xen_add_to_physmap xatp; + static struct shared_info *shared_info_page = 0; + if (!shared_info_page) + shared_info_page = (struct shared_info *) + extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = pfn; + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); -} -static void __init xen_hvm_set_shared_info(struct shared_info *sip) -{ - int cpu; - - HYPERVISOR_shared_info = sip; + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. */ - for_each_online_cpu(cpu) + * HVM. + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_init_shared_info is run at resume time too and + * in that case multiple vcpus might be online. */ + for_each_online_cpu(cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -} - -/* Reconnect the shared_info pfn to a (new) mfn */ -void xen_hvm_resume_shared_info(void) -{ - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); -} - -/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. - * On these old tools the shared info page will be placed in E820_Ram. - * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects - * that nothing is mapped up to HVM_SHARED_INFO_ADDR. - * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used - * here for the shared info page. */ -static void __init xen_hvm_init_shared_info(void) -{ - if (xen_major < 4) { - xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); - xen_hvm_sip_phys = __pa(xen_hvm_shared_info); - } else { - xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR; - set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys); - xen_hvm_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); } - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); - xen_hvm_set_shared_info(xen_hvm_shared_info); } +#ifdef CONFIG_XEN_PVHVM static void __init init_hvm_pv_info(void) { + int major, minor; uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + major = eax >> 16; + minor = eax & 0xffff; + printk(KERN_INFO "Xen version %d.%d.\n", major, minor); + cpuid(base + 2, &pages, &msr, &ecx, &edx); pfn = __pa(hypercall_page); @@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void) static bool __init xen_hvm_platform(void) { - uint32_t eax, ebx, ecx, edx, base; - if (xen_pv_domain()) return false; - base = xen_cpuid_base(); - if (!base) + if (!xen_cpuid_base()) return false; - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - xen_major = eax >> 16; - xen_minor = eax & 0xffff; - - printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor); - return true; } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c..45329c8 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_resume_shared_info(); + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d2e73d1..a95b417 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -40,7 +40,7 @@ void xen_enable_syscall(void); void xen_vcpu_restore(void); void xen_callback_vector(void); -void xen_hvm_resume_shared_info(void); +void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); -- cgit v1.1