From ed209f46dbcc4a79d277e0d2e93dc51a1f331be5 Mon Sep 17 00:00:00 2001 From: Renato Botelho Date: Tue, 8 May 2018 15:00:31 -0300 Subject: Revert "Proposed fix for CVE-2018-8897" This reverts commit 70d1caf0ad967030b2ce835dc0f116ed1733c82c. --- sys/amd64/amd64/exception.S | 116 +------------------------------------------ sys/amd64/amd64/machdep.c | 13 ++--- sys/amd64/amd64/mp_machdep.c | 11 ---- sys/amd64/amd64/pmap.c | 3 -- sys/amd64/amd64/trap.c | 52 ------------------- sys/i386/i386/trap.c | 30 ----------- 6 files changed, 4 insertions(+), 221 deletions(-) diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index e3afe22..b89c2eb 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -116,6 +116,7 @@ X\l: subq $TF_RIP,%rsp jmp alltraps_noen .endm + TRAP_NOEN dbg, T_TRCTRAP TRAP_NOEN bpt, T_BPTFLT #ifdef KDTRACE_HOOKS TRAP_NOEN dtrace_ret, T_DTRACE_RET @@ -524,121 +525,6 @@ IDTVEC(fast_syscall32) sysret /* - * DB# handler is very similar to NM#, because 'mov/pop %ss' delay - * generation of exception until the next instruction is executed, - * which might be a kernel entry. So we must execute the handler - * on IST stack and be ready for non-kernel GSBASE. - */ -IDTVEC(dbg) - subq $TF_RIP,%rsp - movl $(T_TRCTRAP),TF_TRAPNO(%rsp) - movq $0,TF_ADDR(%rsp) - movq $0,TF_ERR(%rsp) - movq %rdi,TF_RDI(%rsp) - movq %rsi,TF_RSI(%rsp) - movq %rdx,TF_RDX(%rsp) - movq %rcx,TF_RCX(%rsp) - movq %r8,TF_R8(%rsp) - movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) - movq %rbx,TF_RBX(%rsp) - movq %rbp,TF_RBP(%rsp) - movq %r10,TF_R10(%rsp) - movq %r11,TF_R11(%rsp) - movq %r12,TF_R12(%rsp) - movq %r13,TF_R13(%rsp) - movq %r14,TF_R14(%rsp) - movq %r15,TF_R15(%rsp) - SAVE_SEGS - movl $TF_HASSEGS,TF_FLAGS(%rsp) - cld - testb $SEL_RPL_MASK,TF_CS(%rsp) - jnz dbg_fromuserspace - /* - * We've interrupted the kernel. Preserve GS.base in %r12, - * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. - */ - movl $MSR_GSBASE,%ecx - rdmsr - movq %rax,%r12 - shlq $32,%rdx - orq %rdx,%r12 - /* Retrieve and load the canonical value for GS.base. */ - movq TF_SIZE(%rsp),%rdx - movl %edx,%eax - shrq $32,%rdx - wrmsr - movq %cr3,%r13 - movq PCPU(KCR3),%rax - cmpq $~0,%rax - je 1f - movq %rax,%cr3 -1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) - je 2f - movl $MSR_IA32_SPEC_CTRL,%ecx - rdmsr - movl %eax,%r14d - call handle_ibrs_entry -2: FAKE_MCOUNT(TF_RIP(%rsp)) - movq %rsp,%rdi - call trap - MEXITCOUNT - testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) - je 3f - movl %r14d,%eax - xorl %edx,%edx - movl $MSR_IA32_SPEC_CTRL,%ecx - wrmsr - /* - * Put back the preserved MSR_GSBASE value. - */ -3: movl $MSR_GSBASE,%ecx - movq %r12,%rdx - movl %edx,%eax - shrq $32,%rdx - wrmsr - movq %r13,%cr3 - RESTORE_REGS - addq $TF_RIP,%rsp - jmp doreti_iret -dbg_fromuserspace: - /* - * Switch to kernel GSBASE and kernel page table, and copy frame - * from the IST stack to the normal kernel stack, since trap() - * re-enables interrupts, and since we might trap on DB# while - * in trap(). - */ - swapgs - movq PCPU(KCR3),%rax - cmpq $~0,%rax - je 1f - movq %rax,%cr3 -1: movq PCPU(RSP0),%rax - movl $TF_SIZE,%ecx - subq %rcx,%rax - movq %rax,%rdi - movq %rsp,%rsi - rep;movsb - movq %rax,%rsp - call handle_ibrs_entry - movq PCPU(CURPCB),%rdi - orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) - testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) - jz 3f - cmpw $KUF32SEL,TF_FS(%rsp) - jne 2f - rdfsbase %rax - movq %rax,PCB_FSBASE(%rdi) -2: cmpw $KUG32SEL,TF_GS(%rsp) - jne 3f - movl $MSR_KGSBASE,%ecx - rdmsr - shlq $32,%rdx - orq %rdx,%rax - movq %rax,PCB_GSBASE(%rdi) -3: jmp calltrap - -/* * NMI handling is special. * * First, NMIs do not respect the state of the processor's RFLAGS.IF diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index ab48566..51b8433 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -675,7 +675,6 @@ struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char mce0_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); -static char dbg0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; @@ -828,7 +827,7 @@ extern inthand_t IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), - IDTVEC(div_pti), IDTVEC(bpt_pti), + IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti), IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), @@ -1639,7 +1638,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) SEL_KPL, 0); setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 4); + setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT, + SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); @@ -1721,13 +1721,6 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist3 = (long) np; - - /* - * DB# stack, runs on ist4. - */ - np = ((struct nmi_pcpu *) &dbg0_stack[sizeof(dbg0_stack)]) - 1; - np->np_pcpu = (register_t) pc; - common_tss[0].tss_ist4 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 34a05e9..450d512 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -87,7 +87,6 @@ extern struct pcpu __pcpu[]; char *doublefault_stack; char *mce_stack; char *nmi_stack; -char *dbg_stack; /* * Local data and functions. @@ -226,10 +225,6 @@ init_secondary(void) np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; common_tss[cpu].tss_ist3 = (long) np; - /* The DB# stack runs on IST4. */ - np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; - common_tss[cpu].tss_ist4 = (long) np; - /* Prepare private GDT */ gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu]; for (x = 0; x < NGDT; x++) { @@ -275,10 +270,6 @@ init_secondary(void) np = ((struct nmi_pcpu *) &mce_stack[PAGE_SIZE]) - 1; np->np_pcpu = (register_t) pc; - /* Save the per-cpu pointer for use by the DB# handler. */ - np = ((struct nmi_pcpu *) &dbg_stack[PAGE_SIZE]) - 1; - np->np_pcpu = (register_t) pc; - wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, (u_int64_t)pc); /* XXX User value while we're in the kernel */ @@ -377,8 +368,6 @@ native_start_all_aps(void) M_WAITOK | M_ZERO); nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK | M_ZERO); - dbg_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, - M_WAITOK | M_ZERO); dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, M_WAITOK | M_ZERO); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 53de162..64abe9b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -7564,9 +7564,6 @@ pmap_pti_init(void) /* MC# stack IST 3 */ va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); - /* DB# stack IST 4 */ - va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu); - pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); } pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE, (vm_offset_t)etext, true); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index af4925a..a553fc5 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -45,7 +45,6 @@ __FBSDID("$FreeBSD$"); */ #include "opt_clock.h" -#include "opt_compat.h" #include "opt_cpu.h" #include "opt_hwpmc_hooks.h" #include "opt_isa.h" @@ -100,11 +99,6 @@ PMC_SOFT_DEFINE( , , page_fault, write); #include #endif -extern inthand_t IDTVEC(bpt), IDTVEC(bpt_pti), IDTVEC(dbg), - IDTVEC(fast_syscall), IDTVEC(fast_syscall_pti), IDTVEC(fast_syscall32), - IDTVEC(int0x80_syscall_pti), IDTVEC(int0x80_syscall); - - void __noinline trap(struct trapframe *frame); void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); @@ -541,52 +535,6 @@ trap(struct trapframe *frame) load_dr6(rdr6() & ~0xf); return; } - - /* - * Malicious user code can configure a debug - * register watchpoint to trap on data access - * to the top of stack and then execute 'pop - * %ss; int 3'. Due to exception deferral for - * 'pop %ss', the CPU will not interrupt 'int - * 3' to raise the DB# exception for the debug - * register but will postpone the DB# until - * execution of the first instruction of the - * BP# handler (in kernel mode). Normally the - * previous check would ignore DB# exceptions - * for watchpoints on user addresses raised in - * kernel mode. However, some CPU errata - * include cases where DB# exceptions do not - * properly set bits in %dr6, e.g. Haswell - * HSD23 and Skylake-X SKZ24. - * - * A deferred DB# can also be raised on the - * first instructions of system call entry - * points or single-step traps via similar use - * of 'pop %ss' or 'mov xxx, %ss'. - */ - if (pti) { - if (frame->tf_rip == - (uintptr_t)IDTVEC(fast_syscall_pti) || -#ifdef COMPAT_FREEBSD32 - frame->tf_rip == - (uintptr_t)IDTVEC(int0x80_syscall_pti) || -#endif - frame->tf_rip == (uintptr_t)IDTVEC(bpt_pti)) - return; - } else { - if (frame->tf_rip == - (uintptr_t)IDTVEC(fast_syscall) || -#ifdef COMPAT_FREEBSD32 - frame->tf_rip == - (uintptr_t)IDTVEC(int0x80_syscall) || -#endif - frame->tf_rip == (uintptr_t)IDTVEC(bpt)) - return; - } - if (frame->tf_rip == (uintptr_t)IDTVEC(dbg) || - /* Needed for AMD. */ - frame->tf_rip == (uintptr_t)IDTVEC(fast_syscall32)) - return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 7fa94f4..000ff17 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -116,8 +116,6 @@ void dblfault_handler(void); extern inthand_t IDTVEC(lcall_syscall); -extern inthand_t IDTVEC(bpt), IDTVEC(dbg), IDTVEC(int0x80_syscall); - #define MAX_TRAP_MSG 32 static char *trap_msg[] = { "", /* 0 unused */ @@ -670,34 +668,6 @@ kernel_trctrap: load_dr6(rdr6() & ~0xf); goto out; } - - /* - * Malicious user code can configure a debug - * register watchpoint to trap on data access - * to the top of stack and then execute 'pop - * %ss; int 3'. Due to exception deferral for - * 'pop %ss', the CPU will not interrupt 'int - * 3' to raise the DB# exception for the debug - * register but will postpone the DB# until - * execution of the first instruction of the - * BP# handler (in kernel mode). Normally the - * previous check would ignore DB# exceptions - * for watchpoints on user addresses raised in - * kernel mode. However, some CPU errata - * include cases where DB# exceptions do not - * properly set bits in %dr6, e.g. Haswell - * HSD23 and Skylake-X SKZ24. - * - * A deferred DB# can also be raised on the - * first instructions of system call entry - * points or single-step traps via similar use - * of 'pop %ss' or 'mov xxx, %ss'. - */ - if (frame->tf_eip == - (uintptr_t)IDTVEC(int0x80_syscall) || - frame->tf_eip == (uintptr_t)IDTVEC(bpt) || - frame->tf_eip == (uintptr_t)IDTVEC(dbg)) - return; /* * FALLTHROUGH (TRCTRAP kernel mode, kernel address) */ -- cgit v1.1