diff options
author | gordon <gordon@FreeBSD.org> | 2018-03-14 04:00:00 +0000 |
---|---|---|
committer | gordon <gordon@FreeBSD.org> | 2018-03-14 04:00:00 +0000 |
commit | 312f06f761f7362e153ed5a1a9c49e17294e52b5 (patch) | |
tree | 272e75bf0fc1b0ab9b02a0f2f034e207b04d82f3 /sys/amd64/amd64/machdep.c | |
parent | c69c8ae8053b66933da68a1f410f78dda1300dd7 (diff) | |
download | FreeBSD-src-312f06f761f7362e153ed5a1a9c49e17294e52b5.zip FreeBSD-src-312f06f761f7362e153ed5a1a9c49e17294e52b5.tar.gz |
Add mitigations for two classes of speculative execution vulnerabilities
on amd64. [FreeBSD-SA-18:03.speculative_execution]
Approved by: so
Security: FreeBSD-SA-18:03.speculative_execution
Security: CVE-2017-5715
Security: CVE-2017-5754
Diffstat (limited to 'sys/amd64/amd64/machdep.c')
-rw-r--r-- | sys/amd64/amd64/machdep.c | 144 |
1 files changed, 99 insertions, 45 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 2c18af9..dd5bb06 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -114,6 +114,7 @@ __FBSDID("$FreeBSD$"); #include <machine/clock.h> #include <machine/cpu.h> #include <machine/cputypes.h> +#include <machine/frame.h> #include <machine/intr_machdep.h> #include <x86/mca.h> #include <machine/md_var.h> @@ -149,6 +150,14 @@ __FBSDID("$FreeBSD$"); /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); +/* + * The PTI trampoline stack needs enough space for a hardware trapframe and a + * couple of scratch registers, as well as the trapframe left behind after an + * iret fault. + */ +CTASSERT(PC_PTI_STACK_SZ * sizeof(register_t) >= 2 * sizeof(struct pti_frame) - + offsetof(struct pti_frame, pti_rip)); + extern u_int64_t hammer_time(u_int64_t, u_int64_t); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) @@ -180,12 +189,6 @@ struct init_ops init_ops = { .msi_init = msi_init, }; -/* - * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is - * the physical address at which the kernel is loaded. - */ -extern char kernphys[]; - struct msgbuf *msgbufp; /* @@ -670,7 +673,7 @@ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); - +static char mce0_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); @@ -824,13 +827,20 @@ extern inthand_t IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), + IDTVEC(div_pti), IDTVEC(dbg_pti), IDTVEC(bpt_pti), + IDTVEC(ofl_pti), IDTVEC(bnd_pti), IDTVEC(ill_pti), IDTVEC(dna_pti), + IDTVEC(fpusegm_pti), IDTVEC(tss_pti), IDTVEC(missing_pti), + IDTVEC(stk_pti), IDTVEC(prot_pti), IDTVEC(page_pti), + IDTVEC(rsvd_pti), IDTVEC(fpu_pti), IDTVEC(align_pti), + IDTVEC(xmm_pti), #ifdef KDTRACE_HOOKS - IDTVEC(dtrace_ret), + IDTVEC(dtrace_ret), IDTVEC(dtrace_ret_pti), #endif #ifdef XENHVM - IDTVEC(xen_intr_upcall), + IDTVEC(xen_intr_upcall), IDTVEC(xen_intr_upcall_pti), #endif - IDTVEC(fast_syscall), IDTVEC(fast_syscall32); + IDTVEC(fast_syscall), IDTVEC(fast_syscall32), + IDTVEC(fast_syscall_pti); #ifdef DDB /* @@ -1523,6 +1533,23 @@ amd64_kdb_init(void) #endif } +/* Set up the fast syscall stuff */ +void +amd64_conf_fast_syscall(void) +{ + uint64_t msr; + + msr = rdmsr(MSR_EFER) | EFER_SCE; + wrmsr(MSR_EFER, msr); + wrmsr(MSR_LSTAR, pti ? (u_int64_t)IDTVEC(fast_syscall_pti) : + (u_int64_t)IDTVEC(fast_syscall)); + wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); + msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | + ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); + wrmsr(MSR_STAR, msr); + wrmsr(MSR_SF_MASK, PSL_NT | PSL_T | PSL_I | PSL_C | PSL_D); +} + u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { @@ -1531,7 +1558,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; - u_int64_t msr; + u_int64_t rsp0; char *env; size_t kstack0_sz; int late_console; @@ -1544,6 +1571,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) kmdp = init_ops.parse_preload_data(modulep); + identify_cpu1(); + /* Init basic tunables, hz etc */ init_param1(); @@ -1600,34 +1629,55 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ + pti = pti_get_default(); + TUNABLE_INT_FETCH("vm.pmap.pti", &pti); + for (x = 0; x < NIDT; x++) - setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); + setidt(x, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_DE, pti ? &IDTVEC(div_pti) : &IDTVEC(div), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_DB, pti ? &IDTVEC(dbg_pti) : &IDTVEC(dbg), SDT_SYSIGT, + SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); - setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); - setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_BP, pti ? &IDTVEC(bpt_pti) : &IDTVEC(bpt), SDT_SYSIGT, + SEL_UPL, 0); + setidt(IDT_OF, pti ? &IDTVEC(ofl_pti) : &IDTVEC(ofl), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_BR, pti ? &IDTVEC(bnd_pti) : &IDTVEC(bnd), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_UD, pti ? &IDTVEC(ill_pti) : &IDTVEC(ill), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_NM, pti ? &IDTVEC(dna_pti) : &IDTVEC(dna), SDT_SYSIGT, + SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); - setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); - setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_FPUGP, pti ? &IDTVEC(fpusegm_pti) : &IDTVEC(fpusegm), + SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_TS, pti ? &IDTVEC(tss_pti) : &IDTVEC(tss), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_NP, pti ? &IDTVEC(missing_pti) : &IDTVEC(missing), + SDT_SYSIGT, SEL_KPL, 0); + setidt(IDT_SS, pti ? &IDTVEC(stk_pti) : &IDTVEC(stk), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_GP, pti ? &IDTVEC(prot_pti) : &IDTVEC(prot), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_PF, pti ? &IDTVEC(page_pti) : &IDTVEC(page), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_MF, pti ? &IDTVEC(fpu_pti) : &IDTVEC(fpu), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_AC, pti ? &IDTVEC(align_pti) : &IDTVEC(align), SDT_SYSIGT, + SEL_KPL, 0); + setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 3); + setidt(IDT_XF, pti ? &IDTVEC(xmm_pti) : &IDTVEC(xmm), SDT_SYSIGT, + SEL_KPL, 0); #ifdef KDTRACE_HOOKS - setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); + setidt(IDT_DTRACE_RET, pti ? &IDTVEC(dtrace_ret_pti) : + &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif #ifdef XENHVM - setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); + setidt(IDT_EVTCHN, pti ? &IDTVEC(xen_intr_upcall_pti) : + &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_KPL, 0); #endif - r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); @@ -1648,7 +1698,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) != NULL) vty_set_preferred(VTY_VT); - identify_cpu(); /* Final stage of CPU initialization */ + finishidentcpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); @@ -1663,21 +1713,21 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; + /* + * MC# stack, runs on ist3. The pcpu pointer is stored just + * above the start of the ist3 stack. + */ + np = ((struct nmi_pcpu *) &mce0_stack[sizeof(mce0_stack)]) - 1; + np->np_pcpu = (register_t) pc; + common_tss[0].tss_ist3 = (long) np; + /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); - /* Set up the fast syscall stuff */ - msr = rdmsr(MSR_EFER) | EFER_SCE; - wrmsr(MSR_EFER, msr); - wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); - wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); - msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | - ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); - wrmsr(MSR_STAR, msr); - wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); + amd64_conf_fast_syscall(); /* * Temporary forge some valid pointer to PCB, for exception @@ -1749,10 +1799,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ - common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; + rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ - common_tss[0].tss_rsp0 &= ~0xFul; - PCPU_SET(rsp0, common_tss[0].tss_rsp0); + rsp0 &= ~0xFul; + common_tss[0].tss_rsp0 = pti ? ((vm_offset_t)PCPU_PTR(pti_stack) + + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful : rsp0; + PCPU_SET(rsp0, rsp0); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ @@ -1782,6 +1834,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) #endif thread0.td_critnest = 0; + TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable); + /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } |