diff options
author | kib <kib@FreeBSD.org> | 2018-02-17 18:00:01 +0000 |
---|---|---|
committer | Luiz Souza <luiz@netgate.com> | 2018-02-19 14:55:29 -0300 |
commit | 430a2bea3907149b30cc75fc722b6cf1f81da82a (patch) | |
tree | c808db355a0220f39f7dff07a7e4805c57c44190 /sys/amd64/amd64/exception.S | |
parent | 75492dc7653f9d5eb0e33bc8277957739f587ff0 (diff) | |
download | FreeBSD-src-430a2bea3907149b30cc75fc722b6cf1f81da82a.zip FreeBSD-src-430a2bea3907149b30cc75fc722b6cf1f81da82a.tar.gz |
MFC r328083,328096,328116,328119,328120,328128,328135,328153,328157,
328166,328177,328199,328202,328205,328468,328470,328624,328625,328627,
328628,329214,329297,329365:
Meltdown mitigation by PTI, PCID optimization of PTI, and kernel use of IBRS
for some mitigations of Spectre.
Tested by: emaste, Arshan Khanifar <arshankhanifar@gmail.com>
Discussed with: jkim
Sponsored by: The FreeBSD Foundation
(cherry picked from commit 6dd025b40ee6870bea6ba670f30dcf684edc3f6c)
Diffstat (limited to 'sys/amd64/amd64/exception.S')
-rw-r--r-- | sys/amd64/amd64/exception.S | 549 |
1 files changed, 376 insertions, 173 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index ebdf41a..b89c2eb 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -1,12 +1,16 @@ /*- * Copyright (c) 1989, 1990 William F. Jolitz. * Copyright (c) 1990 The Regents of the University of California. - * Copyright (c) 2007 The FreeBSD Foundation + * Copyright (c) 2007-2018 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * + * Portions of this software were developed by + * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from + * the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -38,13 +42,13 @@ #include "opt_compat.h" #include "opt_hwpmc_hooks.h" +#include "assym.s" + #include <machine/asmacros.h> #include <machine/psl.h> #include <machine/trap.h> #include <machine/specialreg.h> -#include "assym.s" - #ifdef KDTRACE_HOOKS .bss .globl dtrace_invop_jump_addr @@ -100,68 +104,61 @@ dtrace_invop_calltrap_addr: MCOUNT_LABEL(user) MCOUNT_LABEL(btrap) -/* Traps that we leave interrupts disabled for.. */ -#define TRAP_NOEN(a) \ - subq $TF_RIP,%rsp; \ - movl $(a),TF_TRAPNO(%rsp) ; \ - movq $0,TF_ADDR(%rsp) ; \ - movq $0,TF_ERR(%rsp) ; \ +/* Traps that we leave interrupts disabled for. */ + .macro TRAP_NOEN l, trapno + PTI_ENTRY \l,X\l + .globl X\l + .type X\l,@function +X\l: subq $TF_RIP,%rsp + movl $\trapno,TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) jmp alltraps_noen -IDTVEC(dbg) - TRAP_NOEN(T_TRCTRAP) -IDTVEC(bpt) - TRAP_NOEN(T_BPTFLT) + .endm + + TRAP_NOEN dbg, T_TRCTRAP + TRAP_NOEN bpt, T_BPTFLT #ifdef KDTRACE_HOOKS -IDTVEC(dtrace_ret) - TRAP_NOEN(T_DTRACE_RET) + TRAP_NOEN dtrace_ret, T_DTRACE_RET #endif /* Regular traps; The cpu does not supply tf_err for these. */ -#define TRAP(a) \ - subq $TF_RIP,%rsp; \ - movl $(a),TF_TRAPNO(%rsp) ; \ - movq $0,TF_ADDR(%rsp) ; \ - movq $0,TF_ERR(%rsp) ; \ + .macro TRAP l, trapno + PTI_ENTRY \l,X\l + .globl X\l + .type X\l,@function +X\l: + subq $TF_RIP,%rsp + movl $\trapno,TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) jmp alltraps -IDTVEC(div) - TRAP(T_DIVIDE) -IDTVEC(ofl) - TRAP(T_OFLOW) -IDTVEC(bnd) - TRAP(T_BOUND) -IDTVEC(ill) - TRAP(T_PRIVINFLT) -IDTVEC(dna) - TRAP(T_DNA) -IDTVEC(fpusegm) - TRAP(T_FPOPFLT) -IDTVEC(mchk) - TRAP(T_MCHK) -IDTVEC(rsvd) - TRAP(T_RESERVED) -IDTVEC(fpu) - TRAP(T_ARITHTRAP) -IDTVEC(xmm) - TRAP(T_XMMFLT) - -/* This group of traps have tf_err already pushed by the cpu */ -#define TRAP_ERR(a) \ - subq $TF_ERR,%rsp; \ - movl $(a),TF_TRAPNO(%rsp) ; \ - movq $0,TF_ADDR(%rsp) ; \ + .endm + + TRAP div, T_DIVIDE + TRAP ofl, T_OFLOW + TRAP bnd, T_BOUND + TRAP ill, T_PRIVINFLT + TRAP dna, T_DNA + TRAP fpusegm, T_FPOPFLT + TRAP rsvd, T_RESERVED + TRAP fpu, T_ARITHTRAP + TRAP xmm, T_XMMFLT + +/* This group of traps have tf_err already pushed by the cpu. */ + .macro TRAP_ERR l, trapno + PTI_ENTRY \l,X\l,has_err=1 + .globl X\l + .type X\l,@function +X\l: + subq $TF_ERR,%rsp + movl $\trapno,TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) jmp alltraps -IDTVEC(tss) - TRAP_ERR(T_TSSFLT) -IDTVEC(missing) - subq $TF_ERR,%rsp - movl $T_SEGNPFLT,TF_TRAPNO(%rsp) - jmp prot_addrf -IDTVEC(stk) - subq $TF_ERR,%rsp - movl $T_STKFLT,TF_TRAPNO(%rsp) - jmp prot_addrf -IDTVEC(align) - TRAP_ERR(T_ALIGNFLT) + .endm + + TRAP_ERR tss, T_TSSFLT + TRAP_ERR align, T_ALIGNFLT /* * alltraps entry point. Use swapgs if this is the first time in the @@ -174,24 +171,22 @@ IDTVEC(align) alltraps: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz alltraps_testi /* already running with kernel GS.base */ + jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) -alltraps_testi: - testl $PSL_I,TF_RFLAGS(%rsp) - jz alltraps_pushregs_no_rdi - sti -alltraps_pushregs_no_rdi: +1: SAVE_SEGS movq %rdx,TF_RDX(%rsp) movq %rax,TF_RAX(%rsp) + movq %rcx,TF_RCX(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) + jz 2f + call handle_ibrs_entry +2: testl $PSL_I,TF_RFLAGS(%rsp) + jz alltraps_pushregs_no_rax + sti alltraps_pushregs_no_rax: movq %rsi,TF_RSI(%rsp) - movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) movq %rbx,TF_RBX(%rsp) @@ -249,15 +244,18 @@ calltrap: alltraps_noen: movq %rdi,TF_RDI(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz 1f /* already running with kernel GS.base */ + jz 1f /* already running with kernel GS.base */ swapgs movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) -1: movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) - jmp alltraps_pushregs_no_rdi +1: SAVE_SEGS + movq %rdx,TF_RDX(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rcx,TF_RCX(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) + jz alltraps_pushregs_no_rax + call handle_ibrs_entry + jmp alltraps_pushregs_no_rax IDTVEC(dblfault) subq $TF_ERR,%rsp @@ -279,56 +277,110 @@ IDTVEC(dblfault) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) + SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs 1: - movq %rsp,%rdi + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 2f + movq %rax,%cr3 +2: movq %rsp,%rdi call dblfault_handler -2: - hlt - jmp 2b +3: hlt + jmp 3b + ALIGN_TEXT +IDTVEC(page_pti) + testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) + jz Xpage + swapgs + pushq %rax + pushq %rdx + movq %cr3,%rax + movq %rax,PCPU(SAVED_UCR3) + PTI_UUENTRY has_err=1 + subq $TF_ERR,%rsp + movq %rdi,TF_RDI(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + jmp page_u IDTVEC(page) subq $TF_ERR,%rsp - movl $T_PAGEFLT,TF_TRAPNO(%rsp) - movq %rdi,TF_RDI(%rsp) /* free up a GP register */ + movq %rdi,TF_RDI(%rsp) /* free up GP registers */ + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz 1f /* already running with kernel GS.base */ + jz page_cr2 /* already running with kernel GS.base */ swapgs - movq PCPU(CURPCB),%rdi +page_u: movq PCPU(CURPCB),%rdi andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi) -1: movq %cr2,%rdi /* preserve %cr2 before .. */ + movq PCPU(SAVED_UCR3),%rax + movq %rax,PCB_SAVED_UCR3(%rdi) + call handle_ibrs_entry +page_cr2: + movq %cr2,%rdi /* preserve %cr2 before .. */ movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */ - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) + SAVE_SEGS + movl $T_PAGEFLT,TF_TRAPNO(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) - jz alltraps_pushregs_no_rdi + jz alltraps_pushregs_no_rax sti - jmp alltraps_pushregs_no_rdi + jmp alltraps_pushregs_no_rax /* * We have to special-case this one. If we get a trap in doreti() at * the iretq stage, we'll reenter with the wrong gs state. We'll have * to do a special the swapgs in this case even coming from the kernel. * XXX linux has a trap handler for their equivalent of load_gs(). + * + * On the stack, we have the hardware interrupt frame to return + * to usermode (faulted) and another frame with error code, for + * fault. For PTI, copy both frames to the main thread stack. */ -IDTVEC(prot) + .macro PROTF_ENTRY name,trapno +\name\()_pti_doreti: + pushq %rax + pushq %rdx + swapgs + movq PCPU(KCR3),%rax + movq %rax,%cr3 + movq PCPU(RSP0),%rax + subq $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */ + MOVE_STACKS (PTI_SIZE / 4 - 3) + movq %rax,%rsp + popq %rdx + popq %rax + swapgs + jmp X\name +IDTVEC(\name\()_pti) + cmpq $doreti_iret,PTI_RIP-2*8(%rsp) + je \name\()_pti_doreti + testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */ + jz X\name + PTI_UENTRY has_err=1 + swapgs +IDTVEC(\name) subq $TF_ERR,%rsp - movl $T_PROTFLT,TF_TRAPNO(%rsp) + movl $\trapno,TF_TRAPNO(%rsp) + jmp prot_addrf + .endm + + PROTF_ENTRY missing, T_SEGNPFLT + PROTF_ENTRY stk, T_STKFLT + PROTF_ENTRY prot, T_PROTFLT + prot_addrf: movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ movq %rax,TF_RAX(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) movw %fs,TF_FS(%rsp) movw %gs,TF_GS(%rsp) leaq doreti_iret(%rip),%rdi @@ -354,7 +406,8 @@ prot_addrf: 3: cmpw $KUG32SEL,TF_GS(%rsp) jne 4f movq %rdx,PCB_GSBASE(%rdi) -4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ +4: call handle_ibrs_entry + orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) @@ -375,8 +428,18 @@ prot_addrf: * We do not support invoking this from a custom segment registers, * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT. */ + SUPERALIGN_TEXT +IDTVEC(fast_syscall_pti) + swapgs + movq %rax,PCPU(SCRATCH_RAX) + movq PCPU(KCR3),%rax + movq %rax,%cr3 + jmp fast_syscall_common + SUPERALIGN_TEXT IDTVEC(fast_syscall) swapgs + movq %rax,PCPU(SCRATCH_RAX) +fast_syscall_common: movq %rsp,PCPU(SCRATCH_RSP) movq PCPU(RSP0),%rsp /* Now emulate a trapframe. Make the 8 byte alignment odd for call. */ @@ -386,10 +449,11 @@ IDTVEC(fast_syscall) movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */ movq %r11,TF_RSP(%rsp) /* user stack pointer */ - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) + movq PCPU(SCRATCH_RAX),%rax + movq %rax,TF_RAX(%rsp) /* syscall number */ + movq %rdx,TF_RDX(%rsp) /* arg 3 */ + SAVE_SEGS + call handle_ibrs_entry movq PCPU(CURPCB),%r11 andl $~PCB_FULL_IRET,PCB_FLAGS(%r11) sti @@ -398,11 +462,9 @@ IDTVEC(fast_syscall) movq $2,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) /* arg 1 */ movq %rsi,TF_RSI(%rsp) /* arg 2 */ - movq %rdx,TF_RDX(%rsp) /* arg 3 */ movq %r10,TF_RCX(%rsp) /* arg 4 */ movq %r8,TF_R8(%rsp) /* arg 5 */ movq %r9,TF_R9(%rsp) /* arg 6 */ - movq %rax,TF_RAX(%rsp) /* syscall number */ movq %rbx,TF_RBX(%rsp) /* C preserved */ movq %rbp,TF_RBP(%rsp) /* C preserved */ movq %r12,TF_R12(%rsp) /* C preserved */ @@ -421,11 +483,12 @@ IDTVEC(fast_syscall) /* Disable interrupts before testing PCB_FULL_IRET. */ cli testl $PCB_FULL_IRET,PCB_FLAGS(%rax) - jnz 3f + jnz 4f /* Check for and handle AST's on return to userland. */ movq PCPU(CURTHREAD),%rax testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) - jne 2f + jne 3f + call handle_ibrs_exit /* Restore preserved registers. */ MEXITCOUNT movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ @@ -435,16 +498,21 @@ IDTVEC(fast_syscall) movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ movq TF_RIP(%rsp),%rcx /* original %rip */ movq TF_RSP(%rsp),%rsp /* user stack pointer */ - swapgs + cmpb $0,pti + je 2f + movq PCPU(UCR3),%r9 + movq %r9,%cr3 + xorl %r9d,%r9d +2: swapgs sysretq -2: /* AST scheduled. */ +3: /* AST scheduled. */ sti movq %rsp,%rdi call ast jmp 1b -3: /* Requested full context restore, use doreti for that. */ +4: /* Requested full context restore, use doreti for that. */ MEXITCOUNT jmp doreti @@ -500,17 +568,15 @@ IDTVEC(nmi) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) + SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) cld xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_fromuserspace /* - * We've interrupted the kernel. Preserve GS.base in %r12. + * We've interrupted the kernel. Preserve GS.base in %r12, + * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. */ movl $MSR_GSBASE,%ecx rdmsr @@ -522,27 +588,45 @@ IDTVEC(nmi) movl %edx,%eax shrq $32,%rdx wrmsr + movq %cr3,%r13 + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je nmi_calltrap + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + movl %eax,%r14d + call handle_ibrs_entry jmp nmi_calltrap nmi_fromuserspace: incl %ebx swapgs - testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) - jz 2f + movq %cr3,%r13 + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: call handle_ibrs_entry movq PCPU(CURPCB),%rdi testq %rdi,%rdi - jz 2f + jz 3f + orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 3f cmpw $KUF32SEL,TF_FS(%rsp) - jne 1f + jne 2f rdfsbase %rax movq %rax,PCB_FSBASE(%rdi) -1: cmpw $KUG32SEL,TF_GS(%rsp) - jne 2f +2: cmpw $KUG32SEL,TF_GS(%rsp) + jne 3f movl $MSR_KGSBASE,%ecx rdmsr shlq $32,%rdx orq %rdx,%rax movq %rax,PCB_GSBASE(%rdi) -2: +3: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) @@ -565,26 +649,29 @@ nmi_calltrap: movq PCPU(CURTHREAD),%rax orq %rax,%rax /* curthread present? */ jz nocallchain - testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ - jz nocallchain /* - * A user callchain is to be captured, so: - * - Move execution to the regular kernel stack, to allow for - * nested NMI interrupts. - * - Take the processor out of "NMI" mode by faking an "iret". - * - Enable interrupts, so that copyin() can work. + * Move execution to the regular kernel stack, because we + * committed to return through doreti. */ movq %rsp,%rsi /* source stack pointer */ movq $TF_SIZE,%rcx movq PCPU(RSP0),%rdx subq %rcx,%rdx movq %rdx,%rdi /* destination stack pointer */ - shrq $3,%rcx /* trap frame size in long words */ cld rep movsq /* copy trapframe */ + movq %rdx,%rsp /* we are on the regular kstack */ + testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */ + jz nocallchain + /* + * A user callchain is to be captured, so: + * - Take the processor out of "NMI" mode by faking an "iret", + * to allow for nested NMI interrupts. + * - Enable interrupts, so that copyin() can work. + */ movl %ss,%eax pushq %rax /* tf_ss */ pushq %rdx /* tf_rsp (on kernel stack) */ @@ -614,33 +701,139 @@ outofnmi: cli nocallchain: #endif - testl %ebx,%ebx + testl %ebx,%ebx /* %ebx == 0 => return to userland */ jnz doreti_exit -nmi_kernelexit: + /* + * Restore speculation control MSR, if preserved. + */ + testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 1f + movl %r14d,%eax + xorl %edx,%edx + movl $MSR_IA32_SPEC_CTRL,%ecx + wrmsr /* * Put back the preserved MSR_GSBASE value. */ +1: movl $MSR_GSBASE,%ecx + movq %r12,%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + movq %r13,%cr3 + RESTORE_REGS + addq $TF_RIP,%rsp + jmp doreti_iret + +/* + * MC# handling is similar to NMI. + * + * As with NMIs, machine check exceptions do not respect RFLAGS.IF and + * can occur at any time with a GS.base value that does not correspond + * to the privilege level in CS. + * + * Machine checks are not unblocked by iretq, but it is best to run + * the handler with interrupts disabled since the exception may have + * interrupted a critical section. + * + * The MC# handler runs on its own stack (tss_ist3). The canonical + * GS.base value for the processor is stored just above the bottom of + * its MC# stack. For exceptions taken from kernel mode, the current + * value in the processor's GS.base is saved at entry to C-preserved + * register %r12, the canonical value for GS.base is then loaded into + * the processor, and the saved value is restored at exit time. For + * exceptions taken from user mode, the cheaper 'SWAPGS' instructions + * are used for swapping GS.base. + */ + +IDTVEC(mchk) + subq $TF_RIP,%rsp + movl $(T_MCHK),TF_TRAPNO(%rsp) + movq $0,TF_ADDR(%rsp) + movq $0,TF_ERR(%rsp) + movq %rdi,TF_RDI(%rsp) + movq %rsi,TF_RSI(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + movq %r8,TF_R8(%rsp) + movq %r9,TF_R9(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rbx,TF_RBX(%rsp) + movq %rbp,TF_RBP(%rsp) + movq %r10,TF_R10(%rsp) + movq %r11,TF_R11(%rsp) + movq %r12,TF_R12(%rsp) + movq %r13,TF_R13(%rsp) + movq %r14,TF_R14(%rsp) + movq %r15,TF_R15(%rsp) + SAVE_SEGS + movl $TF_HASSEGS,TF_FLAGS(%rsp) + cld + xorl %ebx,%ebx + testb $SEL_RPL_MASK,TF_CS(%rsp) + jnz mchk_fromuserspace + /* + * We've interrupted the kernel. Preserve GS.base in %r12, + * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d. + */ movl $MSR_GSBASE,%ecx + rdmsr + movq %rax,%r12 + shlq $32,%rdx + orq %rdx,%r12 + /* Retrieve and load the canonical value for GS.base. */ + movq TF_SIZE(%rsp),%rdx + movl %edx,%eax + shrq $32,%rdx + wrmsr + movq %cr3,%r13 + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je mchk_calltrap + movl $MSR_IA32_SPEC_CTRL,%ecx + rdmsr + movl %eax,%r14d + call handle_ibrs_entry + jmp mchk_calltrap +mchk_fromuserspace: + incl %ebx + swapgs + movq %cr3,%r13 + movq PCPU(KCR3),%rax + cmpq $~0,%rax + je 1f + movq %rax,%cr3 +1: call handle_ibrs_entry +/* Note: this label is also used by ddb and gdb: */ +mchk_calltrap: + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp,%rdi + call mca_intr + MEXITCOUNT + testl %ebx,%ebx /* %ebx == 0 => return to userland */ + jnz doreti_exit + /* + * Restore speculation control MSR, if preserved. + */ + testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip) + je 1f + movl %r14d,%eax + xorl %edx,%edx + movl $MSR_IA32_SPEC_CTRL,%ecx + wrmsr + /* + * Put back the preserved MSR_GSBASE value. + */ +1: movl $MSR_GSBASE,%ecx movq %r12,%rdx movl %edx,%eax shrq $32,%rdx wrmsr -nmi_restoreregs: - movq TF_RDI(%rsp),%rdi - movq TF_RSI(%rsp),%rsi - movq TF_RDX(%rsp),%rdx - movq TF_RCX(%rsp),%rcx - movq TF_R8(%rsp),%r8 - movq TF_R9(%rsp),%r9 - movq TF_RAX(%rsp),%rax - movq TF_RBX(%rsp),%rbx - movq TF_RBP(%rsp),%rbp - movq TF_R10(%rsp),%r10 - movq TF_R11(%rsp),%r11 - movq TF_R12(%rsp),%r12 - movq TF_R13(%rsp),%r13 - movq TF_R14(%rsp),%r14 - movq TF_R15(%rsp),%r15 + movq %r13,%cr3 + RESTORE_REGS addq $TF_RIP,%rsp jmp doreti_iret @@ -808,27 +1001,39 @@ ld_es: ld_ds: movw TF_DS(%rsp),%ds ld_regs: - movq TF_RDI(%rsp),%rdi - movq TF_RSI(%rsp),%rsi - movq TF_RDX(%rsp),%rdx - movq TF_RCX(%rsp),%rcx - movq TF_R8(%rsp),%r8 - movq TF_R9(%rsp),%r9 - movq TF_RAX(%rsp),%rax - movq TF_RBX(%rsp),%rbx - movq TF_RBP(%rsp),%rbp - movq TF_R10(%rsp),%r10 - movq TF_R11(%rsp),%r11 - movq TF_R12(%rsp),%r12 - movq TF_R13(%rsp),%r13 - movq TF_R14(%rsp),%r14 - movq TF_R15(%rsp),%r15 + RESTORE_REGS testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz 1f /* keep running with kernel GS.base */ + jz 2f /* keep running with kernel GS.base */ cli + call handle_ibrs_exit_rs + cmpb $0,pti + je 1f + pushq %rdx + movq PCPU(PRVSPACE),%rdx + addq $PC_PTI_STACK+PC_PTI_STACK_SZ*8-PTI_SIZE,%rdx + movq %rax,PTI_RAX(%rdx) + popq %rax + movq %rax,PTI_RDX(%rdx) + movq TF_RIP(%rsp),%rax + movq %rax,PTI_RIP(%rdx) + movq TF_CS(%rsp),%rax + movq %rax,PTI_CS(%rdx) + movq TF_RFLAGS(%rsp),%rax + movq %rax,PTI_RFLAGS(%rdx) + movq TF_RSP(%rsp),%rax + movq %rax,PTI_RSP(%rdx) + movq TF_SS(%rsp),%rax + movq %rax,PTI_SS(%rdx) + movq PCPU(UCR3),%rax swapgs -1: - addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */ + movq %rdx,%rsp + movq %rax,%cr3 + popq %rdx + popq %rax + addq $8,%rsp + jmp doreti_iret +1: swapgs +2: addq $TF_RIP,%rsp .globl doreti_iret doreti_iret: iretq @@ -852,22 +1057,20 @@ set_segs: .globl doreti_iret_fault doreti_iret_fault: subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */ - testl $PSL_I,TF_RFLAGS(%rsp) + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movq %rcx,TF_RCX(%rsp) + call handle_ibrs_entry + testb $SEL_RPL_MASK,TF_CS(%rsp) jz 1f sti 1: - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) - movw %es,TF_ES(%rsp) - movw %ds,TF_DS(%rsp) + SAVE_SEGS movl $TF_HASSEGS,TF_FLAGS(%rsp) movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) - movq %rdx,TF_RDX(%rsp) - movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) @@ -886,7 +1089,7 @@ doreti_iret_fault: .globl ds_load_fault ds_load_fault: movl $T_PROTFLT,TF_TRAPNO(%rsp) - testl $PSL_I,TF_RFLAGS(%rsp) + testb $SEL_RPL_MASK,TF_CS(%rsp) jz 1f sti 1: |