summary | refs | log | tree | commit | diff | stats
path: root/sys/amd64/amd64/exception.S
diff options
context:
space:
mode:
author: gordon <gordon@FreeBSD.org> 2018-03-14 04:00:00 +0000
committer: gordon <gordon@FreeBSD.org> 2018-03-14 04:00:00 +0000
commit: 312f06f761f7362e153ed5a1a9c49e17294e52b5 (patch)
tree: 272e75bf0fc1b0ab9b02a0f2f034e207b04d82f3 /sys/amd64/amd64/exception.S
parent: c69c8ae8053b66933da68a1f410f78dda1300dd7 (diff)
download: FreeBSD-src-312f06f761f7362e153ed5a1a9c49e17294e52b5.zip
          FreeBSD-src-312f06f761f7362e153ed5a1a9c49e17294e52b5.tar.gz
Add mitigations for two classes of speculative execution vulnerabilities
on amd64. [FreeBSD-SA-18:03.speculative_execution]

Approved by:	so
Security:	FreeBSD-SA-18:03.speculative_execution
Security:	CVE-2017-5715
Security:	CVE-2017-5754
Diffstat (limited to 'sys/amd64/amd64/exception.S')
-rw-r--r--	sys/amd64/amd64/exception.S	562 (changed)
1 files changed, 388 insertions, 174 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 2c2b99b..524e729 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -1,12 +1,16 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
- * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2007-2018 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -38,13 +42,13 @@
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
+#include "assym.s"
+
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
-#include "assym.s"
-
#ifdef KDTRACE_HOOKS
.bss
.globl dtrace_invop_jump_addr
@@ -100,68 +104,61 @@ dtrace_invop_calltrap_addr:
MCOUNT_LABEL(user)
MCOUNT_LABEL(btrap)
-/* Traps that we leave interrupts disabled for.. */
-#define TRAP_NOEN(a) \
- subq $TF_RIP,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
- movq $0,TF_ERR(%rsp) ; \
+/* Traps that we leave interrupts disabled for. */
+ .macro TRAP_NOEN l, trapno
+ PTI_ENTRY \l,X\l
+ .globl X\l
+ .type X\l,@function
+X\l: subq $TF_RIP,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
jmp alltraps_noen
-IDTVEC(dbg)
- TRAP_NOEN(T_TRCTRAP)
-IDTVEC(bpt)
- TRAP_NOEN(T_BPTFLT)
+ .endm
+
+ TRAP_NOEN dbg, T_TRCTRAP
+ TRAP_NOEN bpt, T_BPTFLT
#ifdef KDTRACE_HOOKS
-IDTVEC(dtrace_ret)
- TRAP_NOEN(T_DTRACE_RET)
+ TRAP_NOEN dtrace_ret, T_DTRACE_RET
#endif
/* Regular traps; The cpu does not supply tf_err for these. */
-#define TRAP(a) \
- subq $TF_RIP,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
- movq $0,TF_ERR(%rsp) ; \
+ .macro TRAP l, trapno
+ PTI_ENTRY \l,X\l
+ .globl X\l
+ .type X\l,@function
+X\l:
+ subq $TF_RIP,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
jmp alltraps
-IDTVEC(div)
- TRAP(T_DIVIDE)
-IDTVEC(ofl)
- TRAP(T_OFLOW)
-IDTVEC(bnd)
- TRAP(T_BOUND)
-IDTVEC(ill)
- TRAP(T_PRIVINFLT)
-IDTVEC(dna)
- TRAP(T_DNA)
-IDTVEC(fpusegm)
- TRAP(T_FPOPFLT)
-IDTVEC(mchk)
- TRAP(T_MCHK)
-IDTVEC(rsvd)
- TRAP(T_RESERVED)
-IDTVEC(fpu)
- TRAP(T_ARITHTRAP)
-IDTVEC(xmm)
- TRAP(T_XMMFLT)
-
-/* This group of traps have tf_err already pushed by the cpu */
-#define TRAP_ERR(a) \
- subq $TF_ERR,%rsp; \
- movl $(a),TF_TRAPNO(%rsp) ; \
- movq $0,TF_ADDR(%rsp) ; \
+ .endm
+
+ TRAP div, T_DIVIDE
+ TRAP ofl, T_OFLOW
+ TRAP bnd, T_BOUND
+ TRAP ill, T_PRIVINFLT
+ TRAP dna, T_DNA
+ TRAP fpusegm, T_FPOPFLT
+ TRAP rsvd, T_RESERVED
+ TRAP fpu, T_ARITHTRAP
+ TRAP xmm, T_XMMFLT
+
+/* This group of traps have tf_err already pushed by the cpu. */
+ .macro TRAP_ERR l, trapno
+ PTI_ENTRY \l,X\l,has_err=1
+ .globl X\l
+ .type X\l,@function
+X\l:
+ subq $TF_ERR,%rsp
+ movl $\trapno,TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
jmp alltraps
-IDTVEC(tss)
- TRAP_ERR(T_TSSFLT)
-IDTVEC(missing)
- subq $TF_ERR,%rsp
- movl $T_SEGNPFLT,TF_TRAPNO(%rsp)
- jmp prot_addrf
-IDTVEC(stk)
- subq $TF_ERR,%rsp
- movl $T_STKFLT,TF_TRAPNO(%rsp)
- jmp prot_addrf
-IDTVEC(align)
- TRAP_ERR(T_ALIGNFLT)
+ .endm
+
+ TRAP_ERR tss, T_TSSFLT
+ TRAP_ERR align, T_ALIGNFLT
/*
* alltraps entry point. Use swapgs if this is the first time in the
@@ -174,25 +171,24 @@ IDTVEC(align)
alltraps:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz alltraps_testi /* already running with kernel GS.base */
+ jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
-alltraps_testi:
- testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs_no_rdi
- sti
-alltraps_pushregs_no_rdi:
- movq %rsi,TF_RSI(%rsp)
+1: SAVE_SEGS
movq %rdx,TF_RDX(%rsp)
+ movq %rax,TF_RAX(%rsp)
movq %rcx,TF_RCX(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz 2f
+ call handle_ibrs_entry
+2: testl $PSL_I,TF_RFLAGS(%rsp)
+ jz alltraps_pushregs_no_rax
+ sti
+alltraps_pushregs_no_rax:
+ movq %rsi,TF_RSI(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
- movq %rax,TF_RAX(%rsp)
movq %rbx,TF_RBX(%rsp)
movq %rbp,TF_RBP(%rsp)
movq %r10,TF_R10(%rsp)
@@ -248,15 +244,18 @@ calltrap:
alltraps_noen:
movq %rdi,TF_RDI(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* already running with kernel GS.base */
+ jz 1f /* already running with kernel GS.base */
swapgs
movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1: movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
- jmp alltraps_pushregs_no_rdi
+1: SAVE_SEGS
+ movq %rdx,TF_RDX(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz alltraps_pushregs_no_rax
+ call handle_ibrs_entry
+ jmp alltraps_pushregs_no_rax
IDTVEC(dblfault)
subq $TF_ERR,%rsp
@@ -278,70 +277,131 @@ IDTVEC(dblfault)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
cld
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
1:
- movq %rsp,%rdi
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 2f
+ movq %rax,%cr3
+2: movq %rsp,%rdi
call dblfault_handler
-2:
- hlt
- jmp 2b
+3: hlt
+ jmp 3b
+ ALIGN_TEXT
+IDTVEC(page_pti)
+ testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp)
+ jz Xpage
+ swapgs
+ pushq %rax
+ pushq %rdx
+ movq %cr3,%rax
+ movq %rax,PCPU(SAVED_UCR3)
+ PTI_UUENTRY has_err=1
+ subq $TF_ERR,%rsp
+ movq %rdi,TF_RDI(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ jmp page_u
IDTVEC(page)
subq $TF_ERR,%rsp
- movl $T_PAGEFLT,TF_TRAPNO(%rsp)
- movq %rdi,TF_RDI(%rsp) /* free up a GP register */
+ movq %rdi,TF_RDI(%rsp) /* free up GP registers */
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* already running with kernel GS.base */
+ jz page_cr2 /* already running with kernel GS.base */
swapgs
- movq PCPU(CURPCB),%rdi
+page_u: movq PCPU(CURPCB),%rdi
andl $~PCB_FULL_IRET,PCB_FLAGS(%rdi)
-1: movq %cr2,%rdi /* preserve %cr2 before .. */
+ movq PCPU(SAVED_UCR3),%rax
+ movq %rax,PCB_SAVED_UCR3(%rdi)
+ call handle_ibrs_entry
+page_cr2:
+ movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
+ movl $T_PAGEFLT,TF_TRAPNO(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs_no_rdi
+ jz alltraps_pushregs_no_rax
sti
- jmp alltraps_pushregs_no_rdi
+ jmp alltraps_pushregs_no_rax
/*
* We have to special-case this one. If we get a trap in doreti() at
* the iretq stage, we'll reenter with the wrong gs state. We'll have
* to do a special the swapgs in this case even coming from the kernel.
* XXX linux has a trap handler for their equivalent of load_gs().
+ *
+ * On the stack, we have the hardware interrupt frame to return
+ * to usermode (faulted) and another frame with error code, for
+ * fault. For PTI, copy both frames to the main thread stack.
*/
-IDTVEC(prot)
+ .macro PROTF_ENTRY name,trapno
+\name\()_pti_doreti:
+ pushq %rax
+ pushq %rdx
+ swapgs
+ movq PCPU(KCR3),%rax
+ movq %rax,%cr3
+ movq PCPU(RSP0),%rax
+ subq $2*PTI_SIZE-3*8,%rax /* no err, %rax, %rdx in faulted frame */
+ MOVE_STACKS (PTI_SIZE / 4 - 3)
+ movq %rax,%rsp
+ popq %rdx
+ popq %rax
+ swapgs
+ jmp X\name
+IDTVEC(\name\()_pti)
+ cmpq $doreti_iret,PTI_RIP-2*8(%rsp)
+ je \name\()_pti_doreti
+ testb $SEL_RPL_MASK,PTI_CS-2*8(%rsp) /* %rax, %rdx not yet pushed */
+ jz X\name
+ PTI_UENTRY has_err=1
+ swapgs
+IDTVEC(\name)
subq $TF_ERR,%rsp
- movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movl $\trapno,TF_TRAPNO(%rsp)
+ jmp prot_addrf
+ .endm
+
+ PROTF_ENTRY missing, T_SEGNPFLT
+ PROTF_ENTRY stk, T_STKFLT
+ PROTF_ENTRY prot, T_PROTFLT
+
prot_addrf:
movq $0,TF_ADDR(%rsp)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
leaq doreti_iret(%rip),%rdi
cmpq %rdi,TF_RIP(%rsp)
- je 1f /* kernel but with user gsbase!! */
+ je 5f /* kernel but with user gsbase!! */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 2f /* already running with kernel GS.base */
-1: swapgs
-2: movq PCPU(CURPCB),%rdi
+ jz 6f /* already running with kernel GS.base */
+ swapgs
+ movq PCPU(CURPCB),%rdi
+4: call handle_ibrs_entry
orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
movw %es,TF_ES(%rsp)
movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
- jz alltraps_pushregs_no_rdi
+ jz alltraps_pushregs_no_rax
sti
- jmp alltraps_pushregs_no_rdi
+ jmp alltraps_pushregs_no_rax
+
+5: swapgs
+6: movq PCPU(CURPCB),%rdi
+ jmp 4b
/*
* Fast syscall entry point. We enter here with just our new %cs/%ss set,
@@ -352,8 +412,18 @@ prot_addrf:
* We do not support invoking this from a custom %cs or %ss (e.g. using
* entries from an LDT).
*/
+ SUPERALIGN_TEXT
+IDTVEC(fast_syscall_pti)
+ swapgs
+ movq %rax,PCPU(SCRATCH_RAX)
+ movq PCPU(KCR3),%rax
+ movq %rax,%cr3
+ jmp fast_syscall_common
+ SUPERALIGN_TEXT
IDTVEC(fast_syscall)
swapgs
+ movq %rax,PCPU(SCRATCH_RAX)
+fast_syscall_common:
movq %rsp,PCPU(SCRATCH_RSP)
movq PCPU(RSP0),%rsp
/* Now emulate a trapframe. Make the 8 byte alignment odd for call. */
@@ -363,10 +433,11 @@ IDTVEC(fast_syscall)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
movq %r11,TF_RSP(%rsp) /* user stack pointer */
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ movq PCPU(SCRATCH_RAX),%rax
+ movq %rax,TF_RAX(%rsp) /* syscall number */
+ movq %rdx,TF_RDX(%rsp) /* arg 3 */
+ SAVE_SEGS
+ call handle_ibrs_entry
movq PCPU(CURPCB),%r11
andl $~PCB_FULL_IRET,PCB_FLAGS(%r11)
sti
@@ -375,11 +446,9 @@ IDTVEC(fast_syscall)
movq $2,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp) /* arg 1 */
movq %rsi,TF_RSI(%rsp) /* arg 2 */
- movq %rdx,TF_RDX(%rsp) /* arg 3 */
movq %r10,TF_RCX(%rsp) /* arg 4 */
movq %r8,TF_R8(%rsp) /* arg 5 */
movq %r9,TF_R9(%rsp) /* arg 6 */
- movq %rax,TF_RAX(%rsp) /* syscall number */
movq %rbx,TF_RBX(%rsp) /* C preserved */
movq %rbp,TF_RBP(%rsp) /* C preserved */
movq %r12,TF_R12(%rsp) /* C preserved */
@@ -398,11 +467,12 @@ IDTVEC(fast_syscall)
/* Disable interrupts before testing PCB_FULL_IRET. */
cli
testl $PCB_FULL_IRET,PCB_FLAGS(%rax)
- jnz 3f
+ jnz 4f
/* Check for and handle AST's on return to userland. */
movq PCPU(CURTHREAD),%rax
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
- jne 2f
+ jne 3f
+ call handle_ibrs_exit
/* Restore preserved registers. */
MEXITCOUNT
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
@@ -412,16 +482,21 @@ IDTVEC(fast_syscall)
movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
movq TF_RIP(%rsp),%rcx /* original %rip */
movq TF_RSP(%rsp),%rsp /* user stack pointer */
- swapgs
+ cmpb $0,pti
+ je 2f
+ movq PCPU(UCR3),%r9
+ movq %r9,%cr3
+ xorl %r9d,%r9d
+2: swapgs
sysretq
-2: /* AST scheduled. */
+3: /* AST scheduled. */
sti
movq %rsp,%rdi
call ast
jmp 1b
-3: /* Requested full context restore, use doreti for that. */
+4: /* Requested full context restore, use doreti for that. */
MEXITCOUNT
jmp doreti
@@ -477,17 +552,15 @@ IDTVEC(nmi)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
cld
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
/*
- * We've interrupted the kernel. Preserve GS.base in %r12.
+ * We've interrupted the kernel. Preserve GS.base in %r12,
+ * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
*/
movl $MSR_GSBASE,%ecx
rdmsr
@@ -499,10 +572,32 @@ IDTVEC(nmi)
movl %edx,%eax
shrq $32,%rdx
wrmsr
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je nmi_calltrap
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ rdmsr
+ movl %eax,%r14d
+ call handle_ibrs_entry
jmp nmi_calltrap
nmi_fromuserspace:
incl %ebx
swapgs
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: call handle_ibrs_entry
+ movq PCPU(CURPCB),%rdi
+ testq %rdi,%rdi
+ jz 3f
+ orl $PCB_FULL_IRET,PCB_FLAGS(%rdi)
+3:
/* Note: this label is also used by ddb and gdb: */
nmi_calltrap:
FAKE_MCOUNT(TF_RIP(%rsp))
@@ -525,26 +620,29 @@ nmi_calltrap:
movq PCPU(CURTHREAD),%rax
orq %rax,%rax /* curthread present? */
jz nocallchain
- testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
- jz nocallchain
/*
- * A user callchain is to be captured, so:
- * - Move execution to the regular kernel stack, to allow for
- * nested NMI interrupts.
- * - Take the processor out of "NMI" mode by faking an "iret".
- * - Enable interrupts, so that copyin() can work.
+ * Move execution to the regular kernel stack, because we
+ * committed to return through doreti.
*/
movq %rsp,%rsi /* source stack pointer */
movq $TF_SIZE,%rcx
movq PCPU(RSP0),%rdx
subq %rcx,%rdx
movq %rdx,%rdi /* destination stack pointer */
-
shrq $3,%rcx /* trap frame size in long words */
cld
rep
movsq /* copy trapframe */
+ movq %rdx,%rsp /* we are on the regular kstack */
+ testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
+ jz nocallchain
+ /*
+ * A user callchain is to be captured, so:
+ * - Take the processor out of "NMI" mode by faking an "iret",
+ * to allow for nested NMI interrupts.
+ * - Enable interrupts, so that copyin() can work.
+ */
movl %ss,%eax
pushq %rax /* tf_ss */
pushq %rdx /* tf_rsp (on kernel stack) */
@@ -574,33 +672,139 @@ outofnmi:
cli
nocallchain:
#endif
- testl %ebx,%ebx
+ testl %ebx,%ebx /* %ebx == 0 => return to userland */
jnz doreti_exit
-nmi_kernelexit:
+ /*
+ * Restore speculation control MSR, if preserved.
+ */
+ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je 1f
+ movl %r14d,%eax
+ xorl %edx,%edx
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ wrmsr
/*
* Put back the preserved MSR_GSBASE value.
*/
+1: movl $MSR_GSBASE,%ecx
+ movq %r12,%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq %r13,%cr3
+ RESTORE_REGS
+ addq $TF_RIP,%rsp
+ jmp doreti_iret
+
+/*
+ * MC# handling is similar to NMI.
+ *
+ * As with NMIs, machine check exceptions do not respect RFLAGS.IF and
+ * can occur at any time with a GS.base value that does not correspond
+ * to the privilege level in CS.
+ *
+ * Machine checks are not unblocked by iretq, but it is best to run
+ * the handler with interrupts disabled since the exception may have
+ * interrupted a critical section.
+ *
+ * The MC# handler runs on its own stack (tss_ist3). The canonical
+ * GS.base value for the processor is stored just above the bottom of
+ * its MC# stack. For exceptions taken from kernel mode, the current
+ * value in the processor's GS.base is saved at entry to C-preserved
+ * register %r12, the canonical value for GS.base is then loaded into
+ * the processor, and the saved value is restored at exit time. For
+ * exceptions taken from user mode, the cheaper 'SWAPGS' instructions
+ * are used for swapping GS.base.
+ */
+
+IDTVEC(mchk)
+ subq $TF_RIP,%rsp
+ movl $(T_MCHK),TF_TRAPNO(%rsp)
+ movq $0,TF_ADDR(%rsp)
+ movq $0,TF_ERR(%rsp)
+ movq %rdi,TF_RDI(%rsp)
+ movq %rsi,TF_RSI(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ movq %r8,TF_R8(%rsp)
+ movq %r9,TF_R9(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rbx,TF_RBX(%rsp)
+ movq %rbp,TF_RBP(%rsp)
+ movq %r10,TF_R10(%rsp)
+ movq %r11,TF_R11(%rsp)
+ movq %r12,TF_R12(%rsp)
+ movq %r13,TF_R13(%rsp)
+ movq %r14,TF_R14(%rsp)
+ movq %r15,TF_R15(%rsp)
+ SAVE_SEGS
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ cld
+ xorl %ebx,%ebx
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jnz mchk_fromuserspace
+ /*
+ * We've interrupted the kernel. Preserve GS.base in %r12,
+ * %cr3 in %r13, and possibly lower half of MSR_IA32_SPEC_CTL in %r14d.
+ */
movl $MSR_GSBASE,%ecx
+ rdmsr
+ movq %rax,%r12
+ shlq $32,%rdx
+ orq %rdx,%r12
+ /* Retrieve and load the canonical value for GS.base. */
+ movq TF_SIZE(%rsp),%rdx
+ movl %edx,%eax
+ shrq $32,%rdx
+ wrmsr
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je mchk_calltrap
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ rdmsr
+ movl %eax,%r14d
+ call handle_ibrs_entry
+ jmp mchk_calltrap
+mchk_fromuserspace:
+ incl %ebx
+ swapgs
+ movq %cr3,%r13
+ movq PCPU(KCR3),%rax
+ cmpq $~0,%rax
+ je 1f
+ movq %rax,%cr3
+1: call handle_ibrs_entry
+/* Note: this label is also used by ddb and gdb: */
+mchk_calltrap:
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp,%rdi
+ call mca_intr
+ MEXITCOUNT
+ testl %ebx,%ebx /* %ebx == 0 => return to userland */
+ jnz doreti_exit
+ /*
+ * Restore speculation control MSR, if preserved.
+ */
+ testl $CPUID_STDEXT3_IBPB,cpu_stdext_feature3(%rip)
+ je 1f
+ movl %r14d,%eax
+ xorl %edx,%edx
+ movl $MSR_IA32_SPEC_CTRL,%ecx
+ wrmsr
+ /*
+ * Put back the preserved MSR_GSBASE value.
+ */
+1: movl $MSR_GSBASE,%ecx
movq %r12,%rdx
movl %edx,%eax
shrq $32,%rdx
wrmsr
-nmi_restoreregs:
- movq TF_RDI(%rsp),%rdi
- movq TF_RSI(%rsp),%rsi
- movq TF_RDX(%rsp),%rdx
- movq TF_RCX(%rsp),%rcx
- movq TF_R8(%rsp),%r8
- movq TF_R9(%rsp),%r9
- movq TF_RAX(%rsp),%rax
- movq TF_RBX(%rsp),%rbx
- movq TF_RBP(%rsp),%rbp
- movq TF_R10(%rsp),%r10
- movq TF_R11(%rsp),%r11
- movq TF_R12(%rsp),%r12
- movq TF_R13(%rsp),%r13
- movq TF_R14(%rsp),%r14
- movq TF_R15(%rsp),%r15
+ movq %r13,%cr3
+ RESTORE_REGS
addq $TF_RIP,%rsp
jmp doreti_iret
@@ -767,27 +971,39 @@ ld_es:
ld_ds:
movw TF_DS(%rsp),%ds
ld_regs:
- movq TF_RDI(%rsp),%rdi
- movq TF_RSI(%rsp),%rsi
- movq TF_RDX(%rsp),%rdx
- movq TF_RCX(%rsp),%rcx
- movq TF_R8(%rsp),%r8
- movq TF_R9(%rsp),%r9
- movq TF_RAX(%rsp),%rax
- movq TF_RBX(%rsp),%rbx
- movq TF_RBP(%rsp),%rbp
- movq TF_R10(%rsp),%r10
- movq TF_R11(%rsp),%r11
- movq TF_R12(%rsp),%r12
- movq TF_R13(%rsp),%r13
- movq TF_R14(%rsp),%r14
- movq TF_R15(%rsp),%r15
+ RESTORE_REGS
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* keep running with kernel GS.base */
+ jz 2f /* keep running with kernel GS.base */
cli
+ call handle_ibrs_exit_rs
+ cmpb $0,pti
+ je 1f
+ pushq %rdx
+ movq PCPU(PRVSPACE),%rdx
+ addq $PC_PTI_STACK+PC_PTI_STACK_SZ*8-PTI_SIZE,%rdx
+ movq %rax,PTI_RAX(%rdx)
+ popq %rax
+ movq %rax,PTI_RDX(%rdx)
+ movq TF_RIP(%rsp),%rax
+ movq %rax,PTI_RIP(%rdx)
+ movq TF_CS(%rsp),%rax
+ movq %rax,PTI_CS(%rdx)
+ movq TF_RFLAGS(%rsp),%rax
+ movq %rax,PTI_RFLAGS(%rdx)
+ movq TF_RSP(%rsp),%rax
+ movq %rax,PTI_RSP(%rdx)
+ movq TF_SS(%rsp),%rax
+ movq %rax,PTI_SS(%rdx)
+ movq PCPU(UCR3),%rax
swapgs
-1:
- addq $TF_RIP,%rsp /* skip over tf_err, tf_trapno */
+ movq %rdx,%rsp
+ movq %rax,%cr3
+ popq %rdx
+ popq %rax
+ addq $8,%rsp
+ jmp doreti_iret
+1: swapgs
+2: addq $TF_RIP,%rsp
.globl doreti_iret
doreti_iret:
iretq
@@ -811,22 +1027,20 @@ set_segs:
.globl doreti_iret_fault
doreti_iret_fault:
subq $TF_RIP,%rsp /* space including tf_err, tf_trapno */
- testl $PSL_I,TF_RFLAGS(%rsp)
+ movq %rax,TF_RAX(%rsp)
+ movq %rdx,TF_RDX(%rsp)
+ movq %rcx,TF_RCX(%rsp)
+ call handle_ibrs_entry
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
jz 1f
sti
1:
- movw %fs,TF_FS(%rsp)
- movw %gs,TF_GS(%rsp)
- movw %es,TF_ES(%rsp)
- movw %ds,TF_DS(%rsp)
+ SAVE_SEGS
movl $TF_HASSEGS,TF_FLAGS(%rsp)
movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
- movq %rdx,TF_RDX(%rsp)
- movq %rcx,TF_RCX(%rsp)
movq %r8,TF_R8(%rsp)
movq %r9,TF_R9(%rsp)
- movq %rax,TF_RAX(%rsp)
movq %rbx,TF_RBX(%rsp)
movq %rbp,TF_RBP(%rsp)
movq %r10,TF_R10(%rsp)
@@ -845,7 +1059,7 @@ doreti_iret_fault:
.globl ds_load_fault
ds_load_fault:
movl $T_PROTFLT,TF_TRAPNO(%rsp)
- testl $PSL_I,TF_RFLAGS(%rsp)
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
jz 1f
sti
1:
OpenPOWER on IntegriCloud