From 3091212869dee5f2673091658796eb1badc0135f Mon Sep 17 00:00:00 2001
From: jkoshy
Date: Tue, 3 Feb 2009 09:01:45 +0000
Subject: Improve robustness of NMI handling, for NMIs recognized in kernel mode.

- Make the NMI handler run on its own stack (TSS_IST2).
- Store the GSBASE value for each CPU just before the start of each
  NMI stack, permitting efficient retrieval using %rsp-relative
  addressing.
- For NMIs taken from kernel mode, program MSR_GSBASE explicitly since
  one or both of MSR_GSBASE and MSR_KGSBASE can be potentially invalid.
  The current contents of MSR_GSBASE are saved and restored at exit.
- For NMIs handled from user mode, continue to use 'swapgs' to load
  the per-CPU GSBASE.

Reviewed by:	jeff
Debugging help:	jeff
Tested by:	gnn, Artem Belevich
---
 sys/amd64/amd64/exception.S | 77 ++++++++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 25 deletions(-)

diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 1c098e4..897bfec 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -383,22 +383,24 @@ IDTVEC(fast_syscall32)
  * NMI handling is special.
  *
  * First, NMIs do not respect the state of the processor's RFLAGS.IF
- * bit and the NMI handler may be invoked at any time, including when
- * the processor is in a critical section with RFLAGS.IF == 0. In
- * particular, this means that the processor's GS.base values could be
- * inconsistent on entry to the handler, and so we need to read
- * MSR_GSBASE to determine if a 'swapgs' is needed. We use '%ebx', a
- * C-preserved register, to remember whether to swap GS back on the
- * exit path.
+ * bit. The NMI handler may be entered at any time, including when
+ * the processor is in a critical section with RFLAGS.IF == 0.
+ * The processor's GS.base value could be invalid on entry to the
+ * handler.
  *
  * Second, the processor treats NMIs specially, blocking further NMIs
- * until an 'iretq' instruction is executed. We therefore need to
- * execute the NMI handler with interrupts disabled to prevent a
- * nested interrupt from executing an 'iretq' instruction and
- * inadvertently taking the processor out of NMI mode.
+ * until an 'iretq' instruction is executed. We thus need to execute
+ * the NMI handler with interrupts disabled, to prevent a nested interrupt
+ * from executing an 'iretq' instruction and inadvertently taking the
+ * processor out of NMI mode.
  *
- * Third, the NMI handler runs on its own stack (tss_ist1), shared
- * with the double fault handler.
+ * Third, the NMI handler runs on its own stack (tss_ist2). The canonical
+ * GS.base value for the processor is stored just above the bottom of its
+ * NMI stack. For NMIs taken from kernel mode, the current value in
+ * the processor's GS.base is saved at entry to C-preserved register %r12,
+ * the canonical value for GS.base is then loaded into the processor, and
+ * the saved value is restored at exit time. For NMIs taken from user mode,
+ * the cheaper 'SWAPGS' instructions are used for swapping GS.base.
  */
 
 IDTVEC(nmi)
@@ -423,12 +425,22 @@ IDTVEC(nmi)
 	movq	%r15,TF_R15(%rsp)
 	xorl	%ebx,%ebx
 	testb	$SEL_RPL_MASK,TF_CS(%rsp)
-	jnz	nmi_needswapgs		/* we came from userland */
+	jnz	nmi_fromuserspace
+	/*
+	 * We've interrupted the kernel. Preserve GS.base in %r12.
+	 */
 	movl	$MSR_GSBASE,%ecx
 	rdmsr
-	cmpl	$VM_MAXUSER_ADDRESS >> 32,%edx
-	jae	nmi_calltrap		/* GS.base holds a kernel VA */
-nmi_needswapgs:
+	movq	%rax,%r12
+	shlq	$32,%rdx
+	orq	%rdx,%r12
+	/* Retrieve and load the canonical value for GS.base. */
+	movq	TF_SIZE(%rsp),%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
+	jmp	nmi_calltrap
+nmi_fromuserspace:
 	incl	%ebx
 	swapgs
 /* Note: this label is also used by ddb and gdb: */
@@ -439,14 +451,19 @@ nmi_calltrap:
 	MEXITCOUNT
 #ifdef HWPMC_HOOKS
 	/*
-	 * Check if the current trap was from user mode and if so
-	 * whether the current thread needs a user call chain to be
-	 * captured. We are still in NMI mode at this point.
+	 * Capture a userspace callchain if needed.
+	 *
+	 * - Check if the current trap was from user mode.
+	 * - Check if the current thread is valid.
+	 * - Check if the thread requires a user call chain to be
+	 *   captured.
+	 *
+	 * We are still in NMI mode at this point.
 	 */
-	testb	$SEL_RPL_MASK,TF_CS(%rsp)
-	jz	nocallchain
-	movq	PCPU(CURTHREAD),%rax	/* curthread present? */
-	orq	%rax,%rax
+	testl	%ebx,%ebx
+	jz	nocallchain	/* not from userspace */
+	movq	PCPU(CURTHREAD),%rax
+	orq	%rax,%rax	/* curthread present? */
 	jz	nocallchain
 	testl	$TDP_CALLCHAIN,TD_PFLAGS(%rax)	/* flagged for capture? */
 	jz	nocallchain
@@ -498,8 +515,18 @@ outofnmi:
 nocallchain:
 #endif
 	testl	%ebx,%ebx
-	jz	nmi_restoreregs
+	jz	nmi_kernelexit
 	swapgs
+	jmp	nmi_restoreregs
+nmi_kernelexit:
+	/*
+	 * Put back the preserved MSR_GSBASE value.
+	 */
+	movl	$MSR_GSBASE,%ecx
+	movq	%r12,%rdx
+	movl	%edx,%eax
+	shrq	$32,%rdx
+	wrmsr
 nmi_restoreregs:
 	movq	TF_RDI(%rsp),%rdi
 	movq	TF_RSI(%rsp),%rsi
-- 
cgit v1.1
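
The kernel-mode entry and exit paths in the hunks above depend on the rdmsr/wrmsr
convention of splitting a 64-bit MSR value across EDX:EAX (the shlq/orq into %r12
at entry, and the movl/shrq before wrmsr at exit). The C sketch below is
illustrative only and is not part of the commit: rdmsr64(), wrmsr64(),
nmi_from_kernel() and the nmi_stack_gsbase variable are hypothetical names, with
nmi_stack_gsbase standing in for the canonical GS.base value that the real code
reads from just above the bottom of the CPU's NMI stack.

/*
 * Illustrative sketch (not part of the commit): the kernel-mode NMI
 * save/load/restore logic above, expressed in C.
 */
#include <stdint.h>

#define	MSR_GSBASE	0xc0000101U	/* architectural IA32_GS_BASE MSR */

static inline uint64_t
rdmsr64(uint32_t msr)
{
	uint32_t lo, hi;

	/* rdmsr returns the MSR value split across EDX:EAX. */
	__asm__ __volatile__("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return (((uint64_t)hi << 32) | lo);	/* the shlq/orq into %r12 above */
}

static inline void
wrmsr64(uint32_t msr, uint64_t val)
{
	/* wrmsr expects the new value split across EDX:EAX. */
	__asm__ __volatile__("wrmsr" : : "c" (msr),
	    "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32)));
}

/*
 * Hypothetical stand-in for the canonical GS.base value that the real
 * code loads with "movq TF_SIZE(%rsp),%rdx" from just above the bottom
 * of the CPU's NMI stack.
 */
static uint64_t nmi_stack_gsbase;

void
nmi_from_kernel(void (*handler)(void))
{
	/* Save the current, possibly invalid, GS.base (kept in %r12 above). */
	uint64_t saved = rdmsr64(MSR_GSBASE);

	wrmsr64(MSR_GSBASE, nmi_stack_gsbase);	/* load the canonical value */
	handler();				/* the NMI handler proper */
	wrmsr64(MSR_GSBASE, saved);		/* restore at exit */
}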