diff options
author | Renato Botelho <renato@netgate.com> | 2018-06-21 12:12:45 -0300 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2018-06-21 12:12:45 -0300 |
commit | a39641b9d2cf45e8da67be9ba315455c4b41de0f (patch) | |
tree | bbabba87fe974b9f13cb7f7518dddfd8f383741f /sys/amd64 | |
parent | 502f1b4994ebf056d1f73d1a86d0c727280a35ab (diff) | |
parent | f55f63f9f9c29dae38ac323adfd253cec627873c (diff) | |
download | FreeBSD-src-a39641b9d2cf45e8da67be9ba315455c4b41de0f.zip FreeBSD-src-a39641b9d2cf45e8da67be9ba315455c4b41de0f.tar.gz |
Merge remote-tracking branch 'origin/releng/11.2' into RELENG_2_4_4
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/amd64/cpu_switch.S | 13 | ||||
-rw-r--r-- | sys/amd64/amd64/fpu.c | 114 |
2 files changed, 80 insertions, 47 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 75599a5..d7ac318 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -128,10 +128,10 @@ done_store_dr:
 
 	/* have we used fp, and need a save? */
 	cmpq %rdi,PCPU(FPCURTHREAD)
-	jne 3f
+	jne 2f
 	movq PCB_SAVEFPU(%r8),%r8
 	clts
-	cmpl $0,use_xsave
+	cmpl $0,use_xsave(%rip)
 	jne 1f
 	fxsave (%r8)
 	jmp 2f
@@ -143,12 +143,7 @@ ctx_switch_xsave:
 	/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
 	xsave (%r8)
 	movq %rcx,%rdx
-2:	smsw %ax
-	orb $CR0_TS,%al
-	lmsw %ax
-	xorl %eax,%eax
-	movq %rax,PCPU(FPCURTHREAD)
-3:
+2:
 	/* Save is done. Now fire up new thread. Leave old vmspace. */
 	movq %rsi,%r12
 	movq %rdi,%r13
@@ -235,6 +230,8 @@ done_load_dr:
 	movq PCB_RBX(%r8),%rbx
 	movq PCB_RIP(%r8),%rax
 	movq %rax,(%rsp)
+	movq PCPU(CURTHREAD),%rdi
+	call fpu_activate_sw
 	ret
 
 /*
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 68d1ac1..99f967e 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -139,6 +139,11 @@ static void fpu_clean_state(void);
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware");
 
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &lazy_fpu_switch, 0,
+    "Lazily load FPU context after context switch");
+
 int use_xsave; /* non-static for cpu_switch.S */
 uint64_t xsave_mask; /* the same */
 static uma_zone_t fpu_save_area_zone;
@@ -204,6 +209,7 @@ fpuinit_bsp1(void)
 	u_int cp[4];
 	uint64_t xsave_mask_user;
 
+	TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
 	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
 		use_xsave = 1;
 		TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
@@ -612,6 +618,45 @@ fputrap_sse(void)
 	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
 }
 
+static void
+restore_fpu_curthread(struct thread *td)
+{
+	struct pcb *pcb;
+
+	/*
+	 * Record new context early in case frstor causes a trap.
+	 */
+	PCPU_SET(fpcurthread, td);
+
+	stop_emulating();
+	fpu_clean_state();
+	pcb = td->td_pcb;
+
+	if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+		/*
+		 * This is the first time this thread has used the FPU or
+		 * the PCB doesn't contain a clean FPU state. Explicitly
+		 * load an initial state.
+		 *
+		 * We prefer to restore the state from the actual save
+		 * area in PCB instead of directly loading from
+		 * fpu_initialstate, to ignite the XSAVEOPT
+		 * tracking engine.
+		 */
+		bcopy(fpu_initialstate, pcb->pcb_save,
+		    cpu_max_ext_state_size);
+		fpurestore(pcb->pcb_save);
+		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+			fldcw(pcb->pcb_initial_fpucw);
+		if (PCB_USER_FPU(pcb))
+			set_pcb_flags(pcb, PCB_FPUINITDONE |
+			    PCB_USERFPUINITDONE);
+		else
+			set_pcb_flags(pcb, PCB_FPUINITDONE);
+	} else
+		fpurestore(pcb->pcb_save);
+}
+
 /*
  * Device Not Available (DNA, #NM) exception handler.
  *
@@ -622,7 +667,9 @@ fputrap_sse(void)
 void
 fpudna(void)
 {
+	struct thread *td;
 
+	td = curthread;
 	/*
 	 * This handler is entered with interrupts enabled, so context
 	 * switches may occur before critical_enter() is executed. If
@@ -636,49 +683,38 @@ fpudna(void)
 
 	KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
 	    ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
-	if (PCPU_GET(fpcurthread) == curthread) {
-		printf("fpudna: fpcurthread == curthread\n");
+	if (__predict_false(PCPU_GET(fpcurthread) == td)) {
+		/*
+		 * Some virtual machines seems to set %cr0.TS at
+		 * arbitrary moments. Silently clear the TS bit
+		 * regardless of the eager/lazy FPU context switch
+		 * mode.
+		 */
 		stop_emulating();
-		critical_exit();
-		return;
-	}
-	if (PCPU_GET(fpcurthread) != NULL) {
-		panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
-		    PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
-		    curthread, curthread->td_tid);
+	} else {
+		if (__predict_false(PCPU_GET(fpcurthread) != NULL)) {
+			panic(
+		    "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
+			    PCPU_GET(fpcurthread),
+			    PCPU_GET(fpcurthread)->td_tid, td, td->td_tid);
+		}
+		restore_fpu_curthread(td);
 	}
-	stop_emulating();
-	/*
-	 * Record new context early in case frstor causes a trap.
-	 */
-	PCPU_SET(fpcurthread, curthread);
+	critical_exit();
+}
 
-	fpu_clean_state();
+void fpu_activate_sw(struct thread *td); /* Called from the context switch */
+void
+fpu_activate_sw(struct thread *td)
+{
 
-	if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
-		/*
-		 * This is the first time this thread has used the FPU or
-		 * the PCB doesn't contain a clean FPU state. Explicitly
-		 * load an initial state.
-		 *
-		 * We prefer to restore the state from the actual save
-		 * area in PCB instead of directly loading from
-		 * fpu_initialstate, to ignite the XSAVEOPT
-		 * tracking engine.
-		 */
-		bcopy(fpu_initialstate, curpcb->pcb_save,
-		    cpu_max_ext_state_size);
-		fpurestore(curpcb->pcb_save);
-		if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
-			fldcw(curpcb->pcb_initial_fpucw);
-		if (PCB_USER_FPU(curpcb))
-			set_pcb_flags(curpcb,
-			    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
-		else
-			set_pcb_flags(curpcb, PCB_FPUINITDONE);
-	} else
-		fpurestore(curpcb->pcb_save);
-	critical_exit();
+	if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+	    !PCB_USER_FPU(td->td_pcb)) {
+		PCPU_SET(fpcurthread, NULL);
+		start_emulating();
+	} else if (PCPU_GET(fpcurthread) != td) {
+		restore_fpu_curthread(td);
+	}
 }
 
 void