diff options
-rw-r--r-- | sys/amd64/amd64/cpu_switch.S | 3 | ||||
-rw-r--r-- | sys/amd64/amd64/fpu.c | 70 | ||||
-rw-r--r-- | sys/amd64/include/md_var.h | 1 | ||||
-rw-r--r-- | sys/x86/include/specialreg.h | 5 |
4 files changed, 78 insertions, 1 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 1254f3f..ed1ccb5 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -122,6 +122,9 @@ done_store_dr: 1: movq %rdx,%rcx movl xsave_mask,%eax movl xsave_mask+4,%edx + .globl ctx_switch_xsave +ctx_switch_xsave: + /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ xsave (%r8) movq %rcx,%rdx 2: smsw %ax diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index ca951ad..a7812b7 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -132,10 +132,16 @@ static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, NULL, 1, "Floating point instructions executed in hardware"); +static int use_xsaveopt; int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static struct savefpu *fpu_initialstate; +struct xsave_area_elm_descr { + u_int offset; + u_int size; +} *xsave_area_desc; + void fpusave(void *addr) { @@ -182,6 +188,17 @@ fpuinit_bsp1(void) TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; + + cpuid_count(0xd, 0x1, cp); + if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { + /* + * Patch the XSAVE instruction in the cpu_switch code + * to XSAVEOPT. We assume that XSAVE encoding used + * REX byte, and set the bit 4 of the r/m byte. + */ + ctx_switch_xsave[3] |= 0x10; + use_xsaveopt = 1; + } } /* @@ -252,6 +269,7 @@ static void fpuinitstate(void *arg __unused) { register_t saveintr; + int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); @@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused) */ bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); + /* + * Create a table describing the layout of the CPU Extended + * Save Area. + */ + if (use_xsaveopt) { + max_ext_n = flsl(xsave_mask); + xsave_area_desc = malloc(max_ext_n * sizeof(struct + xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); + /* x87 state */ + xsave_area_desc[0].offset = 0; + xsave_area_desc[0].size = 160; + /* XMM */ + xsave_area_desc[1].offset = 160; + xsave_area_desc[1].size = 288 - 160; + + for (i = 2; i < max_ext_n; i++) { + cpuid_count(0xd, i, cp); + xsave_area_desc[i].offset = cp[1]; + xsave_area_desc[i].size = cp[0]; + } + } + start_emulating(); intr_restore(saveintr); } @@ -560,8 +600,14 @@ fpudna(void) * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. */ - fpurestore(fpu_initialstate); + bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) @@ -596,6 +642,9 @@ int fpugetregs(struct thread *td) { struct pcb *pcb; + uint64_t *xstate_bv, bit; + char *sa; + int max_ext_n, i; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { @@ -613,6 +662,25 @@ fpugetregs(struct thread *td) return (_MC_FPOWNED_FPU); } else { critical_exit(); + if (use_xsaveopt) { + /* + * Handle partially saved state. + */ + sa = (char *)get_pcb_user_save_pcb(pcb); + xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + + offsetof(struct xstate_hdr, xstate_bv)); + max_ext_n = flsl(xsave_mask); + for (i = 0; i < max_ext_n; i++) { + bit = 1 << i; + if ((*xstate_bv & bit) != 0) + continue; + bcopy((char *)fpu_initialstate + + xsave_area_desc[i].offset, + sa + xsave_area_desc[i].offset, + xsave_area_desc[i].size); + *xstate_bv |= bit; + } + } return (_MC_FPOWNED_PCB); } } diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index ff11ea1..ff322bb 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -57,6 +57,7 @@ extern u_int cpu_procinfo; extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern u_int cpu_vendor_id; +extern char ctx_switch_xsave[]; extern char kstack[]; extern char sigcode[]; extern int szsigcode; diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index ff6a777..7084e30 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -247,6 +247,11 @@ #define CPUID_TYPE_CORE 2 /* + * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1 + */ +#define CPUID_EXTSTATE_XSAVEOPT 0x00000001 + +/* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001 |