diff options
author | kib <kib@FreeBSD.org> | 2012-07-14 15:48:30 +0000 |
---|---|---|
committer | kib <kib@FreeBSD.org> | 2012-07-14 15:48:30 +0000 |
commit | 5bd2b3d73a52ccf1497165e4224696e551152035 (patch) | |
tree | aa9f4225bbf17a3b53982e7f3c810f1d7ab0ea28 /sys | |
parent | f9866864606d10b02b48047c44cc0d5d87003625 (diff) | |
download | FreeBSD-src-5bd2b3d73a52ccf1497165e4224696e551152035.zip FreeBSD-src-5bd2b3d73a52ccf1497165e4224696e551152035.tar.gz |
Add support for using the XSAVEOPT instruction. Our XSAVE/XRSTOR usage
mostly meets the guidelines set by the Intel SDM:
1. We use XRSTOR and XSAVE from the same CPL using the same linear
address for the store area
2. Contrary to the recommendations, we cannot zero the FPU save area
for a new thread, since fork semantics require a copy of the
previous state. This advice seemingly contradicts the advice
from item 6.
3. We use XSAVEOPT only in the context switch code, and the area
used by XSAVEOPT always already contains data saved by XSAVE.
4. We do not modify the save area between XRSTOR, when the area is
loaded into the FPU context, and XSAVE. We always spill the FPU context
into the save area and start emulation when directly writing into the FPU
context.
5. We do not use segmented addressing to access the save area, or rather,
we always address it using %ds-based addressing.
6. XSAVEOPT can only be executed on an area which was previously
loaded with XRSTOR, since the context switch code checks for FPU use by
the outgoing thread before saving, and a thread which stopped emulation
forcibly gets its context loaded with XRSTOR.
7. The PCB cannot be paged out while FPU emulation is turned off, since
the stack of the executing thread is never swapped out.
The context switch code is patched to issue XSAVEOPT instead of XSAVE
if supported. This approach eliminates one conditional in the context
switch code, which would be needed otherwise.
For user-visible machine context to have proper data, fpugetregs()
checks for unsaved extension blocks and manually copies pristine FPU
state into them, according to the description provided by CPUID leaf
0xd.
MFC after: 1 month
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/amd64/cpu_switch.S | 3 | ||||
-rw-r--r-- | sys/amd64/amd64/fpu.c | 70 | ||||
-rw-r--r-- | sys/amd64/include/md_var.h | 1 | ||||
-rw-r--r-- | sys/x86/include/specialreg.h | 5 |
4 files changed, 78 insertions, 1 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 1254f3f..ed1ccb5 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -122,6 +122,9 @@ done_store_dr: 1: movq %rdx,%rcx movl xsave_mask,%eax movl xsave_mask+4,%edx + .globl ctx_switch_xsave +ctx_switch_xsave: + /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ xsave (%r8) movq %rcx,%rdx 2: smsw %ax diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index ca951ad..a7812b7 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -132,10 +132,16 @@ static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, NULL, 1, "Floating point instructions executed in hardware"); +static int use_xsaveopt; int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static struct savefpu *fpu_initialstate; +struct xsave_area_elm_descr { + u_int offset; + u_int size; +} *xsave_area_desc; + void fpusave(void *addr) { @@ -182,6 +188,17 @@ fpuinit_bsp1(void) TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; xsave_mask &= xsave_mask_user; + + cpuid_count(0xd, 0x1, cp); + if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) { + /* + * Patch the XSAVE instruction in the cpu_switch code + * to XSAVEOPT. We assume that XSAVE encoding used + * REX byte, and set the bit 4 of the r/m byte. + */ + ctx_switch_xsave[3] |= 0x10; + use_xsaveopt = 1; + } } /* @@ -252,6 +269,7 @@ static void fpuinitstate(void *arg __unused) { register_t saveintr; + int cp[4], i, max_ext_n; fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, M_WAITOK | M_ZERO); @@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused) */ bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); + /* + * Create a table describing the layout of the CPU Extended + * Save Area. 
+ */ + if (use_xsaveopt) { + max_ext_n = flsl(xsave_mask); + xsave_area_desc = malloc(max_ext_n * sizeof(struct + xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO); + /* x87 state */ + xsave_area_desc[0].offset = 0; + xsave_area_desc[0].size = 160; + /* XMM */ + xsave_area_desc[1].offset = 160; + xsave_area_desc[1].size = 288 - 160; + + for (i = 2; i < max_ext_n; i++) { + cpuid_count(0xd, i, cp); + xsave_area_desc[i].offset = cp[1]; + xsave_area_desc[i].size = cp[0]; + } + } + start_emulating(); intr_restore(saveintr); } @@ -560,8 +600,14 @@ fpudna(void) * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. */ - fpurestore(fpu_initialstate); + bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(pcb->pcb_initial_fpucw); if (PCB_USER_FPU(pcb)) @@ -596,6 +642,9 @@ int fpugetregs(struct thread *td) { struct pcb *pcb; + uint64_t *xstate_bv, bit; + char *sa; + int max_ext_n, i; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { @@ -613,6 +662,25 @@ fpugetregs(struct thread *td) return (_MC_FPOWNED_FPU); } else { critical_exit(); + if (use_xsaveopt) { + /* + * Handle partially saved state. 
+ */ + sa = (char *)get_pcb_user_save_pcb(pcb); + xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) + + offsetof(struct xstate_hdr, xstate_bv)); + max_ext_n = flsl(xsave_mask); + for (i = 0; i < max_ext_n; i++) { + bit = 1 << i; + if ((*xstate_bv & bit) != 0) + continue; + bcopy((char *)fpu_initialstate + + xsave_area_desc[i].offset, + sa + xsave_area_desc[i].offset, + xsave_area_desc[i].size); + *xstate_bv |= bit; + } + } return (_MC_FPOWNED_PCB); } } diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index ff11ea1..ff322bb 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -57,6 +57,7 @@ extern u_int cpu_procinfo; extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern u_int cpu_vendor_id; +extern char ctx_switch_xsave[]; extern char kstack[]; extern char sigcode[]; extern int szsigcode; diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h index ff6a777..7084e30 100644 --- a/sys/x86/include/specialreg.h +++ b/sys/x86/include/specialreg.h @@ -247,6 +247,11 @@ #define CPUID_TYPE_CORE 2 /* + * CPUID instruction 0xd Processor Extended State Enumeration Sub-leaf 1 + */ +#define CPUID_EXTSTATE_XSAVEOPT 0x00000001 + +/* * AMD extended function 8000_0007h edx info */ #define AMDPM_TS 0x00000001 |