summaryrefslogtreecommitdiffstats
path: root/sys/amd64/amd64/fpu.c
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2012-07-14 15:48:30 +0000
committerkib <kib@FreeBSD.org>2012-07-14 15:48:30 +0000
commit5bd2b3d73a52ccf1497165e4224696e551152035 (patch)
treeaa9f4225bbf17a3b53982e7f3c810f1d7ab0ea28 /sys/amd64/amd64/fpu.c
parentf9866864606d10b02b48047c44cc0d5d87003625 (diff)
downloadFreeBSD-src-5bd2b3d73a52ccf1497165e4224696e551152035.zip
FreeBSD-src-5bd2b3d73a52ccf1497165e4224696e551152035.tar.gz
Add support for the XSAVEOPT instruction use. Our XSAVE/XRSTOR usage
mostly meets the guidelines set by the Intel SDM: 1. We use XRSTOR and XSAVE from the same CPL using the same linear address for the store area 2. Contrary to the recommendations, we cannot zero the FPU save area for a new thread, since fork semantic requires the copy of the previous state. This advice seemingly contradicts to the advice from the item 6. 3. We do use XSAVEOPT in the context switch code only, and the area for XSAVEOPT already always contains the data saved by XSAVE. 4. We do not modify the save area between XRSTOR, when the area is loaded into FPU context, and XSAVE. We always spit the fpu context into save area and start emulation when directly writing into FPU context. 5. We do not use segmented addressing to access save area, or rather, always address it using %ds basing. 6. XSAVEOPT can be only executed in the area which was previously loaded with XRSTOR, since context switch code checks for FPU use by outgoing thread before saving, and thread which stopped emulation forcibly get context loaded with XRSTOR. 7. The PCB cannot be paged out while FPU emulation is turned off, since stack of the executing thread is never swapped out. The context switch code is patched to issue XSAVEOPT instead of XSAVE if supported. This approach eliminates one conditional in the context switch code, which would be needed otherwise. For user-visible machine context to have proper data, fpugetregs() checks for unsaved extension blocks and manually copies pristine FPU state into them, according to the description provided by CPUID leaf 0xd. MFC after: 1 month
Diffstat (limited to 'sys/amd64/amd64/fpu.c')
-rw-r--r--sys/amd64/amd64/fpu.c70
1 files changed, 69 insertions, 1 deletions
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index ca951ad..a7812b7 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -132,10 +132,16 @@ static void fpu_clean_state(void);
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
NULL, 1, "Floating point instructions executed in hardware");
+static int use_xsaveopt;
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static struct savefpu *fpu_initialstate;
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
void
fpusave(void *addr)
{
@@ -182,6 +188,17 @@ fpuinit_bsp1(void)
TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+ * to XSAVEOPT. We assume that XSAVE encoding used
+ * REX byte, and set the bit 4 of the r/m byte.
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
}
/*
@@ -252,6 +269,7 @@ static void
fpuinitstate(void *arg __unused)
{
register_t saveintr;
+ int cp[4], i, max_ext_n;
fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused)
*/
bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
start_emulating();
intr_restore(saveintr);
}
@@ -560,8 +600,14 @@ fpudna(void)
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
* load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in PCB instead of directly loading from
+ * fpu_initialstate, to ignite the XSAVEOPT
+ * tracking engine.
*/
- fpurestore(fpu_initialstate);
+ bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(pcb->pcb_save);
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
fldcw(pcb->pcb_initial_fpucw);
if (PCB_USER_FPU(pcb))
@@ -596,6 +642,9 @@ int
fpugetregs(struct thread *td)
{
struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -613,6 +662,25 @@ fpugetregs(struct thread *td)
return (_MC_FPOWNED_FPU);
} else {
critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+ bit = 1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
return (_MC_FPOWNED_PCB);
}
}
OpenPOWER on IntegriCloud