summaryrefslogtreecommitdiffstats
path: root/sys/amd64/amd64/fpu.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/amd64/amd64/fpu.c')
-rw-r--r--sys/amd64/amd64/fpu.c70
1 files changed, 69 insertions, 1 deletions
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index ca951ad..a7812b7 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -132,10 +132,16 @@ static void fpu_clean_state(void);
SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
NULL, 1, "Floating point instructions executed in hardware");
+static int use_xsaveopt;
int use_xsave; /* non-static for cpu_switch.S */
uint64_t xsave_mask; /* the same */
static struct savefpu *fpu_initialstate;
+struct xsave_area_elm_descr {
+ u_int offset;
+ u_int size;
+} *xsave_area_desc;
+
void
fpusave(void *addr)
{
@@ -182,6 +188,17 @@ fpuinit_bsp1(void)
TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user);
xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE;
xsave_mask &= xsave_mask_user;
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & CPUID_EXTSTATE_XSAVEOPT) != 0) {
+ /*
+ * Patch the XSAVE instruction in the cpu_switch code
+ * to XSAVEOPT. We assume that the XSAVE encoding uses
+ * a REX byte, and set bit 4 of the r/m byte.
+ */
+ ctx_switch_xsave[3] |= 0x10;
+ use_xsaveopt = 1;
+ }
}
/*
@@ -252,6 +269,7 @@ static void
fpuinitstate(void *arg __unused)
{
register_t saveintr;
+ int cp[4], i, max_ext_n;
fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF,
M_WAITOK | M_ZERO);
@@ -273,6 +291,28 @@ fpuinitstate(void *arg __unused)
*/
bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc));
+ /*
+ * Create a table describing the layout of the CPU Extended
+ * Save Area.
+ */
+ if (use_xsaveopt) {
+ max_ext_n = flsl(xsave_mask);
+ xsave_area_desc = malloc(max_ext_n * sizeof(struct
+ xsave_area_elm_descr), M_DEVBUF, M_WAITOK | M_ZERO);
+ /* x87 state */
+ xsave_area_desc[0].offset = 0;
+ xsave_area_desc[0].size = 160;
+ /* XMM */
+ xsave_area_desc[1].offset = 160;
+ xsave_area_desc[1].size = 288 - 160;
+
+ for (i = 2; i < max_ext_n; i++) {
+ cpuid_count(0xd, i, cp);
+ xsave_area_desc[i].offset = cp[1];
+ xsave_area_desc[i].size = cp[0];
+ }
+ }
+
start_emulating();
intr_restore(saveintr);
}
@@ -560,8 +600,14 @@ fpudna(void)
* This is the first time this thread has used the FPU or
* the PCB doesn't contain a clean FPU state. Explicitly
* load an initial state.
+ *
+ * We prefer to restore the state from the actual save
+ * area in the PCB instead of directly loading from
+ * fpu_initialstate, to prime the XSAVEOPT
+ * modified-state tracking for this save area.
*/
- fpurestore(fpu_initialstate);
+ bcopy(fpu_initialstate, pcb->pcb_save, cpu_max_ext_state_size);
+ fpurestore(pcb->pcb_save);
if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
fldcw(pcb->pcb_initial_fpucw);
if (PCB_USER_FPU(pcb))
@@ -596,6 +642,9 @@ int
fpugetregs(struct thread *td)
{
struct pcb *pcb;
+ uint64_t *xstate_bv, bit;
+ char *sa;
+ int max_ext_n, i;
pcb = td->td_pcb;
if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) {
@@ -613,6 +662,25 @@ fpugetregs(struct thread *td)
return (_MC_FPOWNED_FPU);
} else {
critical_exit();
+ if (use_xsaveopt) {
+ /*
+ * Handle partially saved state.
+ */
+ sa = (char *)get_pcb_user_save_pcb(pcb);
+ xstate_bv = (uint64_t *)(sa + sizeof(struct savefpu) +
+ offsetof(struct xstate_hdr, xstate_bv));
+ max_ext_n = flsl(xsave_mask);
+ for (i = 0; i < max_ext_n; i++) {
+ bit = 1 << i;
+ if ((*xstate_bv & bit) != 0)
+ continue;
+ bcopy((char *)fpu_initialstate +
+ xsave_area_desc[i].offset,
+ sa + xsave_area_desc[i].offset,
+ xsave_area_desc[i].size);
+ *xstate_bv |= bit;
+ }
+ }
return (_MC_FPOWNED_PCB);
}
}
OpenPOWER on IntegriCloud