summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/amd64/amd64/exception.S3
-rw-r--r--sys/amd64/amd64/exception.s3
-rw-r--r--sys/amd64/amd64/fpu.c93
-rw-r--r--sys/amd64/amd64/genassym.c5
-rw-r--r--sys/amd64/amd64/initcpu.c22
-rw-r--r--sys/amd64/amd64/locore.S3
-rw-r--r--sys/amd64/amd64/locore.s3
-rw-r--r--sys/amd64/amd64/machdep.c83
-rw-r--r--sys/amd64/amd64/mp_machdep.c5
-rw-r--r--sys/amd64/amd64/mptable.c5
-rw-r--r--sys/amd64/amd64/support.S8
-rw-r--r--sys/amd64/amd64/support.s8
-rw-r--r--sys/amd64/amd64/trap.c5
-rw-r--r--sys/amd64/amd64/vm_machdep.c2
-rw-r--r--sys/amd64/include/fpu.h38
-rw-r--r--sys/amd64/include/md_var.h1
-rw-r--r--sys/amd64/include/mptable.h5
-rw-r--r--sys/amd64/include/npx.h38
-rw-r--r--sys/amd64/include/pcb.h3
-rw-r--r--sys/amd64/include/specialreg.h2
-rw-r--r--sys/amd64/include/trap.h3
-rw-r--r--sys/amd64/isa/npx.c93
-rw-r--r--sys/conf/NOTES3
-rw-r--r--sys/conf/options.i3861
-rw-r--r--sys/i386/conf/NOTES3
-rw-r--r--sys/i386/i386/exception.s3
-rw-r--r--sys/i386/i386/genassym.c5
-rw-r--r--sys/i386/i386/initcpu.c22
-rw-r--r--sys/i386/i386/locore.s3
-rw-r--r--sys/i386/i386/machdep.c83
-rw-r--r--sys/i386/i386/mp_machdep.c5
-rw-r--r--sys/i386/i386/mptable.c5
-rw-r--r--sys/i386/i386/support.s8
-rw-r--r--sys/i386/i386/trap.c5
-rw-r--r--sys/i386/i386/vm_machdep.c2
-rw-r--r--sys/i386/include/md_var.h1
-rw-r--r--sys/i386/include/mptable.h5
-rw-r--r--sys/i386/include/npx.h38
-rw-r--r--sys/i386/include/pcb.h3
-rw-r--r--sys/i386/include/specialreg.h2
-rw-r--r--sys/i386/include/trap.h3
-rw-r--r--sys/i386/isa/npx.c93
42 files changed, 653 insertions, 71 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 3ccbc23..419fbd2 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+IDTVEC(xmm)
+ pushl $0; TRAP(T_XMMFLT)
+
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).
diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s
index 3ccbc23..419fbd2 100644
--- a/sys/amd64/amd64/exception.s
+++ b/sys/amd64/amd64/exception.s
@@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+IDTVEC(xmm)
+ pushl $0; TRAP(T_XMMFLT)
+
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index b6c69a0..f6410e9 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -35,6 +35,7 @@
* $FreeBSD$
*/
+#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
+void fxsave __P((caddr_t addr));
+void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
+#ifdef CPU_ENABLE_SSE /* choose the sv_xmm (FXSR) or sv_87 (legacy) save layout via cpu_fxsr */
+#define GET_FPU_CW(proc) /* saved FPU control word of proc */ \
+ (cpu_fxsr ? \
+ (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
+ (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) /* saved FPU status word of proc */ \
+ (cpu_fxsr ? \
+ (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
+ (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) /* AND (mask) into the saved status word in place; a plain & would discard the result */ \
+ (cpu_fxsr ? \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
+#define GET_FPU_EXSW_PTR(pcb) /* address of the pcb's sv_ex_sw field */ \
+ (cpu_fxsr ? \
+ &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+ &(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define GET_FPU_CW(proc) \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) /* AND (mask) into the saved status word in place */ \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
+#define GET_FPU_EXSW_PTR(pcb) \
+ (&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
+static void fpusave __P((union savefpu *, u_char));
+static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
- struct save87 dummy;
+ union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
- fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
- npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
+ u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
- control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
- status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
+ control = GET_FPU_CW(curproc);
+ status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
- curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
+ exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
+ *exstat = status;
if (PCPU_GET(npxproc) != curproc)
- curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
+ MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
+ u_long *exstat;
critical_t s;
if (!npx_exists)
@@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
- PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
+
+ exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
+ *exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
- frstor(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
- struct save87 *addr;
+ union savefpu *addr;
{
stop_emulating();
- fnsave(addr);
+ fpusave(addr, curproc->p_oncpu);
+
start_emulating();
PCPU_SET(npxproc, NULL);
}
+static void
+fpusave(addr, oncpu) /* save FPU state to *addr: fnsave, or fxsave when cpu_fxsr is set */
+ union savefpu *addr; /* destination save area */
+ u_char oncpu; /* selects a slot in the static svxmm[] array; not range-checked here */
+{
+ static struct savexmm svxmm[MAXCPU]; /* staging slots for fxsave, indexed by oncpu */
+ /* NOTE(review): presumably staged through svxmm[] because fxsave needs a 16-byte-aligned operand - confirm. */
+ if (!cpu_fxsr)
+ fnsave(addr);
+ else {
+ fxsave(&svxmm[oncpu]); /* fxsave writes the static slot, not addr directly */
+ bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm)); /* then copy the image out to addr */
+ }
+}
+
+static void
+fpurstor(addr, oncpu) /* reload FPU state from *addr: frstor, or fxrstor when cpu_fxsr is set */
+ union savefpu *addr; /* source save area */
+ u_char oncpu; /* selects a slot in the static svxmm[] array; not range-checked here */
+{
+ static struct savexmm svxmm[MAXCPU]; /* staging slots for fxrstor; a separate array from fpusave()'s */
+ /* NOTE(review): presumably staged through svxmm[] because fxrstor needs a 16-byte-aligned operand - confirm. */
+ if (!cpu_fxsr)
+ frstor(addr);
+ else {
+ bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm)); /* copy the image into the static slot first */
+ fxrstor(&svxmm[oncpu]); /* fxrstor reads the static slot, not addr directly */
+ }
+}
+
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index c6f0970..ac664e8 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
+ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
#ifdef SMP
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 8b39b44..7fb56fa 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -61,8 +62,14 @@ static void init_6x86(void);
static void init_6x86MX(void);
static void init_ppro(void);
static void init_mendocino(void);
+void enable_sse();
#endif
+int hw_instruction_sse = 0;
+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
+ &hw_instruction_sse, 0,
+ "SIMD/MMX2 instructions available in CPU");
+
#ifdef I486_CPU
/*
* IBM Blue Lightning
@@ -501,6 +508,20 @@ init_mendocino(void)
#endif /* CPU_PPRO2CELERON */
}
+/*
+ * Enable SSE when the CPU reports both CPUID_XMM and CPUID_FXSR: set CR4_FXSR and CR4_XMM in CR4, then flag it in cpu_fxsr and hw_instruction_sse. Empty body unless CPU_ENABLE_SSE is defined.
+ */
+void
+enable_sse(void)
+{
+#if defined(CPU_ENABLE_SSE)
+ if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) {
+ load_cr4(rcr4() | CR4_FXSR | CR4_XMM); /* read-modify-write: the other CR4 bits are kept */
+ cpu_fxsr = hw_instruction_sse = 1; /* both flags start at 0 and are only ever set here */
+ }
+#endif
+} /* NOTE(review): this definition sits before "#endif I686_CPU"; confirm kernels built without I686_CPU that call it still link. */
+
#endif /* I686_CPU */
void
@@ -544,6 +565,7 @@ initializecpu(void)
init_mendocino();
break;
}
+ enable_sse();
}
break;
#endif
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index 379af45..cdfd799 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
- .globl cpu_high, cpu_feature
+ .globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
+cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index 379af45..cdfd799 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
- .globl cpu_high, cpu_feature
+ .globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
+cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 544aff5..7e9a4dd 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -127,6 +127,10 @@ extern void initializecpu(void);
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
static void cpu_startup __P((void *));
+#ifdef CPU_ENABLE_SSE
+static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
+static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
+#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
int _udatasel, _ucodesel;
@@ -1361,7 +1365,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
+ IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
void
sdtossd(sd, ssd)
@@ -1900,6 +1904,7 @@ init386(first)
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(0x80, &IDTVEC(int0x80_syscall),
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
- min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
- if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
+ min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
+ if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
@@ -2161,12 +2166,73 @@ set_regs(p, regs)
return (0);
}
+#ifdef CPU_ENABLE_SSE
+static void
+fill_fpregs_xmm(sv_xmm, sv_87) /* convert an FXSR-layout image (sv_xmm) into a save87-layout image (sv_87) */
+ struct savexmm *sv_xmm; /* source; only read */
+ struct save87 *sv_87; /* destination; written field by field */
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ /* Environment: copy the fields both layouts have; en_mxcsr is not copied. */
+ penv_87->en_cw = penv_xmm->en_cw;
+ penv_87->en_sw = penv_xmm->en_sw;
+ penv_87->en_tw = penv_xmm->en_tw; /* NOTE(review): FXSAVE images reportedly hold an abridged tag word; confirm a raw copy is intended */
+ penv_87->en_fip = penv_xmm->en_fip;
+ penv_87->en_fcs = penv_xmm->en_fcs;
+ penv_87->en_opcode = penv_xmm->en_opcode;
+ penv_87->en_foo = penv_xmm->en_foo;
+ penv_87->en_fos = penv_xmm->en_fos;
+
+ /* x87 registers: drop the per-register padding; sv_xmm[] is not copied. */
+ for (i = 0; i < 8; ++i)
+ sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc;
+
+ sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; /* carry over the last-exception status word */
+}
+
+static void
+set_fpregs_xmm(sv_87, sv_xmm) /* load a save87-layout image (sv_87) into an FXSR-layout image (sv_xmm) */
+ struct save87 *sv_87; /* source; only read */
+ struct savexmm *sv_xmm; /* destination; only the fields assigned below are changed */
+{
+ register struct env87 *penv_87 = &sv_87->sv_env;
+ register struct envxmm *penv_xmm = &sv_xmm->sv_env;
+ int i;
+
+ /* Environment: copy the fields both layouts have; en_mxcsr is left untouched. */
+ penv_xmm->en_cw = penv_87->en_cw;
+ penv_xmm->en_sw = penv_87->en_sw;
+ penv_xmm->en_tw = penv_87->en_tw; /* NOTE(review): FXRSTOR reportedly expects an abridged tag word; confirm a raw copy is intended */
+ penv_xmm->en_fip = penv_87->en_fip;
+ penv_xmm->en_fcs = penv_87->en_fcs;
+ penv_xmm->en_opcode = penv_87->en_opcode;
+ penv_xmm->en_foo = penv_87->en_foo;
+ penv_xmm->en_fos = penv_87->en_fos;
+
+ /* x87 registers: fp_pad[] and sv_xmm[] in the destination are left untouched. */
+ for (i = 0; i < 8; ++i)
+ sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i];
+
+ sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; /* carry over the last-exception status word */
+}
+#endif /* CPU_ENABLE_SSE */
+
int
fill_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
+ (struct save87 *)fpregs);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
@@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ set_fpregs_xmm((struct save87 *)fpregs,
+ &p->p_addr->u_pcb.pcb_save.sv_xmm);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
return (0);
}
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 0a0de69..d5af7b3 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 0a0de69..d5af7b3 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 3218774..55bc29c 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVEFPU_SIZE+3*4,%esp
+ subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/amd64/amd64/support.s b/sys/amd64/amd64/support.s
index 3218774..55bc29c 100644
--- a/sys/amd64/amd64/support.s
+++ b/sys/amd64/amd64/support.s
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVEFPU_SIZE+3*4,%esp
+ subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index b1ab3bd..0431e0e 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -386,6 +386,11 @@ restart:
ucode = T_FPOPFLT;
i = SIGILL;
break;
+
+ case T_XMMFLT: /* SIMD floating-point exception */
+ ucode = 0; /* XXX */
+ i = SIGFPE;
+ break;
}
} else {
/* kernel trap */
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index e5e5ea6..05efb4d 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
p1->p_addr->u_pcb.pcb_gs = rgs();
savecrit = critical_enter();
if (PCPU_GET(npxproc) == p1)
- npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
+ npxsave(&p1->p_addr->u_pcb.pcb_save);
critical_exit(savecrit);
#endif
diff --git a/sys/amd64/include/fpu.h b/sys/amd64/include/fpu.h
index 11f0478..1474f2f 100644
--- a/sys/amd64/include/fpu.h
+++ b/sys/amd64/include/fpu.h
@@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
+struct envxmm { /* environment header of struct savexmm */
+ u_int16_t en_cw; /* control word (16bits) */
+ u_int16_t en_sw; /* status word (16bits) */
+ u_int16_t en_tw; /* tag word (16bits) */
+ u_int16_t en_opcode; /* opcode last executed (11 bits ) */
+ u_int32_t en_fip; /* floating point instruction pointer */
+ u_int16_t en_fcs; /* floating code segment selector */
+ u_int16_t en_pad0; /* padding */
+ u_int32_t en_foo; /* floating operand offset */
+ u_int16_t en_fos; /* floating operand segment selector */
+ u_int16_t en_pad1; /* padding */
+ u_int32_t en_mxcsr; /* SSE control/status register */
+ u_int32_t en_pad2; /* padding */
+};
+
+/* Contents of one SSE extended accumulator: 16 raw bytes (128 bits). */
+struct xmmacc {
+ u_char xmm_bytes[16];
+};
+
+struct savexmm { /* FPU/SSE state image used as the fxsave/fxrstor operand */
+ struct envxmm sv_env; /* control/status environment, including en_mxcsr */
+ struct {
+ struct fpacc87 fp_acc; /* x87 accumulator contents */
+ u_char fp_pad[6]; /* padding */
+ } sv_fp[8]; /* eight x87 accumulators, each followed by padding */
+ struct xmmacc sv_xmm[8]; /* eight SSE extended accumulators */
+ u_long sv_ex_sw; /* status word for last exception */
+ u_char sv_pad[220]; /* NOTE(review): presumably pads the image out to the 512-byte FXSAVE area - confirm */
+} __attribute__((aligned(16))); /* 16-byte alignment requested for every object of this type */
+
+union savefpu { /* FPU save area, viewed through one of the two layouts below */
+ struct save87 sv_87; /* legacy layout */
+ struct savexmm sv_xmm; /* FXSR layout, including the SSE accumulators */
+};
+
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
-void npxsave __P((struct save87 *addr));
+void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 5a2ed26..6c81a96 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
extern u_int cpu_feature;
extern u_int cpu_high;
extern u_int cpu_id;
+extern u_int cpu_fxsr;
extern char cpu_vendor[];
extern u_int cyrix_did;
extern char kstack[];
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index 0a0de69..d5af7b3 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/amd64/include/npx.h b/sys/amd64/include/npx.h
index 11f0478..1474f2f 100644
--- a/sys/amd64/include/npx.h
+++ b/sys/amd64/include/npx.h
@@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
+struct envxmm { /* environment header of struct savexmm */
+ u_int16_t en_cw; /* control word (16bits) */
+ u_int16_t en_sw; /* status word (16bits) */
+ u_int16_t en_tw; /* tag word (16bits) */
+ u_int16_t en_opcode; /* opcode last executed (11 bits ) */
+ u_int32_t en_fip; /* floating point instruction pointer */
+ u_int16_t en_fcs; /* floating code segment selector */
+ u_int16_t en_pad0; /* padding */
+ u_int32_t en_foo; /* floating operand offset */
+ u_int16_t en_fos; /* floating operand segment selector */
+ u_int16_t en_pad1; /* padding */
+ u_int32_t en_mxcsr; /* SSE control/status register */
+ u_int32_t en_pad2; /* padding */
+};
+
+/* Contents of one SSE extended accumulator: 16 raw bytes (128 bits). */
+struct xmmacc {
+ u_char xmm_bytes[16];
+};
+
+struct savexmm { /* FPU/SSE state image used as the fxsave/fxrstor operand */
+ struct envxmm sv_env; /* control/status environment, including en_mxcsr */
+ struct {
+ struct fpacc87 fp_acc; /* x87 accumulator contents */
+ u_char fp_pad[6]; /* padding */
+ } sv_fp[8]; /* eight x87 accumulators, each followed by padding */
+ struct xmmacc sv_xmm[8]; /* eight SSE extended accumulators */
+ u_long sv_ex_sw; /* status word for last exception */
+ u_char sv_pad[220]; /* NOTE(review): presumably pads the image out to the 512-byte FXSAVE area - confirm */
+} __attribute__((aligned(16))); /* 16-byte alignment requested for every object of this type */
+
+union savefpu { /* FPU save area, viewed through one of the two layouts below */
+ struct save87 sv_87; /* legacy layout */
+ struct savexmm sv_xmm; /* FXSR layout, including the SSE accumulators */
+};
+
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
-void npxsave __P((struct save87 *addr));
+void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 962fc6f..6ea7c3d 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -62,7 +62,8 @@ struct pcb {
int pcb_dr7;
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
- struct save87 pcb_savefpu; /* floating point state for 287/387 */
+ union savefpu pcb_save;
+#define pcb_savefpu pcb_save.sv_87
u_char pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index 937cab0..02440c9 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -93,6 +93,8 @@
#define CPUID_PGE 0x2000
#define CPUID_MCA 0x4000
#define CPUID_CMOV 0x8000
+#define CPUID_FXSR 0x01000000
+#define CPUID_XMM 0x02000000
/*
* Model-specific registers for the i386 family
diff --git a/sys/amd64/include/trap.h b/sys/amd64/include/trap.h
index 6db97ec..67becb3 100644
--- a/sys/amd64/include/trap.h
+++ b/sys/amd64/include/trap.h
@@ -64,7 +64,8 @@
#define T_SEGNPFLT 26 /* segment not present fault */
#define T_STKFLT 27 /* stack fault */
#define T_MCHK 28 /* machine check trap */
-#define T_RESERVED 29 /* reserved (unknown) */
+#define T_XMMFLT 29 /* SIMD floating-point exception */
+#define T_RESERVED 30 /* reserved (unknown) */
/* XXX most of the following codes aren't used, but could be. */
diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index b6c69a0..f6410e9 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -35,6 +35,7 @@
* $FreeBSD$
*/
+#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
+void fxsave __P((caddr_t addr));
+void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
+#ifdef CPU_ENABLE_SSE /* choose the sv_xmm (FXSR) or sv_87 (legacy) save layout via cpu_fxsr */
+#define GET_FPU_CW(proc) /* saved FPU control word of proc */ \
+ (cpu_fxsr ? \
+ (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
+ (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) /* saved FPU status word of proc */ \
+ (cpu_fxsr ? \
+ (proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
+ (proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) /* AND (mask) into the saved status word in place; a plain & would discard the result */ \
+ (cpu_fxsr ? \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
+#define GET_FPU_EXSW_PTR(pcb) /* address of the pcb's sv_ex_sw field */ \
+ (cpu_fxsr ? \
+ &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+ &(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define GET_FPU_CW(proc) \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) /* AND (mask) into the saved status word in place */ \
+ ((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
+#define GET_FPU_EXSW_PTR(pcb) \
+ (&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
+static void fpusave __P((union savefpu *, u_char));
+static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
- struct save87 dummy;
+ union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
- fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
- npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
+ u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
- control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
- status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
+ control = GET_FPU_CW(curproc);
+ status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
- curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
+ exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
+ *exstat = status;
if (PCPU_GET(npxproc) != curproc)
- curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
+ MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
+ u_long *exstat;
critical_t s;
if (!npx_exists)
@@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
- PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
+
+ exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
+ *exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
- frstor(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
- struct save87 *addr;
+ union savefpu *addr;
{
stop_emulating();
- fnsave(addr);
+ fpusave(addr, curproc->p_oncpu);
+
start_emulating();
PCPU_SET(npxproc, NULL);
}
+static void
+fpusave(addr, oncpu) /* save FPU state to *addr: fnsave, or fxsave when cpu_fxsr is set */
+ union savefpu *addr; /* destination save area */
+ u_char oncpu; /* selects a slot in the static svxmm[] array; not range-checked here */
+{
+ static struct savexmm svxmm[MAXCPU]; /* staging slots for fxsave, indexed by oncpu */
+ /* NOTE(review): presumably staged through svxmm[] because fxsave needs a 16-byte-aligned operand - confirm. */
+ if (!cpu_fxsr)
+ fnsave(addr);
+ else {
+ fxsave(&svxmm[oncpu]); /* fxsave writes the static slot, not addr directly */
+ bcopy(&svxmm[oncpu], addr, sizeof(struct savexmm)); /* then copy the image out to addr */
+ }
+}
+
+static void
+fpurstor(addr, oncpu) /* reload FPU state from *addr: frstor, or fxrstor when cpu_fxsr is set */
+ union savefpu *addr; /* source save area */
+ u_char oncpu; /* selects a slot in the static svxmm[] array; not range-checked here */
+{
+ static struct savexmm svxmm[MAXCPU]; /* staging slots for fxrstor; a separate array from fpusave()'s */
+ /* NOTE(review): presumably staged through svxmm[] because fxrstor needs a 16-byte-aligned operand - confirm. */
+ if (!cpu_fxsr)
+ frstor(addr);
+ else {
+ bcopy(addr, &svxmm[oncpu], sizeof (struct savexmm)); /* copy the image into the static slot first */
+ fxrstor(&svxmm[oncpu]); /* fxrstor reads the static slot, not addr directly */
+ }
+}
+
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 6933609..e92da02 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
+# CPU_ENABLE_SSE enables SSE/MMX2 instructions support.
+#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
@@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
options CPU_BTB_EN
options CPU_DIRECT_MAPPED_CACHE
options CPU_DISABLE_5X86_LSSER
+options CPU_ENABLE_SSE
options CPU_FASTER_5X86_FPU
options CPU_I486_ON_386
options CPU_IORT
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index e37882c..6cc4ebc 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -59,6 +59,7 @@ CPU_WT_ALLOC opt_cpu.h
CYRIX_CACHE_WORKS opt_cpu.h
CYRIX_CACHE_REALLY_WORKS opt_cpu.h
NO_MEMORY_HOLE opt_cpu.h
+CPU_ENABLE_SSE opt_cpu.h
# The CPU type affects the endian conversion functions all over the kernel.
I386_CPU opt_global.h
diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES
index 6933609..e92da02 100644
--- a/sys/i386/conf/NOTES
+++ b/sys/i386/conf/NOTES
@@ -187,6 +187,8 @@ cpu I686_CPU # aka Pentium Pro(tm)
# reorder). This option should not be used if you use memory mapped
# I/O device(s).
#
+# CPU_ENABLE_SSE enables SSE/MMX2 instructions support.
+#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
@@ -248,6 +250,7 @@ options CPU_BLUELIGHTNING_3X
options CPU_BTB_EN
options CPU_DIRECT_MAPPED_CACHE
options CPU_DISABLE_5X86_LSSER
+options CPU_ENABLE_SSE
options CPU_FASTER_5X86_FPU
options CPU_I486_ON_386
options CPU_IORT
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index 3ccbc23..419fbd2 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -153,6 +153,9 @@ IDTVEC(fpu)
IDTVEC(align)
TRAP(T_ALIGNFLT)
+IDTVEC(xmm)
+ pushl $0; TRAP(T_XMMFLT)
+
/*
* alltraps entry point. Interrupts are enabled if this was a trap
* gate (TGT), else disabled if this was an interrupt gate (IGT).
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index c6f0970..ac664e8 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -126,8 +126,9 @@ ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87));
+ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
+ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
+ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
#ifdef SMP
diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c
index 8b39b44..7fb56fa 100644
--- a/sys/i386/i386/initcpu.c
+++ b/sys/i386/i386/initcpu.c
@@ -34,6 +34,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
+#include <sys/sysctl.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
@@ -61,8 +62,14 @@ static void init_6x86(void);
static void init_6x86MX(void);
static void init_ppro(void);
static void init_mendocino(void);
+void enable_sse();
#endif
+int hw_instruction_sse = 0;
+SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD,
+ &hw_instruction_sse, 0,
+ "SIMD/MMX2 instructions available in CPU");
+
#ifdef I486_CPU
/*
* IBM Blue Lightning
@@ -501,6 +508,20 @@ init_mendocino(void)
#endif /* CPU_PPRO2CELERON */
}
+/*
+ * Set the FXSR and XMM bits in CR4 (control register 4) so that SSE
+ * instructions are enabled, and record that fact in cpu_fxsr and
+ * hw_instruction_sse.  Nothing is done unless the kernel is built with
+ * CPU_ENABLE_SSE and cpu_feature advertises both CPUID_XMM and
+ * CPUID_FXSR.
+ */
+void
+enable_sse(void)
+{
+#if defined(CPU_ENABLE_SSE)
+	const u_int need = CPUID_XMM | CPUID_FXSR;
+
+	/* Both feature flags must be present, not just one of them. */
+	if ((cpu_feature & need) != need)
+		return;
+
+	load_cr4(rcr4() | CR4_FXSR | CR4_XMM);
+	hw_instruction_sse = 1;
+	cpu_fxsr = 1;
+#endif
+}
+
#endif /* I686_CPU */
void
@@ -544,6 +565,7 @@ initializecpu(void)
init_mendocino();
break;
}
+ enable_sse();
}
break;
#endif
diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s
index 379af45..cdfd799 100644
--- a/sys/i386/i386/locore.s
+++ b/sys/i386/i386/locore.s
@@ -113,12 +113,13 @@ HIDENAME(tmpstk):
.globl boothowto,bootdev
.globl cpu,cpu_vendor,cpu_id,bootinfo
- .globl cpu_high, cpu_feature
+ .globl cpu_high, cpu_feature, cpu_fxsr
cpu: .long 0 /* are we 386, 386sx, or 486 */
cpu_id: .long 0 /* stepping ID */
cpu_high: .long 0 /* highest arg to CPUID */
cpu_feature: .long 0 /* features */
+cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */
cpu_vendor: .space 20 /* CPU origin code */
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 544aff5..7e9a4dd 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -127,6 +127,10 @@ extern void initializecpu(void);
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
static void cpu_startup __P((void *));
+#ifdef CPU_ENABLE_SSE
+static void set_fpregs_xmm __P((struct save87 *, struct savexmm *));
+static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *));
+#endif /* CPU_ENABLE_SSE */
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
int _udatasel, _ucodesel;
@@ -1361,7 +1365,7 @@ extern inthand_t
IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
- IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
+ IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
void
sdtossd(sd, ssd)
@@ -1900,6 +1904,7 @@ init386(first)
setidt(16, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(17, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(18, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setidt(19, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(0x80, &IDTVEC(int0x80_syscall),
SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
@@ -2092,8 +2097,8 @@ int ptrace_write_u(p, off, data)
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
- min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu);
- if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) {
+ min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save);
+ if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) {
*(int*)((char *)p->p_addr + off) = data;
return (0);
}
@@ -2161,12 +2166,73 @@ set_regs(p, regs)
return (0);
}
+#ifdef CPU_ENABLE_SSE
+/*
+ * Translate a savexmm-format save area into save87 format: the FPU
+ * control/status environment, the eight FP accumulators and the
+ * exception status word are copied from sv_xmm into sv_87.  No other
+ * part of sv_87 is written.
+ * NOTE(review): en_tw is copied verbatim; confirm that the tag word
+ * uses the same encoding in both layouts.
+ */
+static void
+fill_fpregs_xmm(sv_xmm, sv_87)
+	struct savexmm *sv_xmm;
+	struct save87 *sv_87;
+{
+	struct envxmm *from = &sv_xmm->sv_env;
+	struct env87 *to = &sv_87->sv_env;
+	int n;
+
+	/* Control, status and tag words, then the last opcode. */
+	to->en_cw = from->en_cw;
+	to->en_sw = from->en_sw;
+	to->en_tw = from->en_tw;
+	to->en_opcode = from->en_opcode;
+
+	/* Instruction pointer and operand pointer (offset + selector). */
+	to->en_fip = from->en_fip;
+	to->en_fcs = from->en_fcs;
+	to->en_foo = from->en_foo;
+	to->en_fos = from->en_fos;
+
+	/* FP accumulators; the per-register padding on the xmm side is dropped. */
+	n = 0;
+	while (n < 8) {
+		sv_87->sv_ac[n] = sv_xmm->sv_fp[n].fp_acc;
+		n++;
+	}
+
+	sv_87->sv_ex_sw = sv_xmm->sv_ex_sw;
+}
+
+/*
+ * Translate a save87-format image into a savexmm-format save area: the
+ * FPU control/status environment, the eight FP accumulators and the
+ * exception status word are copied from sv_87 into sv_xmm.  No other
+ * part of sv_xmm (en_mxcsr, the sv_xmm registers, padding) is written.
+ * NOTE(review): en_tw is copied verbatim; confirm that the tag word
+ * uses the same encoding in both layouts.
+ */
+static void
+set_fpregs_xmm(sv_87, sv_xmm)
+	struct save87 *sv_87;
+	struct savexmm *sv_xmm;
+{
+	struct env87 *from = &sv_87->sv_env;
+	struct envxmm *to = &sv_xmm->sv_env;
+	int n;
+
+	/* Control, status and tag words, then the last opcode. */
+	to->en_cw = from->en_cw;
+	to->en_sw = from->en_sw;
+	to->en_tw = from->en_tw;
+	to->en_opcode = from->en_opcode;
+
+	/* Instruction pointer and operand pointer (offset + selector). */
+	to->en_fip = from->en_fip;
+	to->en_fcs = from->en_fcs;
+	to->en_foo = from->en_foo;
+	to->en_fos = from->en_fos;
+
+	/* FP accumulators; the per-register padding on the xmm side is left alone. */
+	n = 0;
+	while (n < 8) {
+		sv_xmm->sv_fp[n].fp_acc = sv_87->sv_ac[n];
+		n++;
+	}
+
+	sv_xmm->sv_ex_sw = sv_87->sv_ex_sw;
+}
+#endif /* CPU_ENABLE_SSE */
+
int
fill_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm,
+ (struct save87 *)fpregs);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs);
return (0);
}
@@ -2175,7 +2241,14 @@ set_fpregs(p, fpregs)
struct proc *p;
struct fpreg *fpregs;
{
- bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs);
+#ifdef CPU_ENABLE_SSE
+ if (cpu_fxsr) {
+ set_fpregs_xmm((struct save87 *)fpregs,
+ &p->p_addr->u_pcb.pcb_save.sv_xmm);
+ return (0);
+ }
+#endif /* CPU_ENABLE_SSE */
+ bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs);
return (0);
}
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 0a0de69..d5af7b3 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c
index 0a0de69..d5af7b3 100644
--- a/sys/i386/i386/mptable.c
+++ b/sys/i386/i386/mptable.c
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 3218774..55bc29c 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -976,7 +976,7 @@ ENTRY(i586_copyin)
ENTRY(fastmove)
pushl %ebp
movl %esp,%ebp
- subl $PCB_SAVEFPU_SIZE+3*4,%esp
+ subl $PCB_SAVE87_SIZE+3*4,%esp
movl 8(%ebp),%ecx
cmpl $63,%ecx
@@ -1018,7 +1018,7 @@ ENTRY(fastmove)
movl PCPU(CURPCB),%esi
addl $PCB_SAVEFPU,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1102,7 +1102,7 @@ fastmove_loop:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
movl -12(%ebp),%ecx
@@ -1147,7 +1147,7 @@ fastmove_fault:
addl $PCB_SAVEFPU,%edi
movl %esp,%esi
cld
- movl $PCB_SAVEFPU_SIZE>>2,%ecx
+ movl $PCB_SAVE87_SIZE>>2,%ecx
rep
movsl
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index b1ab3bd..0431e0e 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -386,6 +386,11 @@ restart:
ucode = T_FPOPFLT;
i = SIGILL;
break;
+
+ case T_XMMFLT: /* SIMD floating-point exception */
+ ucode = 0; /* XXX */
+ i = SIGFPE;
+ break;
}
} else {
/* kernel trap */
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index e5e5ea6..05efb4d 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -148,7 +148,7 @@ cpu_fork(p1, p2, flags)
p1->p_addr->u_pcb.pcb_gs = rgs();
savecrit = critical_enter();
if (PCPU_GET(npxproc) == p1)
- npxsave(&p1->p_addr->u_pcb.pcb_savefpu);
+ npxsave(&p1->p_addr->u_pcb.pcb_save);
critical_exit(savecrit);
#endif
diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h
index 5a2ed26..6c81a96 100644
--- a/sys/i386/include/md_var.h
+++ b/sys/i386/include/md_var.h
@@ -47,6 +47,7 @@ extern int (*copyout_vector) __P((const void *kaddr, void *udaddr,
extern u_int cpu_feature;
extern u_int cpu_high;
extern u_int cpu_id;
+extern u_int cpu_fxsr;
extern char cpu_vendor[];
extern u_int cyrix_did;
extern char kstack[];
diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h
index 0a0de69..d5af7b3 100644
--- a/sys/i386/include/mptable.h
+++ b/sys/i386/include/mptable.h
@@ -2221,6 +2221,8 @@ invltlb(void)
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.
*/
+extern void enable_sse(void);
+
void
ap_init(void)
{
@@ -2260,6 +2262,9 @@ ap_init(void)
/* set up FPU state on the AP */
npxinit(__INITIAL_NPXCW__);
+ /* set up SSE registers */
+ enable_sse();
+
/* A quick check from sanity claus */
apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]);
if (PCPU_GET(cpuid) != apic_id) {
diff --git a/sys/i386/include/npx.h b/sys/i386/include/npx.h
index 11f0478..1474f2f 100644
--- a/sys/i386/include/npx.h
+++ b/sys/i386/include/npx.h
@@ -85,6 +85,42 @@ struct save87 {
u_char sv_pad[64]; /* padding; used by emulators */
};
+/*
+ * FPU environment portion of struct savexmm: the control, status and
+ * tag words, the last opcode, the instruction and operand pointers and
+ * the SSE control/status register, with explicit padding fields.
+ * NOTE(review): presumably mirrors the start of the FXSAVE memory
+ * image -- confirm against the processor manual.
+ */
+struct envxmm {
+ u_int16_t en_cw; /* control word (16bits) */
+ u_int16_t en_sw; /* status word (16bits) */
+ u_int16_t en_tw; /* tag word (16bits) */
+ u_int16_t en_opcode; /* opcode last executed (11 bits) */
+ u_int32_t en_fip; /* floating point instruction pointer */
+ u_int16_t en_fcs; /* floating code segment selector */
+ u_int16_t en_pad0; /* padding */
+ u_int32_t en_foo; /* floating operand offset */
+ u_int16_t en_fos; /* floating operand segment selector */
+ u_int16_t en_pad1; /* padding */
+ u_int32_t en_mxcsr; /* SSE control/status register */
+ u_int32_t en_pad2; /* padding */
+};
+
+/* Contents of each SSE extended accumulator, held as 16 raw bytes */
+struct xmmacc {
+ u_char xmm_bytes[16]; /* register image */
+};
+
+/*
+ * FPU/SSE save area in the layout used with fxsave/fxrstor: the
+ * environment, eight padded FP accumulators and eight SSE accumulators,
+ * followed by a software-maintained exception status word and trailing
+ * padding.  The type is declared 16-byte aligned.
+ */
+struct savexmm {
+ struct envxmm sv_env; /* control/status environment */
+ struct {
+ struct fpacc87 fp_acc; /* FP accumulator contents */
+ u_char fp_pad[6]; /* padding */
+ } sv_fp[8]; /* one slot per FP accumulator */
+ struct xmmacc sv_xmm[8]; /* SSE extended accumulators */
+ u_long sv_ex_sw; /* status word for last exception */
+ u_char sv_pad[220]; /* NOTE(review): presumably fills out the fxsave image size -- confirm */
+} __attribute__((aligned(16)));
+
+/*
+ * FPU save area that can hold either layout: the save87 format or the
+ * savexmm format.
+ */
+union savefpu {
+ struct save87 sv_87; /* save87 layout */
+ struct savexmm sv_xmm; /* savexmm layout */
+};
+
/*
* The hardware default control word for i387's and later coprocessors is
* 0x37F, giving:
@@ -108,7 +144,7 @@ struct save87 {
int npxdna __P((void));
void npxexit __P((struct proc *p));
void npxinit __P((int control));
-void npxsave __P((struct save87 *addr));
+void npxsave __P((union savefpu *addr));
int npxtrap __P((void));
#endif
diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h
index 962fc6f..6ea7c3d 100644
--- a/sys/i386/include/pcb.h
+++ b/sys/i386/include/pcb.h
@@ -62,7 +62,8 @@ struct pcb {
int pcb_dr7;
struct pcb_ldt *pcb_ldt; /* per process (user) LDT */
- struct save87 pcb_savefpu; /* floating point state for 287/387 */
+ union savefpu pcb_save;
+#define pcb_savefpu pcb_save.sv_87
u_char pcb_flags;
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
diff --git a/sys/i386/include/specialreg.h b/sys/i386/include/specialreg.h
index 937cab0..02440c9 100644
--- a/sys/i386/include/specialreg.h
+++ b/sys/i386/include/specialreg.h
@@ -93,6 +93,8 @@
#define CPUID_PGE 0x2000
#define CPUID_MCA 0x4000
#define CPUID_CMOV 0x8000
+#define CPUID_FXSR 0x01000000
+#define CPUID_XMM 0x02000000
/*
* Model-specific registers for the i386 family
diff --git a/sys/i386/include/trap.h b/sys/i386/include/trap.h
index 6db97ec..67becb3 100644
--- a/sys/i386/include/trap.h
+++ b/sys/i386/include/trap.h
@@ -64,7 +64,8 @@
#define T_SEGNPFLT 26 /* segment not present fault */
#define T_STKFLT 27 /* stack fault */
#define T_MCHK 28 /* machine check trap */
-#define T_RESERVED 29 /* reserved (unknown) */
+#define T_XMMFLT 29 /* SIMD floating-point exception */
+#define T_RESERVED 30 /* reserved (unknown) */
/* XXX most of the following codes aren't used, but could be. */
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index b6c69a0..f6410e9 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -35,6 +35,7 @@
* $FreeBSD$
*/
+#include "opt_cpu.h"
#include "opt_debug_npx.h"
#include "opt_math_emulate.h"
@@ -99,6 +100,8 @@
#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop")
#define frstor(addr) __asm("frstor %0" : : "m" (*(addr)))
+#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
: : "n" (CR0_TS) : "ax")
#define stop_emulating() __asm("clts")
@@ -113,11 +116,41 @@ void fnstcw __P((caddr_t addr));
void fnstsw __P((caddr_t addr));
void fp_divide_by_0 __P((void));
void frstor __P((caddr_t addr));
+void fxsave __P((caddr_t addr));
+void fxrstor __P((caddr_t addr));
void start_emulating __P((void));
void stop_emulating __P((void));
#endif /* __GNUC__ */
+/*
+ * Accessors for the FPU state saved in a process's pcb.  With
+ * CPU_ENABLE_SSE the save area is read in savexmm layout when cpu_fxsr
+ * is set and in save87 layout otherwise; without CPU_ENABLE_SSE only
+ * the save87 layout is used.
+ *
+ *	GET_FPU_CW(proc)	 saved control word
+ *	GET_FPU_SW(proc)	 saved status word
+ *	MASK_FPU_SW(proc, mask)	 AND the saved status word with mask and
+ *				 store the result back (in place)
+ *	GET_FPU_EXSW_PTR(pcb)	 address of the saved exception status word
+ *
+ * MASK_FPU_SW must assign: the statement it replaces in npxtrap() was
+ * "en_sw &= ~0x80bf", so a plain "&" would silently stop clearing the
+ * saved status bits.  Each arm is parenthesized because an unbracketed
+ * assignment does not parse as an operand of ?:.
+ */
+#ifdef CPU_ENABLE_SSE
+#define GET_FPU_CW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_cw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+	(cpu_fxsr ? \
+		(proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw : \
+		(proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) \
+	(cpu_fxsr ? \
+		((proc)->p_addr->u_pcb.pcb_save.sv_xmm.sv_env.en_sw &= (mask)) : \
+		((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask)))
+#define GET_FPU_EXSW_PTR(pcb) \
+	(cpu_fxsr ? \
+		&(pcb)->pcb_save.sv_xmm.sv_ex_sw : \
+		&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#else /* CPU_ENABLE_SSE */
+#define GET_FPU_CW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_cw)
+#define GET_FPU_SW(proc) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw)
+#define MASK_FPU_SW(proc, mask) \
+	((proc)->p_addr->u_pcb.pcb_save.sv_87.sv_env.en_sw &= (mask))
+#define GET_FPU_EXSW_PTR(pcb) \
+	(&(pcb)->pcb_save.sv_87.sv_ex_sw)
+#endif /* CPU_ENABLE_SSE */
+
+
typedef u_char bool_t;
static int npx_attach __P((device_t dev));
@@ -127,6 +160,8 @@ static void npx_intr __P((void *));
#endif
static int npx_probe __P((device_t dev));
static int npx_probe1 __P((device_t dev));
+static void fpusave __P((union savefpu *, u_char));
+static void fpurstor __P((union savefpu *, u_char));
#ifdef I586_CPU_XXX
static long timezero __P((const char *funcname,
void (*func)(void *buf, size_t len)));
@@ -529,7 +564,7 @@ void
npxinit(control)
u_short control;
{
- struct save87 dummy;
+ union savefpu dummy;
critical_t savecrit;
if (!npx_exists)
@@ -544,7 +579,7 @@ npxinit(control)
stop_emulating();
fldcw(&control);
if (PCPU_GET(curpcb) != NULL)
- fnsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpusave(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
start_emulating();
critical_exit(savecrit);
}
@@ -560,7 +595,7 @@ npxexit(p)
savecrit = critical_enter();
if (p == PCPU_GET(npxproc))
- npxsave(&PCPU_GET(curpcb)->pcb_savefpu);
+ npxsave(&PCPU_GET(curpcb)->pcb_save);
critical_exit(savecrit);
#ifdef NPX_DEBUG
if (npx_exists) {
@@ -773,6 +808,7 @@ npxtrap()
{
critical_t savecrit;
u_short control, status;
+ u_long *exstat;
if (!npx_exists) {
printf("npxtrap: npxproc = %p, curproc = %p, npx_exists = %d\n",
@@ -787,16 +823,17 @@ npxtrap()
* wherever they are.
*/
if (PCPU_GET(npxproc) != curproc) {
- control = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_cw;
- status = curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw;
+ control = GET_FPU_CW(curproc);
+ status = GET_FPU_SW(curproc);
} else {
fnstcw(&control);
fnstsw(&status);
}
- curproc->p_addr->u_pcb.pcb_savefpu.sv_ex_sw = status;
+ exstat = GET_FPU_EXSW_PTR(&curproc->p_addr->u_pcb);
+ *exstat = status;
if (PCPU_GET(npxproc) != curproc)
- curproc->p_addr->u_pcb.pcb_savefpu.sv_env.en_sw &= ~0x80bf;
+ MASK_FPU_SW(curproc, ~0x80bf);
else
fnclex();
critical_exit(savecrit);
@@ -813,6 +850,7 @@ npxtrap()
int
npxdna()
{
+ u_long *exstat;
critical_t s;
if (!npx_exists)
@@ -828,7 +866,9 @@ npxdna()
* Record new context early in case frstor causes an IRQ13.
*/
PCPU_SET(npxproc, CURPROC);
- PCPU_GET(curpcb)->pcb_savefpu.sv_ex_sw = 0;
+
+ exstat = GET_FPU_EXSW_PTR(PCPU_GET(curpcb));
+ *exstat = 0;
/*
* The following frstor may cause an IRQ13 when the state being
* restored has a pending error. The error will appear to have been
@@ -841,7 +881,7 @@ npxdna()
* fnsave are broken, so our treatment breaks fnclex if it is the
* first FPU instruction after a context switch.
*/
- frstor(&PCPU_GET(curpcb)->pcb_savefpu);
+ fpurstor(&PCPU_GET(curpcb)->pcb_save, curproc->p_oncpu);
critical_exit(s);
return (1);
@@ -872,15 +912,46 @@ npxdna()
*/
void
npxsave(addr)
- struct save87 *addr;
+ union savefpu *addr;
{
stop_emulating();
- fnsave(addr);
+ fpusave(addr, curproc->p_oncpu);
+
start_emulating();
PCPU_SET(npxproc, NULL);
}
+/*
+ * Store the current FPU state at addr.  When cpu_fxsr is clear this is
+ * a plain fnsave; otherwise the state is written with fxsave into the
+ * svxmm[] entry selected by oncpu and then copied to addr.
+ * NOTE(review): the static staging array presumably exists to give
+ * fxsave a suitably aligned target -- confirm.  oncpu is used as an
+ * index without a range check against MAXCPU.
+ */
+static void
+fpusave(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+	struct savexmm *slot;
+
+	if (!cpu_fxsr) {
+		fnsave(addr);
+		return;
+	}
+
+	slot = &svxmm[oncpu];
+	fxsave(slot);
+	bcopy(slot, addr, sizeof(*slot));
+}
+
+/*
+ * Load the FPU state stored at addr.  When cpu_fxsr is clear this is a
+ * plain frstor; otherwise the image is first copied into the svxmm[]
+ * entry selected by oncpu and loaded from there with fxrstor.
+ * NOTE(review): the static staging array presumably exists to give
+ * fxrstor a suitably aligned source -- confirm.  oncpu is used as an
+ * index without a range check against MAXCPU.
+ */
+static void
+fpurstor(addr, oncpu)
+	union savefpu *addr;
+	u_char oncpu;
+{
+	static struct savexmm svxmm[MAXCPU];
+	struct savexmm *slot;
+
+	if (!cpu_fxsr) {
+		frstor(addr);
+		return;
+	}
+
+	slot = &svxmm[oncpu];
+	bcopy(addr, slot, sizeof(*slot));
+	fxrstor(slot);
+}
+
#ifdef I586_CPU_XXX
static long
timezero(funcname, func)
OpenPOWER on IntegriCloud