summaryrefslogtreecommitdiffstats
path: root/lib/msun/i387
diff options
context:
space:
mode:
authordas <das@FreeBSD.org>2005-03-17 22:21:46 +0000
committerdas <das@FreeBSD.org>2005-03-17 22:21:46 +0000
commitfdf53809bb98e2e50d6eeca7ace0b5f5e559f12a (patch)
treed94c1f0725cdb7c8204f2815623477f889b7c954 /lib/msun/i387
parent148ae38eee2646f9d2d22da1a63f0b571335fc28 (diff)
downloadFreeBSD-src-fdf53809bb98e2e50d6eeca7ace0b5f5e559f12a.zip
FreeBSD-src-fdf53809bb98e2e50d6eeca7ace0b5f5e559f12a.tar.gz
Make the fenv.h routines work for programs that use SSE for
floating-point arithmetic on i386.

Now I'm going to make excuses for why this code is kinda scary:

- To avoid breaking the ABI with 5.3-RELEASE, we can't change
  sizeof(fenv_t). I stuck the saved mxcsr in some discontiguous
  reserved bits in the existing structure.

- Attempting to access the mxcsr on older processors results in an
  illegal instruction exception, so support for SSE must be detected
  at runtime. (The extra baggage is optimized away if either the
  application or libm is compiled with -msse{,2}.)

I didn't run tests to ensure that this doesn't SIGILL on older 486's
lacking the cpuid instruction or on other processors lacking SSE.
Results from running the fenv regression test on these processors
would be appreciated. (You'll need to compile the test with
-DNO_STRICT_DFL_ENV.) If you have an 80386, or if your processor
supports SSE but the kernel didn't enable it, then you're probably
out of luck.

Also, I un-inlined some of the functions that grew larger as a result
of this change, moving them from fenv.h to fenv.c.
Diffstat (limited to 'lib/msun/i387')
-rw-r--r--lib/msun/i387/fenv.c180
-rw-r--r--lib/msun/i387/fenv.h167
2 files changed, 262 insertions, 85 deletions
diff --git a/lib/msun/i387/fenv.c b/lib/msun/i387/fenv.c
index 54197f5..567d699 100644
--- a/lib/msun/i387/fenv.c
+++ b/lib/msun/i387/fenv.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 David Schultz <das@FreeBSD.ORG>
+ * Copyright (c) 2004-2005 David Schultz <das@FreeBSD.ORG>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,14 +26,186 @@
* $FreeBSD$
*/
+#include <sys/cdefs.h>
#include <sys/types.h>
#include <machine/npx.h>
-#include <fenv.h>
+#include "fenv.h"
const fenv_t __fe_dfl_env = {
- 0xffff0000 | __INITIAL_NPXCW__,
- 0xffff0000,
+ __INITIAL_NPXCW__, /* __control: initial x87 control word */
+ 0x0000, /* __mxcsr_hi: upper 16 bits of the saved mxcsr */
+ 0x0000, /* __status: no exception flags set */
+ 0x1f80, /* __mxcsr_lo: bits 7-12 set; presumably all SSE exceptions masked */
0xffffffff,
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff }
};
+
+/*
+ * Cached answer to "does this processor support SSE?".  When the
+ * compiler is already targeting SSE (__SSE__ is defined) the answer is
+ * known at build time; otherwise it starts out unknown and is filled
+ * in by __test_sse().
+ */
+#ifdef __SSE__
+enum __sse_support __has_sse = __SSE_YES;
+#else
+enum __sse_support __has_sse = __SSE_UNK;
+#endif
+
+/* Copy the current EFLAGS register into *x (via the stack). */
+#define	getfl(x)							\
+	__asm __volatile("pushfl\n\t"					\
+	    "popl %0"							\
+	    : "=mr" (*(x)))
+
+/* Load EFLAGS from x (via the stack). */
+#define	setfl(x)							\
+	__asm __volatile("pushl %0\n\t"					\
+	    "popfl"							\
+	    : : "g" (x))
+
+/*
+ * Execute CPUID with %eax = 1 and store the resulting %edx into *x.
+ * %ebx is saved and restored by hand around the instruction rather
+ * than being listed as a clobber; %eax and %ecx are declared clobbered.
+ */
+#define	cpuid_dx(x)							\
+	__asm __volatile("pushl %%ebx\n\t"				\
+	    "movl $1, %%eax\n\t"					\
+	    "cpuid\n\t"							\
+	    "popl %%ebx"						\
+	    : "=d" (*(x)) : : "eax", "ecx")
+
+/*
+ * Detect at run time whether this processor supports SSE and cache
+ * the answer in __has_sse.  The check is needed because ldmxcsr and
+ * stmxcsr must be used to get correct results if any part of the
+ * program was compiled to use SSE floating-point, yet SSE cannot be
+ * used on older processors.
+ *
+ * Returns 1 if SSE is available, 0 otherwise.
+ */
+int
+__test_sse(void)
+{
+	int before, after;
+	int edx;
+
+	/*
+	 * Am I a 486?  Try to flip bit 21 of EFLAGS (0x200000, the ID
+	 * flag) and read it back.  If the bit did not change, CPUID is
+	 * not usable and SSE cannot be present.
+	 */
+	getfl(&before);
+	after = before ^ 0x200000;
+	setfl(after);
+	getfl(&after);
+	if (before == after) {
+		__has_sse = __SSE_NO;
+		return (0);
+	}
+
+	/* Not a 486, so CPUID should work; 0x2000000 in %edx is the SSE bit. */
+	cpuid_dx(&edx);
+	if ((edx & 0x2000000) == 0) {
+		__has_sse = __SSE_NO;
+		return (0);
+	}
+
+	__has_sse = __SSE_YES;
+	return (1);
+}
+
+/*
+ * Set the exception flags selected by excepts to the states recorded
+ * in *flagp, in the x87 status word and, when SSE is available, in
+ * the mxcsr as well.  Flags not selected by excepts are left as they
+ * are.  Always returns 0.
+ */
+int
+fesetexceptflag(const fexcept_t *flagp, int excepts)
+{
+	fenv_t env;
+	int mxcsr;
+
+	/*
+	 * Only exception-flag bits may be modified.  Without this, a
+	 * stray bit in excepts would clear or set unrelated bits of
+	 * the status word and of the mxcsr (which also holds the SSE
+	 * exception masks and rounding control).
+	 */
+	excepts &= FE_ALL_EXCEPT;
+
+	/* x87: edit the status word inside a saved environment. */
+	__fnstenv(&env);
+	env.__status &= ~excepts;
+	env.__status |= *flagp & excepts;
+	__fldenv(env);
+
+	/* SSE: the flags occupy the same bit positions in the mxcsr. */
+	if (__HAS_SSE()) {
+		__stmxcsr(&mxcsr);
+		mxcsr &= ~excepts;
+		mxcsr |= *flagp & excepts;
+		__ldmxcsr(mxcsr);
+	}
+
+	return (0);
+}
+
+/*
+ * Raise the exceptions in excepts by setting their flags through
+ * fesetexceptflag() and then executing fwait (presumably so that a
+ * newly pending unmasked x87 exception is delivered here rather than
+ * at some later floating-point instruction).  Always returns 0.
+ */
+int
+feraiseexcept(int excepts)
+{
+	fexcept_t flags;
+
+	flags = excepts;
+	fesetexceptflag(&flags, excepts);
+	__fwait();
+	return (0);
+}
+
+/*
+ * Store the current floating-point environment in *envp: the x87
+ * state via fnstenv and, when SSE is available, the mxcsr packed into
+ * the structure with __set_mxcsr().  Always returns 0.
+ */
+int
+fegetenv(fenv_t *envp)
+{
+	int saved_cw, csr;
+
+	/*
+	 * Executing fnstenv has the side effect of masking all
+	 * exceptions, so grab the control word beforehand and put it
+	 * back once the environment has been stored.
+	 */
+	__fnstcw(&saved_cw);
+	__fnstenv(envp);
+	if (__HAS_SSE()) {
+		__stmxcsr(&csr);
+		__set_mxcsr(*envp, csr);
+	}
+	__fldcw(saved_cw);
+	return (0);
+}
+
+/*
+ * Save the current environment in *envp, then clear the exception
+ * flags.  On the SSE side the flags are cleared and every exception
+ * is masked in the mxcsr; on the x87 side the flags are cleared with
+ * fnclex and the control word is not reloaded after fnstenv.
+ * Always returns 0.
+ */
+int
+feholdexcept(fenv_t *envp)
+{
+	int csr;
+
+	__fnstenv(envp);
+	__fnclex();
+	if (__HAS_SSE()) {
+		__stmxcsr(&csr);
+		/* Record the mxcsr as it was before it is modified. */
+		__set_mxcsr(*envp, csr);
+		csr = (csr & ~FE_ALL_EXCEPT) |
+		    (FE_ALL_EXCEPT << _SSE_EMASK_SHIFT);
+		__ldmxcsr(csr);
+	}
+	return (0);
+}
+
+/*
+ * Install the environment *envp and then re-raise whatever exceptions
+ * were flagged (in the x87 status word or the mxcsr) just before the
+ * switch.  Always returns 0.
+ */
+int
+feupdateenv(const fenv_t *envp)
+{
+	int x87_sw, csr;
+
+	/* Snapshot the currently raised flags from both units. */
+	__fnstsw(&x87_sw);
+	csr = 0;
+	if (__HAS_SSE())
+		__stmxcsr(&csr);
+
+	fesetenv(envp);
+	feraiseexcept((csr | x87_sw) & FE_ALL_EXCEPT);
+	return (0);
+}
+
+/*
+ * Unmask (enable trapping for) the exceptions in mask in the x87
+ * control word and, when SSE is available, in the mxcsr.  Bits of
+ * mask outside FE_ALL_EXCEPT are ignored.
+ *
+ * Returns the set of exceptions that were enabled before the call,
+ * as a subset of FE_ALL_EXCEPT.
+ */
+int
+__feenableexcept(int mask)
+{
+	int mxcsr, control, omask;
+
+	mask &= FE_ALL_EXCEPT;
+	__fnstcw(&control);
+	if (__HAS_SSE())
+		__stmxcsr(&mxcsr);
+	else
+		mxcsr = 0;
+	/*
+	 * A set mask bit means "disabled", so the previously enabled
+	 * set is the complement.  Complement first and restrict to
+	 * the exception bits afterwards; complementing an already
+	 * restricted value would turn on every bit outside
+	 * FE_ALL_EXCEPT in the result.
+	 */
+	omask = ~(control | (mxcsr >> _SSE_EMASK_SHIFT)) & FE_ALL_EXCEPT;
+	control &= ~mask;
+	__fldcw(control);
+	if (__HAS_SSE()) {
+		mxcsr &= ~(mask << _SSE_EMASK_SHIFT);
+		__ldmxcsr(mxcsr);
+	}
+	return (omask);
+}
+
+/*
+ * Mask (disable trapping for) the exceptions in mask in the x87
+ * control word and, when SSE is available, in the mxcsr.  Bits of
+ * mask outside FE_ALL_EXCEPT are ignored.
+ *
+ * Returns the set of exceptions that were enabled before the call,
+ * as a subset of FE_ALL_EXCEPT.
+ */
+int
+__fedisableexcept(int mask)
+{
+	int mxcsr, control, omask;
+
+	mask &= FE_ALL_EXCEPT;
+	__fnstcw(&control);
+	if (__HAS_SSE())
+		__stmxcsr(&mxcsr);
+	else
+		mxcsr = 0;
+	/*
+	 * A set mask bit means "disabled", so the previously enabled
+	 * set is the complement.  Complement first and restrict to
+	 * the exception bits afterwards; complementing an already
+	 * restricted value would turn on every bit outside
+	 * FE_ALL_EXCEPT in the result.
+	 */
+	omask = ~(control | (mxcsr >> _SSE_EMASK_SHIFT)) & FE_ALL_EXCEPT;
+	control |= mask;
+	__fldcw(control);
+	if (__HAS_SSE()) {
+		mxcsr |= mask << _SSE_EMASK_SHIFT;
+		__ldmxcsr(mxcsr);
+	}
+	return (omask);
+}
+
+__weak_reference(__feenableexcept, feenableexcept); /* public name, as declared in fenv.h */
+__weak_reference(__fedisableexcept, fedisableexcept); /* public name, as declared in fenv.h */
diff --git a/lib/msun/i387/fenv.h b/lib/msun/i387/fenv.h
index 2a54c70..d62dcf8 100644
--- a/lib/msun/i387/fenv.h
+++ b/lib/msun/i387/fenv.h
@@ -32,13 +32,26 @@
#include <sys/cdefs.h>
#include <sys/_types.h>
+/*
+ * To preserve binary compatibility with FreeBSD 5.3, we pack the
+ * mxcsr into some reserved fields, rather than changing sizeof(fenv_t).
+ *
+ * Each of the former 32-bit __control and __status members is split
+ * into a 16-bit value followed by one 16-bit half of the saved mxcsr.
+ * Use __get_mxcsr() and __set_mxcsr() to access the two halves as a
+ * single value.
+ */
typedef struct {
- __uint32_t __control;
- __uint32_t __status;
+ __uint16_t __control; /* x87 control word */
+ __uint16_t __mxcsr_hi; /* bits 16-31 of the saved mxcsr */
+ __uint16_t __status; /* x87 status word */
+ __uint16_t __mxcsr_lo; /* bits 0-15 of the saved mxcsr */
__uint32_t __tag;
char __other[16];
} fenv_t;
+/*
+ * Accessors for the mxcsr stored as two 16-bit halves in a fenv_t.
+ *
+ * __get_mxcsr(env) yields the reassembled value as an int.  The halves
+ * are widened to an unsigned 32-bit type before shifting: after the
+ * usual integer promotions a plain `__mxcsr_hi << 16' is a shift of a
+ * signed int, which overflows whenever bit 15 of the high half is set
+ * (e.g. after __set_mxcsr(env, 0xffffffff)).
+ *
+ * __set_mxcsr(env, x) stores the upper and lower 16 bits of x.  Both
+ * arguments are evaluated more than once.
+ */
+#define	__get_mxcsr(env)						\
+	((int)(((__uint32_t)(env).__mxcsr_hi << 16) |			\
+	    (__uint32_t)(env).__mxcsr_lo))
+#define	__set_mxcsr(env, x)	do {					\
+	(env).__mxcsr_hi = (__uint32_t)(x) >> 16;			\
+	(env).__mxcsr_lo = (__uint16_t)(x);				\
+} while (0)
+
typedef __uint16_t fexcept_t;
/* Exception flags */
@@ -59,6 +72,25 @@ typedef __uint16_t fexcept_t;
#define _ROUND_MASK (FE_TONEAREST | FE_DOWNWARD | \
FE_UPWARD | FE_TOWARDZERO)
+/*
+ * As compared to the x87 control word, the SSE unit's control word
+ * has the rounding control bits offset by 3 and the exception mask
+ * bits offset by 7.
+ *
+ * Shift an x87-style rounding mode or exception mask left by the
+ * matching constant to obtain its position in the mxcsr.
+ */
+#define _SSE_ROUND_SHIFT 3
+#define _SSE_EMASK_SHIFT 7
+
+/*
+ * After testing for SSE support once, we cache the result in __has_sse.
+ *
+ * __HAS_SSE() is a compile-time 1 when the compiler itself targets SSE
+ * (__SSE__ defined).  Otherwise it consults the cached value and calls
+ * __test_sse() only while the state is still __SSE_UNK.
+ */
+enum __sse_support { __SSE_YES, __SSE_NO, __SSE_UNK };
+extern enum __sse_support __has_sse;
+int __test_sse(void);
+#ifdef __SSE__
+#define __HAS_SSE() 1
+#else
+#define __HAS_SSE() (__has_sse == __SSE_YES || \
+ (__has_sse == __SSE_UNK && __test_sse()))
+#endif
+
__BEGIN_DECLS
/* Default floating-point environment */
@@ -72,11 +104,14 @@ extern const fenv_t __fe_dfl_env;
#define __fnstcw(__cw) __asm __volatile("fnstcw %0" : "=m" (*(__cw)))
#define __fnstsw(__sw) __asm __volatile("fnstsw %0" : "=am" (*(__sw)))
#define __fwait() __asm __volatile("fwait")
+#define __ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr))
+#define __stmxcsr(__csr) __asm __volatile("stmxcsr %0" : "=m" (*(__csr)))
static __inline int
feclearexcept(int __excepts)
{
fenv_t __env;
+ int __mxcsr;
if (__excepts == FE_ALL_EXCEPT) {
__fnclex();
@@ -85,48 +120,42 @@ feclearexcept(int __excepts)
__env.__status &= ~__excepts;
__fldenv(__env);
}
+ if (__HAS_SSE()) {
+ __stmxcsr(&__mxcsr);
+ __mxcsr &= ~__excepts;
+ __ldmxcsr(__mxcsr);
+ }
return (0);
}
static __inline int
fegetexceptflag(fexcept_t *__flagp, int __excepts)
{
- int __status;
+ int __mxcsr, __status; /* SSE and x87 status; merged below */
__fnstsw(&__status);
- *__flagp = __status & __excepts;
+ if (__HAS_SSE())
+ __stmxcsr(&__mxcsr);
+ else
+ __mxcsr = 0; /* no SSE unit: it contributes no flags */
+ *__flagp = (__mxcsr | __status) & __excepts; /* a flag is reported if either unit has it set */
return (0);
}
-static __inline int
-fesetexceptflag(const fexcept_t *__flagp, int __excepts)
-{
- fenv_t __env;
-
- __fnstenv(&__env);
- __env.__status &= ~__excepts;
- __env.__status |= *__flagp & __excepts;
- __fldenv(__env);
- return (0);
-}
-
-static __inline int
-feraiseexcept(int __excepts)
-{
- fexcept_t __ex = __excepts;
-
- fesetexceptflag(&__ex, __excepts);
- __fwait();
- return (0);
-}
+int fesetexceptflag(const fexcept_t *__flagp, int __excepts);
+int feraiseexcept(int __excepts);
static __inline int
fetestexcept(int __excepts)
{
- int __status;
+ int __mxcsr, __status; /* SSE and x87 status; merged below */
__fnstsw(&__status);
- return (__status & __excepts);
+ if (__HAS_SSE())
+ __stmxcsr(&__mxcsr);
+ else
+ __mxcsr = 0; /* no SSE unit: it contributes no flags */
+ return ((__status | __mxcsr) & __excepts); /* set in either unit counts as raised */
}
static __inline int
@@ -134,6 +163,12 @@ fegetround(void)
{
int __control;
+ /*
+ * We assume that the x87 and the SSE unit agree on the
+ * rounding mode. Reading the control word on the x87 turns
+ * out to be about 5 times faster than reading it on the SSE
+ * unit on an Opteron 244.
+ */
__fnstcw(&__control);
return (__control & _ROUND_MASK);
}
@@ -141,89 +176,59 @@ fegetround(void)
static __inline int
fesetround(int __round)
{
- int __control;
+ int __mxcsr, __control;
if (__round & ~_ROUND_MASK)
return (-1);
+
__fnstcw(&__control);
__control &= ~_ROUND_MASK;
__control |= __round;
__fldcw(__control);
- return (0);
-}
-static __inline int
-fegetenv(fenv_t *__envp)
-{
- int __control;
+ if (__HAS_SSE()) { /* keep the SSE unit's rounding mode in step with the x87 */
+ __stmxcsr(&__mxcsr);
+ __mxcsr &= ~(_ROUND_MASK << _SSE_ROUND_SHIFT); /* rounding field sits _SSE_ROUND_SHIFT bits higher in the mxcsr */
+ __mxcsr |= __round << _SSE_ROUND_SHIFT;
+ __ldmxcsr(__mxcsr);
+ }
- /*
- * fnstenv masks all exceptions, so we need to save and
- * restore the control word to avoid this side effect.
- */
- __fnstcw(&__control);
- __fnstenv(__envp);
- __fldcw(__control);
return (0);
}
-static __inline int
-feholdexcept(fenv_t *__envp)
-{
-
- __fnstenv(__envp);
- __fnclex();
- return (0);
-}
+int fegetenv(fenv_t *__envp);
+int feholdexcept(fenv_t *__envp);
static __inline int
fesetenv(const fenv_t *__envp)
{
+ fenv_t __env = *__envp; /* work on a copy; *__envp is const */
+ int __mxcsr;
- __fldenv(*__envp);
+ __mxcsr = __get_mxcsr(__env); /* pull the saved mxcsr out of the packed fields */
+ __set_mxcsr(__env, 0xffffffff); /* presumably restores the all-ones pattern the x87 expects in those reserved bits -- TODO confirm */
+ __fldenv(__env);
+ if (__HAS_SSE())
+ __ldmxcsr(__mxcsr);
return (0);
}
-static __inline int
-feupdateenv(const fenv_t *__envp)
-{
- int __status;
-
- __fnstsw(&__status);
- __fldenv(*__envp);
- feraiseexcept(__status & FE_ALL_EXCEPT);
- return (0);
-}
+int feupdateenv(const fenv_t *__envp);
#if __BSD_VISIBLE
-static __inline int
-feenableexcept(int __mask)
-{
- int __control;
-
- __fnstcw(&__control);
- __mask = __control & ~(__mask & FE_ALL_EXCEPT);
- __fldcw(__mask);
- return (~__control & FE_ALL_EXCEPT);
-}
-
-static __inline int
-fedisableexcept(int __mask)
-{
- int __control;
-
- __fnstcw(&__control);
- __mask = __control | (__mask & FE_ALL_EXCEPT);
- __fldcw(__mask);
- return (~__control & FE_ALL_EXCEPT);
-}
+int feenableexcept(int __mask);
+int fedisableexcept(int __mask);
static __inline int
fegetexcept(void)
{
int __control;
+ /*
+ * We assume that the masks for the x87 and the SSE unit are
+ * the same.
+ *
+ * Only the x87 control word is read.  It is inverted before
+ * being restricted to FE_ALL_EXCEPT, so the result is the set
+ * of exceptions whose control-word bit is clear.
+ */
__fnstcw(&__control);
return (~__control & FE_ALL_EXCEPT);
}
OpenPOWER on IntegriCloud