summaryrefslogtreecommitdiffstats
path: root/sys/i386/include/ieeefp.h
diff options
context:
space:
mode:
author: bde <bde@FreeBSD.org> 2008-01-11 18:59:35 +0000
committer: bde <bde@FreeBSD.org> 2008-01-11 18:59:35 +0000
commit: 7371ad79e8998ef3d3819010004ceca320ee3709 (patch)
tree: f5212ff2778a33313e708a8f2f26ee5e0089eaea /sys/i386/include/ieeefp.h
parent: b1a379ee656f7f3ee5bb8d3f47b2fd30ee48b8eb (diff)
download: FreeBSD-src-7371ad79e8998ef3d3819010004ceca320ee3709.zip
FreeBSD-src-7371ad79e8998ef3d3819010004ceca320ee3709.tar.gz
MFamd64 (everything possible up to 1.19; mainly the amd64 implementations
of fpget*() and fpset*()).

The i386 fpget*() were efficient but a bit obfuscated (using macros and a case statement to demultiplex them through a single inline function). The demultiplexing mainly gave smaller source code. The i386 fpset*() were obfuscated in the same way and were very inefficient, because the case statement did not have enough cases or complexity, so all cases used the FP environment.

This also fixes a harmless bug in rev.1.12: fpsetmask() extracted the old value from the bit-field twice, but the doubled shift was harmless since the shift count is 0.

All fp*() interfaces are now inline functions on i386. They used to be macros that called (a different set of) inline functions. This is a small ABI change which shouldn't cause problems, since cases where inlining fails (mainly -O0) only give (working) static functions.
Diffstat (limited to 'sys/i386/include/ieeefp.h')
-rw-r--r-- sys/i386/include/ieeefp.h 166
1 files changed, 104 insertions, 62 deletions
diff --git a/sys/i386/include/ieeefp.h b/sys/i386/include/ieeefp.h
index 22b4f5d..65502ac 100644
--- a/sys/i386/include/ieeefp.h
+++ b/sys/i386/include/ieeefp.h
@@ -40,7 +40,7 @@
/*
* IEEE floating point type, constant and function definitions.
- * XXX: FP*FLD, FP*REG and FP*OFF are undocumented pollution.
+ * XXX: FP*FLD and FP*OFF are undocumented pollution.
*/
#ifndef _SYS_CDEFS_H_
@@ -81,14 +81,6 @@ typedef enum {
#define FP_X_STK 0x40 /* stack fault */
/*
- * FPU control and status register numbers (indexes into the env array).
- */
-#define FP_MSKS_REG 0 /* exception masks */
-#define FP_PRC_REG 0 /* precision */
-#define FP_RND_REG 0 /* direction */
-#define FP_STKY_REG 1 /* sticky flags */
-
-/*
* FPU control word bit-field masks.
*/
#define FP_MSKS_FLD 0x3f /* exception masks field */
@@ -114,6 +106,7 @@ typedef enum {
#ifdef __GNUCLIKE_ASM
+#define __fldcw(addr) __asm __volatile("fldcw %0" : : "m" (*(addr)))
#define __fldenv(addr) __asm __volatile("fldenv %0" : : "m" (*(addr)))
#define __fnclex() __asm __volatile("fnclex")
#define __fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
@@ -121,69 +114,118 @@ typedef enum {
#define __fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr)))
/*
- * return the contents of a FP register
+ * Load the control word. Be careful not to trap if there is a currently
+ * unmasked exception (ones that will become freshly unmasked are not a
+ * problem). This case must be handled by a save/restore of the
+ * environment or even of the full x87 state. Accessing the environment
+ * is very inefficient, so only do it when necessary.
*/
-static __inline__ int
-__fpgetreg(int _reg)
+static __inline void
+__fnldcw(unsigned short _cw, unsigned short _newcw)
{
- unsigned short _mem;
-
- /*-
- * This is more efficient than it looks. The switch gets optimized
- * away if _reg is constant.
- *
- * The default case only supports _reg == 0. We could handle more
- * registers (e.g., tags) using fnstenv, but the interface doesn't
- * support more.
- */
- switch(_reg) {
- default:
- __fnstcw(&_mem);
- break;
- case FP_STKY_REG:
- __fnstsw(&_mem);
- break;
+ struct {
+ unsigned _cw;
+ unsigned _other[6];
+ } _env;
+ unsigned short _sw;
+
+ if ((_cw & FP_MSKS_FLD) != FP_MSKS_FLD) {
+ __fnstsw(&_sw);
+ if (((_sw & ~_cw) & FP_STKY_FLD) != 0) {
+ __fnstenv(&_env);
+ _env._cw = _newcw;
+ __fldenv(&_env);
+ return;
+ }
}
- return _mem;
+ __fldcw(&_newcw);
}
-/*
- * set a FP mode; return previous mode
- */
-static __inline__ int
-__fpsetreg(int _m, int _reg, int _fld, int _off)
+static __inline fp_rnd_t
+fpgetround(void)
{
- unsigned _env[7];
- unsigned _p;
-
- /*
- * _reg == 0 could be handled better using fnstcw/fldcw.
- */
- __fnstenv(_env);
- _p = (_env[_reg] & _fld) >> _off;
- _env[_reg] = (_env[_reg] & ~_fld) | (_m << _off & _fld);
- __fldenv(_env);
- return _p;
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF));
+}
+
+static __inline fp_rnd_t
+fpsetround(fp_rnd_t _m)
+{
+ fp_rnd_t _p;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (fp_rnd_t)((_cw & FP_RND_FLD) >> FP_RND_OFF);
+ _newcw = _cw & ~FP_RND_FLD;
+ _newcw |= (_m << FP_RND_OFF) & FP_RND_FLD;
+ __fnldcw(_cw, _newcw);
+ return (_p);
+}
+
+static __inline fp_prec_t
+fpgetprec(void)
+{
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF));
+}
+
+static __inline fp_prec_t
+fpsetprec(fp_prec_t _m)
+{
+ fp_prec_t _p;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (fp_prec_t)((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
+ _newcw = _cw & ~FP_PRC_FLD;
+ _newcw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
+ __fnldcw(_cw, _newcw);
+ return (_p);
}
/*
- * SysV/386 FP control interface
+ * Get or set the exception mask.
+ * Note that the x87 mask bits are inverted by the API -- a mask bit of 1
+ * means disable for x87 and SSE, but for fp*mask() it means enable.
*/
-#define fpgetround() ((fp_rnd_t) \
- ((__fpgetreg(FP_RND_REG) & FP_RND_FLD) >> FP_RND_OFF))
-#define fpsetround(m) ((fp_rnd_t) \
- __fpsetreg((m), FP_RND_REG, FP_RND_FLD, FP_RND_OFF))
-#define fpgetprec() ((fp_prec_t) \
- ((__fpgetreg(FP_PRC_REG) & FP_PRC_FLD) >> FP_PRC_OFF))
-#define fpsetprec(m) ((fp_prec_t) \
- __fpsetreg((m), FP_PRC_REG, FP_PRC_FLD, FP_PRC_OFF))
-#define fpgetmask() ((fp_except_t) \
- ((~__fpgetreg(FP_MSKS_REG) & FP_MSKS_FLD) >> FP_MSKS_OFF))
-#define fpsetmask(m) ((fp_except_t) \
- (~__fpsetreg(~(m), FP_MSKS_REG, FP_MSKS_FLD, FP_MSKS_OFF)) & \
- (FP_MSKS_FLD >> FP_MSKS_OFF))
-#define fpgetsticky() ((fp_except_t) \
- ((__fpgetreg(FP_STKY_REG) & FP_STKY_FLD) >> FP_STKY_OFF))
+
+static __inline fp_except_t
+fpgetmask(void)
+{
+ unsigned short _cw;
+
+ __fnstcw(&_cw);
+ return ((~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF);
+}
+
+static __inline fp_except_t
+fpsetmask(fp_except_t _m)
+{
+ fp_except_t _p;
+ unsigned short _cw, _newcw;
+
+ __fnstcw(&_cw);
+ _p = (~_cw & FP_MSKS_FLD) >> FP_MSKS_OFF;
+ _newcw = _cw & ~FP_MSKS_FLD;
+ _newcw |= (~_m << FP_MSKS_OFF) & FP_MSKS_FLD;
+ __fnldcw(_cw, _newcw);
+ return (_p);
+}
+
+static __inline fp_except_t
+fpgetsticky(void)
+{
+ unsigned _ex;
+ unsigned short _sw;
+
+ __fnstsw(&_sw);
+ _ex = (_sw & FP_STKY_FLD) >> FP_STKY_OFF;
+ return ((fp_except_t)_ex);
+}
static __inline fp_except_t
fpresetsticky(fp_except_t _m)
OpenPOWER on IntegriCloud