diff options
Diffstat (limited to 'arch/mips/math-emu')
-rw-r--r-- | arch/mips/math-emu/Makefile | 4 | ||||
-rw-r--r-- | arch/mips/math-emu/cp1emu.c | 45 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_maddf.c | 35 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_msubf.c | 269 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_mul.c | 4 | ||||
-rw-r--r-- | arch/mips/math-emu/dsemul.c | 2 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754dp.c | 9 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754dp.h | 1 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754int.h | 10 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754sp.c | 12 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754sp.h | 17 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_add.c | 6 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_maddf.c | 43 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_msubf.c | 258 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_sub.c | 6 |
15 files changed, 139 insertions, 582 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile index a19641d..e9bbc2a 100644 --- a/arch/mips/math-emu/Makefile +++ b/arch/mips/math-emu/Makefile @@ -4,9 +4,9 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ - dp_tint.o dp_fint.o dp_maddf.o dp_msubf.o dp_2008class.o dp_fmin.o dp_fmax.o \ + dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \ sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ - sp_tint.o sp_fint.o sp_maddf.o sp_msubf.o sp_2008class.o sp_fmin.o sp_fmax.o \ + sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \ dsemul.o lib-y += ieee754d.o \ diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index cdfd44f..d96e912b 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -445,9 +445,11 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case spec_op: switch (insn.r_format.func) { case jalr_op: - regs->regs[insn.r_format.rd] = - regs->cp0_epc + dec_insn.pc_inc + - dec_insn.next_pc_inc; + if (insn.r_format.rd != 0) { + regs->regs[insn.r_format.rd] = + regs->cp0_epc + dec_insn.pc_inc + + dec_insn.next_pc_inc; + } /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ @@ -973,9 +975,10 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, struct mm_decoded_insn dec_insn, void *__user *fault_addr) { unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc; - unsigned int cond, cbit; + unsigned int cond, cbit, bit0; mips_instruction ir; int likely, pc_inc; + union fpureg *fpr; u32 __user *wva; u64 __user *dva; u32 wval; @@ -1187,14 +1190,14 @@ emul: return SIGILL; cond = likely = 0; + fpr = ¤t->thread.fpu.fpr[MIPSInst_RT(ir)]; + bit0 = get_fpr32(fpr, 0) & 0x1; switch (MIPSInst_RS(ir)) { case bc1eqz_op: - if (get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1) - cond = 1; + cond = bit0 == 0; break; case bc1nez_op: - if (!(get_fpr32(¤t->thread.fpu.fpr[MIPSInst_RT(ir)], 0) & 0x1)) - cond = 1; + cond = bit0 != 0; break; } goto branch_common; @@ -1674,7 +1677,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, union ieee754sp(*b) (union ieee754sp, union ieee754sp); union ieee754sp(*u) (union ieee754sp); } handler; - union ieee754sp fs, ft; + union ieee754sp fd, fs, ft; switch (MIPSInst_FUNC(ir)) { /* binary ops */ @@ -1945,6 +1948,17 @@ copcsr: rfmt = w_fmt; goto copcsr; + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + SPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + SPFROMREG(rv.s, MIPSInst_FT(ir)); + else + SPFROMREG(rv.s, MIPSInst_FS(ir)); + break; + case fcvtl_op: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; @@ -1993,7 +2007,7 @@ copcsr: } case d_fmt: { - union ieee754dp fs, ft; + union ieee754dp fd, fs, ft; union { union ieee754dp(*b) (union ieee754dp, union ieee754dp); union ieee754dp(*u) (union ieee754dp); @@ -2243,6 +2257,17 @@ dcopuop: rfmt = w_fmt; goto copcsr; + case fsel_op: + if (!cpu_has_mips_r6) + return SIGILL; + + DPFROMREG(fd, MIPSInst_FD(ir)); + if (fd.bits & 0x1) + DPFROMREG(rv.d, MIPSInst_FT(ir)); + else + DPFROMREG(rv.d, MIPSInst_FS(ir)); + break; + case fcvtl_op: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index 119eda9..4a2d03c 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -14,8 +14,12 @@ #include "ieee754dp.h" -union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, - union ieee754dp y) +enum maddf_flags { + maddf_negate_product = 1 << 0, +}; + +static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y, enum maddf_flags flags) { int re; int rs; @@ -32,16 +36,15 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, COMPXDP; COMPYDP; - - u64 zm; int ze; int zs __maybe_unused; int zc; + COMPZDP; EXPLODEXDP; EXPLODEYDP; - EXPLODEDP(z, zc, zs, ze, zm) + EXPLODEZDP; FLUSHXDP; FLUSHYDP; - FLUSHDP(z, zc, zs, ze, zm); + FLUSHZDP; ieee754_clearcx(); @@ -50,7 +53,7 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754dp_nanxcpt(z); case IEEE754_CLASS_DNORM: - DPDNORMx(zm, ze); + DPDNORMZ; /* QNAN is handled separately below */ } @@ -154,13 +157,15 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, re = xe + ye; rs = xs ^ ys; + if (flags & maddf_negate_product) + rs ^= 1; /* shunt to top of word */ xm <<= 64 - (DP_FBITS + 1); ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. */ /* 32 * 32 => 64 */ @@ -198,7 +203,7 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, if ((s64) rm < 0) { rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; + re++; } else { rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); @@ -263,3 +268,15 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, return ieee754dp_format(zs, ze, zm); } + +union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, 0); +} + +union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, + union ieee754dp y) +{ + return _dp_maddf(z, x, y, maddf_negate_product); +} diff --git a/arch/mips/math-emu/dp_msubf.c b/arch/mips/math-emu/dp_msubf.c deleted file mode 100644 index 1224126..0000000 --- a/arch/mips/math-emu/dp_msubf.c +++ /dev/null @@ -1,269 +0,0 @@ -/* - * IEEE754 floating point arithmetic - * double precision: MSUB.f (Fused Multiply Subtract) - * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) - * - * MIPS floating point support - * Copyright (C) 2015 Imagination Technologies, Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include "ieee754dp.h" - -union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, - union ieee754dp y) -{ - int re; - int rs; - u64 rm; - unsigned lxm; - unsigned hxm; - unsigned lym; - unsigned hym; - u64 lrm; - u64 hrm; - u64 t; - u64 at; - int s; - - COMPXDP; - COMPYDP; - - u64 zm; int ze; int zs __maybe_unused; int zc; - - EXPLODEXDP; - EXPLODEYDP; - EXPLODEDP(z, zc, zs, ze, zm) - - FLUSHXDP; - FLUSHYDP; - FLUSHDP(z, zc, zs, ze, zm); - - ieee754_clearcx(); - - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754dp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - DPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754dp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754dp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; - - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; - - - /* - * Infinity handling - */ - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754dp_indef(); - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754dp_inf(xs ^ ys); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - /* Multiplication is 0 so just return z */ - return z; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): - DPDNORMX; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - DPDNORMY; - break; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - DPDNORMX; - break; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754dp_inf(zs); - /* fall through to real computations */ - } - - /* Finally get to do some computation */ - - /* - * Do the multiplication bit first - * - * rm = xm * ym, re = xe + ye basically - * - * At this point xm and ym should have been normalized. - */ - assert(xm & DP_HIDDEN_BIT); - assert(ym & DP_HIDDEN_BIT); - - re = xe + ye; - rs = xs ^ ys; - - /* shunt to top of word */ - xm <<= 64 - (DP_FBITS + 1); - ym <<= 64 - (DP_FBITS + 1); - - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - - /* 32 * 32 => 64 */ -#define DPXMULT(x, y) ((u64)(x) * (u64)y) - - lxm = xm; - hxm = xm >> 32; - lym = ym; - hym = ym >> 32; - - lrm = DPXMULT(lxm, lym); - hrm = DPXMULT(hxm, hym); - - t = DPXMULT(lxm, hym); - - at = lrm + (t << 32); - hrm += at < lrm; - lrm = at; - - hrm = hrm + (t >> 32); - - t = DPXMULT(hxm, lym); - - at = lrm + (t << 32); - hrm += at < lrm; - lrm = at; - - hrm = hrm + (t >> 32); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((s64) rm < 0) { - rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | - ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; - } else { - rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | - ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); - } - assert(rm & (DP_HIDDEN_BIT << 3)); - - /* And now the subtraction */ - - /* flip sign of r and handle as add */ - rs ^= 1; - - assert(zm & DP_HIDDEN_BIT); - - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; - - if (ze > re) { - /* - * Have to shift y fraction right to align. - */ - s = ze - re; - rm = XDPSRS(rm, s); - re += s; - } else if (re > ze) { - /* - * Have to shift x fraction right to align. - */ - s = re - ze; - zm = XDPSRS(zm, s); - ze += s; - } - assert(ze == re); - assert(ze <= DP_EMAX); - - if (zs == rs) { - /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in xm, xs and xe. - */ - zm = zm + rm; - - if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ - zm = XDPSRS1(zm); - ze++; - } - } else { - if (zm >= rm) { - zm = zm - rm; - } else { - zm = rm - zm; - zs = rs; - } - if (zm == 0) - return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); - - /* - * Normalize to rounding precision. - */ - while ((zm >> (DP_FBITS + 3)) == 0) { - zm <<= 1; - ze--; - } - } - - return ieee754dp_format(zs, ze, zm); -} diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c index d0901f0..87d0b44 100644 --- a/arch/mips/math-emu/dp_mul.c +++ b/arch/mips/math-emu/dp_mul.c @@ -125,7 +125,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. + * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. */ /* 32 * 32 => 64 */ @@ -163,7 +163,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) if ((s64) rm < 0) { rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | ((rm << (DP_FBITS + 1 + 3)) != 0); - re++; + re++; } else { rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index 46b964d..d4ceacd 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -60,7 +60,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc) unsigned int rs; s32 v; - rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2); + rs = (((insn.mm_a_format.rs + 0xe) & 0xf) + 2); v = regs->cp0_epc & ~3; v += insn.mm_a_format.simmediate << 2; regs->regs[rs] = (long)v; diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c index 47d26c8..465a034 100644 --- a/arch/mips/math-emu/ieee754dp.c +++ b/arch/mips/math-emu/ieee754dp.c @@ -54,10 +54,13 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r) assert(ieee754dp_issnan(r)); ieee754_setcx(IEEE754_INVALID_OPERATION); - if (ieee754_csr.nan2008) + if (ieee754_csr.nan2008) { DPMANT(r) |= DP_MBIT(DP_FBITS - 1); - else - r = ieee754dp_indef(); + } else { + DPMANT(r) &= ~DP_MBIT(DP_FBITS - 1); + if (!ieee754dp_isnan(r)) + DPMANT(r) |= DP_MBIT(DP_FBITS - 2); + } return r; } diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h index e2babd9..9ba0230 100644 --- a/arch/mips/math-emu/ieee754dp.h +++ b/arch/mips/math-emu/ieee754dp.h @@ -60,6 +60,7 @@ static inline int ieee754dp_finite(union ieee754dp x) while ((m >> DP_FBITS) == 0) { m <<= 1; e--; } #define DPDNORMX DPDNORMx(xm, xe) #define DPDNORMY DPDNORMx(ym, ye) +#define DPDNORMZ DPDNORMx(zm, ze) static inline union ieee754dp builddp(int s, int bx, u64 m) { diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index ed7bb27..8bc2f69 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -55,6 +55,9 @@ static inline int ieee754_class_nan(int xc) #define COMPYSP \ unsigned ym; int ye; int ys; int yc +#define COMPZSP \ + unsigned zm; int ze; int zs; int zc + #define EXPLODESP(v, vc, vs, ve, vm) \ { \ vs = SPSIGN(v); \ @@ -81,6 +84,7 @@ static inline int ieee754_class_nan(int xc) } #define EXPLODEXSP EXPLODESP(x, xc, xs, xe, xm) #define EXPLODEYSP EXPLODESP(y, yc, ys, ye, ym) +#define EXPLODEZSP EXPLODESP(z, zc, zs, ze, zm) #define COMPXDP \ @@ -89,6 +93,9 @@ static inline int ieee754_class_nan(int xc) #define COMPYDP \ u64 ym; int ye; int ys; int yc +#define COMPZDP \ + u64 zm; int ze; int zs; int zc + #define EXPLODEDP(v, vc, vs, ve, vm) \ { \ vm = DPMANT(v); \ @@ -115,6 +122,7 @@ static inline int ieee754_class_nan(int xc) } #define EXPLODEXDP EXPLODEDP(x, xc, xs, xe, xm) #define EXPLODEYDP EXPLODEDP(y, yc, ys, ye, ym) +#define EXPLODEZDP EXPLODEDP(z, zc, zs, ze, zm) #define FLUSHDP(v, vc, vs, ve, vm) \ if (vc==IEEE754_CLASS_DNORM) { \ @@ -140,7 +148,9 @@ static inline int ieee754_class_nan(int xc) #define FLUSHXDP FLUSHDP(x, xc, xs, xe, xm) #define FLUSHYDP FLUSHDP(y, yc, ys, ye, ym) +#define FLUSHZDP FLUSHDP(z, zc, zs, ze, zm) #define FLUSHXSP FLUSHSP(x, xc, xs, xe, xm) #define FLUSHYSP FLUSHSP(y, yc, ys, ye, ym) +#define FLUSHZSP FLUSHSP(z, zc, zs, ze, zm) #endif /* __IEEE754INT_H */ diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c index e0b2c45..260e6896 100644 --- a/arch/mips/math-emu/ieee754sp.c +++ b/arch/mips/math-emu/ieee754sp.c @@ -54,10 +54,13 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r) assert(ieee754sp_issnan(r)); ieee754_setcx(IEEE754_INVALID_OPERATION); - if (ieee754_csr.nan2008) + if (ieee754_csr.nan2008) { SPMANT(r) |= SP_MBIT(SP_FBITS - 1); - else - r = ieee754sp_indef(); + } else { + SPMANT(r) &= ~SP_MBIT(SP_FBITS - 1); + if (!ieee754sp_isnan(r)) + SPMANT(r) |= SP_MBIT(SP_FBITS - 2); + } return r; } @@ -138,7 +141,8 @@ union ieee754sp ieee754sp_format(int sn, int xe, unsigned xm) } else { /* sticky right shift es bits */ - SPXSRSXn(es); + xm = XSPSRS(xm, es); + xe += es; assert((xm & (SP_HIDDEN_BIT << 3)) == 0); assert(xe == SP_EMIN); } diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h index 374a3f0..8476067 100644 --- a/arch/mips/math-emu/ieee754sp.h +++ b/arch/mips/math-emu/ieee754sp.h @@ -46,25 +46,24 @@ static inline int ieee754sp_finite(union ieee754sp x) } /* 3bit extended single precision sticky right shift */ -#define SPXSRSXn(rs) \ - (xe += rs, \ - xm = (rs > (SP_FBITS+3))?1:((xm) >> (rs)) | ((xm) << (32-(rs)) != 0)) +#define XSPSRS(v, rs) \ + ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0)) -#define SPXSRSX1() \ - (xe++, (xm = (xm >> 1) | (xm & 1))) +#define XSPSRS1(m) \ + ((m >> 1) | (m & 1)) -#define SPXSRSYn(rs) \ - (ye+=rs, \ - ym = (rs > (SP_FBITS+3))?1:((ym) >> (rs)) | ((ym) << (32-(rs)) != 0)) +#define SPXSRSX1() \ + (xe++, (xm = XSPSRS1(xm))) #define SPXSRSY1() \ - (ye++, (ym = (ym >> 1) | (ym & 1))) + (ye++, (ym = XSPSRS1(ym))) /* convert denormal to normalized with extended exponent */ #define SPDNORMx(m,e) \ while ((m >> SP_FBITS) == 0) { m <<= 1; e--; } #define SPDNORMX SPDNORMx(xm, xe) #define SPDNORMY SPDNORMx(ym, ye) +#define SPDNORMZ SPDNORMx(zm, ze) static inline union ieee754sp buildsp(int s, int bx, unsigned m) { diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c index f1c87b0..c55c0c0 100644 --- a/arch/mips/math-emu/sp_add.c +++ b/arch/mips/math-emu/sp_add.c @@ -132,13 +132,15 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) * Have to shift y fraction right to align. */ s = xe - ye; - SPXSRSYn(s); + ym = XSPSRS(ym, s); + ye += s; } else if (ye > xe) { /* * Have to shift x fraction right to align. */ s = ye - xe; - SPXSRSXn(s); + xm = XSPSRS(xm, s); + xe += s; } assert(xe == ye); assert(xe <= SP_EMAX); diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index dd1dd83..a8cd8b4 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -14,8 +14,12 @@ #include "ieee754sp.h" -union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, - union ieee754sp y) +enum maddf_flags { + maddf_negate_product = 1 << 0, +}; + +static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y, enum maddf_flags flags) { int re; int rs; @@ -32,15 +36,15 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, COMPXSP; COMPYSP; - u32 zm; int ze; int zs __maybe_unused; int zc; + COMPZSP; EXPLODEXSP; EXPLODEYSP; - EXPLODESP(z, zc, zs, ze, zm) + EXPLODEZSP; FLUSHXSP; FLUSHYSP; - FLUSHSP(z, zc, zs, ze, zm); + FLUSHZSP; ieee754_clearcx(); @@ -49,7 +53,7 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754sp_nanxcpt(z); case IEEE754_CLASS_DNORM: - SPDNORMx(zm, ze); + SPDNORMZ; /* QNAN is handled separately below */ } @@ -154,6 +158,8 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, re = xe + ye; rs = xs ^ ys; + if (flags & maddf_negate_product) + rs ^= 1; /* shunt to top of word */ xm <<= 32 - (SP_FBITS + 1); @@ -208,16 +214,18 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, if (ze > re) { /* - * Have to shift y fraction right to align. + * Have to shift r fraction right to align. */ s = ze - re; - SPXSRSYn(s); + rm = XSPSRS(rm, s); + re += s; } else if (re > ze) { /* - * Have to shift x fraction right to align. + * Have to shift z fraction right to align. */ s = re - ze; - SPXSRSYn(s); + zm = XSPSRS(zm, s); + ze += s; } assert(ze == re); assert(ze <= SP_EMAX); @@ -230,7 +238,8 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, zm = zm + rm; if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - SPXSRSX1(); + zm = XSPSRS1(zm); + ze++; } } else { if (zm >= rm) { @@ -253,3 +262,15 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, } return ieee754sp_format(zs, ze, zm); } + +union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, 0); +} + +union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, + union ieee754sp y) +{ + return _sp_maddf(z, x, y, maddf_negate_product); +} diff --git a/arch/mips/math-emu/sp_msubf.c b/arch/mips/math-emu/sp_msubf.c deleted file mode 100644 index 81c38b980..0000000 --- a/arch/mips/math-emu/sp_msubf.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * IEEE754 floating point arithmetic - * single precision: MSUB.f (Fused Multiply Subtract) - * MSUBF.fmt: FPR[fd] = FPR[fd] - (FPR[fs] x FPR[ft]) - * - * MIPS floating point support - * Copyright (C) 2015 Imagination Technologies, Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can distribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include "ieee754sp.h" - -union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, - union ieee754sp y) -{ - int re; - int rs; - unsigned rm; - unsigned short lxm; - unsigned short hxm; - unsigned short lym; - unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; - int s; - - COMPXSP; - COMPYSP; - u32 zm; int ze; int zs __maybe_unused; int zc; - - EXPLODEXSP; - EXPLODEYSP; - EXPLODESP(z, zc, zs, ze, zm) - - FLUSHXSP; - FLUSHYSP; - FLUSHSP(z, zc, zs, ze, zm); - - ieee754_clearcx(); - - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754sp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - SPDNORMx(zm, ze); - /* QNAN is handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754sp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754sp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; - - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; - - /* - * Infinity handling - */ - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - ieee754_setcx(IEEE754_INVALID_OPERATION); - return ieee754sp_indef(); - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754sp_inf(xs ^ ys); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - /* Multiplication is 0 so just return z */ - return z; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): - SPDNORMX; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - SPDNORMY; - break; - - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - SPDNORMX; - break; - - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) - return ieee754sp_inf(zs); - /* fall through to real compuation */ - } - - /* Finally get to do some computation */ - - /* - * Do the multiplication bit first - * - * rm = xm * ym, re = xe + ye basically - * - * At this point xm and ym should have been normalized. - */ - - /* rm = xm * ym, re = xe+ye basically */ - assert(xm & SP_HIDDEN_BIT); - assert(ym & SP_HIDDEN_BIT); - - re = xe + ye; - rs = xs ^ ys; - - /* shunt to top of word */ - xm <<= 32 - (SP_FBITS + 1); - ym <<= 32 - (SP_FBITS + 1); - - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - lxm = xm & 0xffff; - hxm = xm >> 16; - lym = ym & 0xffff; - hym = ym >> 16; - - lrm = lxm * lym; /* 16 * 16 => 32 */ - hrm = hxm * hym; /* 16 * 16 => 32 */ - - t = lxm * hym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - t = hxm * lym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); - - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((int) rm < 0) { - rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | - ((rm << (SP_FBITS + 1 + 3)) != 0); - re++; - } else { - rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | - ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); - } - assert(rm & (SP_HIDDEN_BIT << 3)); - - /* And now the subtraction */ - - /* Flip sign of r and handle as add */ - rs ^= 1; - - assert(zm & SP_HIDDEN_BIT); - - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; - - if (ze > re) { - /* - * Have to shift y fraction right to align. - */ - s = ze - re; - SPXSRSYn(s); - } else if (re > ze) { - /* - * Have to shift x fraction right to align. - */ - s = re - ze; - SPXSRSYn(s); - } - assert(ze == re); - assert(ze <= SP_EMAX); - - if (zs == rs) { - /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in zm, zs and ze. - */ - zm = zm + rm; - - if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - SPXSRSX1(); /* shift preserving sticky */ - } - } else { - if (zm >= rm) { - zm = zm - rm; - } else { - zm = rm - zm; - zs = rs; - } - if (zm == 0) - return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); - - /* - * Normalize in extended single precision - */ - while ((zm >> (SP_MBITS + 3)) == 0) { - zm <<= 1; - ze--; - } - - } - return ieee754sp_format(zs, ze, zm); -} diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c index ec5f937..dc998ed 100644 --- a/arch/mips/math-emu/sp_sub.c +++ b/arch/mips/math-emu/sp_sub.c @@ -134,13 +134,15 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) * have to shift y fraction right to align */ s = xe - ye; - SPXSRSYn(s); + ym = XSPSRS(ym, s); + ye += s; } else if (ye > xe) { /* * have to shift x fraction right to align */ s = ye - xe; - SPXSRSXn(s); + xm = XSPSRS(xm, s); + xe += s; } assert(xe == ye); assert(xe <= SP_EMAX); |