diff options
Diffstat (limited to 'arch/mips/math-emu')
-rw-r--r-- | arch/mips/math-emu/Makefile | 6 | ||||
-rw-r--r-- | arch/mips/math-emu/cp1emu.c | 284 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_fmax.c | 84 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_fmin.c | 86 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_maddf.c | 246 | ||||
-rw-r--r-- | arch/mips/math-emu/dp_rint.c | 89 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754.h | 2 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754int.h | 4 | ||||
-rw-r--r-- | arch/mips/math-emu/ieee754sp.h | 4 | ||||
-rw-r--r-- | arch/mips/math-emu/me-debugfs.c | 318 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_fmax.c | 84 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_fmin.c | 86 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_maddf.c | 229 | ||||
-rw-r--r-- | arch/mips/math-emu/sp_rint.c | 90 |
14 files changed, 1294 insertions, 318 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile index e9bbc2a..e9f10b8 100644 --- a/arch/mips/math-emu/Makefile +++ b/arch/mips/math-emu/Makefile @@ -4,9 +4,11 @@ obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \ dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \ - dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \ + dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \ + dp_fmax.o \ sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \ - sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \ + sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \ + sp_fmax.o \ dsemul.o lib-y += ieee754d.o \ diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index f08a7b4..192542d 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -58,7 +58,7 @@ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *, mips_instruction); static int fpux_emu(struct pt_regs *, - struct mips_fpu_struct *, mips_instruction, void *__user *); + struct mips_fpu_struct *, mips_instruction, void __user **); /* Control registers */ @@ -830,12 +830,12 @@ do { \ } while (0) #define DIFROMREG(di, x) \ - ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0)) + ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0)) #define DITOREG(di, x) \ do { \ unsigned fpr, i; \ - fpr = (x) & ~(cop1_64bit(xcp) == 0); \ + fpr = (x) & ~(cop1_64bit(xcp) ^ 1); \ set_fpr64(&ctx->fpr[fpr], 0, di); \ for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \ set_fpr64(&ctx->fpr[fpr], i, 0); \ @@ -973,7 +973,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx, */ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx, - struct mm_decoded_insn dec_insn, void *__user *fault_addr) + struct mm_decoded_insn dec_insn, void __user **fault_addr) { unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc; unsigned int cond, cbit, bit0; @@ -1195,9 +1195,11 @@ emul: bit0 = get_fpr32(fpr, 0) & 0x1; switch (MIPSInst_RS(ir)) { case bc1eqz_op: + MIPS_FPU_EMU_INC_STATS(bc1eqz); cond = bit0 == 0; break; case bc1nez_op: + MIPS_FPU_EMU_INC_STATS(bc1nez); cond = bit0 != 0; break; } @@ -1230,6 +1232,7 @@ emul: break; } branch_common: + MIPS_FPU_EMU_INC_STATS(branches); set_delay_slot(xcp); if (cond) { /* @@ -1460,7 +1463,7 @@ DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg); DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg); static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, - mips_instruction ir, void *__user *fault_addr) + mips_instruction ir, void __user **fault_addr) { unsigned rcsr = 0; /* resulting csr */ @@ -1682,15 +1685,19 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, switch (MIPSInst_FUNC(ir)) { /* binary ops */ case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_s); handler.b = ieee754sp_add; goto scopbop; case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_s); handler.b = ieee754sp_sub; goto scopbop; case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_s); handler.b = ieee754sp_mul; goto scopbop; case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_s); handler.b = ieee754sp_div; goto scopbop; @@ -1699,6 +1706,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sqrt_s); handler.u = ieee754sp_sqrt; goto scopuop; @@ -1711,6 +1719,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rsqrt_s); handler.u = fpemu_sp_rsqrt; goto scopuop; @@ -1718,6 +1727,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(recip_s); handler.u = fpemu_sp_recip; goto scopuop; @@ -1754,6 +1764,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(seleqz_s); SPFROMREG(rv.s, MIPSInst_FT(ir)); if (rv.w & 0x1) rv.w = 0; @@ -1765,6 +1776,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(selnez_s); SPFROMREG(rv.s, MIPSInst_FT(ir)); if (rv.w & 0x1) SPFROMREG(rv.s, MIPSInst_FS(ir)); @@ -1778,6 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maddf_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); @@ -1791,6 +1804,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(msubf_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); SPFROMREG(fd, MIPSInst_FD(ir)); @@ -1804,9 +1818,9 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rint_s); SPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754sp_tlong(fs); - rv.s = ieee754sp_flong(rv.l); + rv.s = ieee754sp_rint(fs); goto copcsr; } @@ -1816,6 +1830,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(class_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_2008class(fs); rfmt = w_fmt; @@ -1828,6 +1843,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(min_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmin(fs, ft); @@ -1840,6 +1856,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(mina_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmina(fs, ft); @@ -1852,6 +1869,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(max_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmax(fs, ft); @@ -1864,6 +1882,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maxa_s); SPFROMREG(ft, MIPSInst_FT(ir)); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fmaxa(fs, ft); @@ -1871,15 +1890,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, } case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_s); handler.u = ieee754sp_abs; goto scopuop; case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_s); handler.u = ieee754sp_neg; goto scopuop; case fmov_op: /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_s); SPFROMREG(rv.s, MIPSInst_FS(ir)); goto copcsr; @@ -1922,12 +1944,14 @@ copcsr: return SIGILL; /* not defined */ case fcvtd_op: + MIPS_FPU_EMU_INC_STATS(cvt_d_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fsp(fs); rfmt = d_fmt; goto copcsr; case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754sp_tint(fs); rfmt = w_fmt; @@ -1940,6 +1964,15 @@ copcsr: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + if (MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_s); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_s); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_s); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_s); + oldrm = ieee754_csr.rm; SPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -1952,6 +1985,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sel_s); SPFROMREG(fd, MIPSInst_FD(ir)); if (fd.bits & 0x1) SPFROMREG(rv.s, MIPSInst_FT(ir)); @@ -1963,6 +1997,7 @@ copcsr: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(cvt_l_s); SPFROMREG(fs, MIPSInst_FS(ir)); rv.l = ieee754sp_tlong(fs); rfmt = l_fmt; @@ -1975,6 +2010,15 @@ copcsr: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_s); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_s); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_s); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_s); + oldrm = ieee754_csr.rm; SPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -2016,15 +2060,19 @@ copcsr: switch (MIPSInst_FUNC(ir)) { /* binary ops */ case fadd_op: + MIPS_FPU_EMU_INC_STATS(add_d); handler.b = ieee754dp_add; goto dcopbop; case fsub_op: + MIPS_FPU_EMU_INC_STATS(sub_d); handler.b = ieee754dp_sub; goto dcopbop; case fmul_op: + MIPS_FPU_EMU_INC_STATS(mul_d); handler.b = ieee754dp_mul; goto dcopbop; case fdiv_op: + MIPS_FPU_EMU_INC_STATS(div_d); handler.b = ieee754dp_div; goto dcopbop; @@ -2033,6 +2081,7 @@ copcsr: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sqrt_d); handler.u = ieee754dp_sqrt; goto dcopuop; /* @@ -2044,12 +2093,14 @@ copcsr: if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rsqrt_d); handler.u = fpemu_dp_rsqrt; goto dcopuop; case frecip_op: if (!cpu_has_mips_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(recip_d); handler.u = fpemu_dp_recip; goto dcopuop; case fmovc_op: @@ -2083,6 +2134,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(seleqz_d); DPFROMREG(rv.d, MIPSInst_FT(ir)); if (rv.l & 0x1) rv.l = 0; @@ -2094,6 +2146,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(selnez_d); DPFROMREG(rv.d, MIPSInst_FT(ir)); if (rv.l & 0x1) DPFROMREG(rv.d, MIPSInst_FS(ir)); @@ -2107,6 +2160,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maddf_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); @@ -2120,6 +2174,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(msubf_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); DPFROMREG(fd, MIPSInst_FD(ir)); @@ -2133,9 +2188,9 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(rint_d); DPFROMREG(fs, MIPSInst_FS(ir)); - rv.l = ieee754dp_tlong(fs); - rv.d = ieee754dp_flong(rv.l); + rv.d = ieee754dp_rint(fs); goto copcsr; } @@ -2145,9 +2200,10 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(class_d); DPFROMREG(fs, MIPSInst_FS(ir)); - rv.w = ieee754dp_2008class(fs); - rfmt = w_fmt; + rv.l = ieee754dp_2008class(fs); + rfmt = l_fmt; break; } @@ -2157,6 +2213,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(min_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmin(fs, ft); @@ -2169,6 +2226,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(mina_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmina(fs, ft); @@ -2181,6 +2239,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(max_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmax(fs, ft); @@ -2193,6 +2252,7 @@ copcsr: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(maxa_d); DPFROMREG(ft, MIPSInst_FT(ir)); DPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fmaxa(fs, ft); @@ -2200,15 +2260,18 @@ copcsr: } case fabs_op: + MIPS_FPU_EMU_INC_STATS(abs_d); handler.u = ieee754dp_abs; goto dcopuop; case fneg_op: + MIPS_FPU_EMU_INC_STATS(neg_d); handler.u = ieee754dp_neg; goto dcopuop; case fmov_op: /* an easy one */ + MIPS_FPU_EMU_INC_STATS(mov_d); DPFROMREG(rv.d, MIPSInst_FS(ir)); goto copcsr; @@ -2228,6 +2291,7 @@ dcopuop: * unary conv ops */ case fcvts_op: + MIPS_FPU_EMU_INC_STATS(cvt_s_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fdp(fs); rfmt = s_fmt; @@ -2237,6 +2301,7 @@ dcopuop: return SIGILL; /* not defined */ case fcvtw_op: + MIPS_FPU_EMU_INC_STATS(cvt_w_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.w = ieee754dp_tint(fs); /* wrong */ rfmt = w_fmt; @@ -2249,6 +2314,15 @@ dcopuop: if (!cpu_has_mips_2_3_4_5_r) return SIGILL; + if (MIPSInst_FUNC(ir) == fceil_op) + MIPS_FPU_EMU_INC_STATS(ceil_w_d); + if (MIPSInst_FUNC(ir) == ffloor_op) + MIPS_FPU_EMU_INC_STATS(floor_w_d); + if (MIPSInst_FUNC(ir) == fround_op) + MIPS_FPU_EMU_INC_STATS(round_w_d); + if (MIPSInst_FUNC(ir) == ftrunc_op) + MIPS_FPU_EMU_INC_STATS(trunc_w_d); + oldrm = ieee754_csr.rm; DPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -2261,6 +2335,7 @@ dcopuop: if (!cpu_has_mips_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(sel_d); DPFROMREG(fd, MIPSInst_FD(ir)); if (fd.bits & 0x1) DPFROMREG(rv.d, MIPSInst_FT(ir)); @@ -2272,6 +2347,7 @@ dcopuop: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + MIPS_FPU_EMU_INC_STATS(cvt_l_d); DPFROMREG(fs, MIPSInst_FS(ir)); rv.l = ieee754dp_tlong(fs); rfmt = l_fmt; @@ -2284,6 +2360,15 @@ dcopuop: if (!cpu_has_mips_3_4_5_64_r2_r6) return SIGILL; + if (MIPSInst_FUNC(ir) == fceill_op) + MIPS_FPU_EMU_INC_STATS(ceil_l_d); + if (MIPSInst_FUNC(ir) == ffloorl_op) + MIPS_FPU_EMU_INC_STATS(floor_l_d); + if (MIPSInst_FUNC(ir) == froundl_op) + MIPS_FPU_EMU_INC_STATS(round_l_d); + if (MIPSInst_FUNC(ir) == ftruncl_op) + MIPS_FPU_EMU_INC_STATS(trunc_l_d); + oldrm = ieee754_csr.rm; DPFROMREG(fs, MIPSInst_FS(ir)); ieee754_csr.rm = MIPSInst_FUNC(ir); @@ -2325,12 +2410,14 @@ dcopuop: switch (MIPSInst_FUNC(ir)) { case fcvts_op: /* convert word to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_w); SPFROMREG(fs, MIPSInst_FS(ir)); rv.s = ieee754sp_fint(fs.bits); rfmt = s_fmt; goto copcsr; case fcvtd_op: /* convert word to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_w); SPFROMREG(fs, MIPSInst_FS(ir)); rv.d = ieee754dp_fint(fs.bits); rfmt = d_fmt; @@ -2350,6 +2437,90 @@ dcopuop: (MIPSInst_FUNC(ir) & 0x20)) return SIGILL; + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_s); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_s); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_s); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_s); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_s); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_s); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_s); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_s); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_s); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_s); + break; + } + } + } + /* fmt is w_fmt for single precision so fix it */ rfmt = s_fmt; /* default to false */ @@ -2394,6 +2565,7 @@ dcopuop: break; } } + break; } case l_fmt: @@ -2406,11 +2578,13 @@ dcopuop: switch (MIPSInst_FUNC(ir)) { case fcvts_op: /* convert long to single precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_s_l); rv.s = ieee754sp_flong(bits); rfmt = s_fmt; goto copcsr; case fcvtd_op: /* convert long to double precision real */ + MIPS_FPU_EMU_INC_STATS(cvt_d_l); rv.d = ieee754dp_flong(bits); rfmt = d_fmt; goto copcsr; @@ -2424,6 +2598,90 @@ dcopuop: (MIPSInst_FUNC(ir) & 0x20)) return SIGILL; + if (!sig) { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_af_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_un_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_eq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_lt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_ult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_le_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_ule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_or_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_une_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_ne_d); + break; + } + } + } else { + if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) { + switch (cmpop) { + case 0: + MIPS_FPU_EMU_INC_STATS(cmp_saf_d); + break; + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sun_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_seq_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sueq_d); + break; + case 4: + MIPS_FPU_EMU_INC_STATS(cmp_slt_d); + break; + case 5: + MIPS_FPU_EMU_INC_STATS(cmp_sult_d); + break; + case 6: + MIPS_FPU_EMU_INC_STATS(cmp_sle_d); + break; + case 7: + MIPS_FPU_EMU_INC_STATS(cmp_sule_d); + break; + } + } else { + switch (cmpop) { + case 1: + MIPS_FPU_EMU_INC_STATS(cmp_sor_d); + break; + case 2: + MIPS_FPU_EMU_INC_STATS(cmp_sune_d); + break; + case 3: + MIPS_FPU_EMU_INC_STATS(cmp_sne_d); + break; + } + } + } + /* fmt is l_fmt for double precision so fix it */ rfmt = d_fmt; /* default to false */ @@ -2468,6 +2726,8 @@ dcopuop: break; } } + break; + default: return SIGILL; } @@ -2553,7 +2813,7 @@ dcopuop: * For simplicity we always terminate upon an ISA mode switch. */ int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, - int has_fpu, void *__user *fault_addr) + int has_fpu, void __user **fault_addr) { unsigned long oldepc, prevepc; struct mm_decoded_insn dec_insn; diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index fd71b8d..5bec64f 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) @@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -171,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): @@ -180,9 +220,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index c1072b0..a287b23 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) @@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754dp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,25 +202,25 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754dp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754dp_zero(1); + return ieee754dp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; @@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index caa62f2..e0d9be5 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -14,22 +14,45 @@ #include "ieee754dp.h" -enum maddf_flags { - maddf_negate_product = 1 << 0, -}; + +/* 128 bits shift right logical with rounding. */ +void srl128(u64 *hptr, u64 *lptr, int count) +{ + u64 low; + + if (count >= 128) { + *lptr = *hptr != 0 || *lptr != 0; + *hptr = 0; + } else if (count >= 64) { + if (count == 64) { + *lptr = *hptr | (*lptr != 0); + } else { + low = *lptr; + *lptr = *hptr >> (count - 64); + *lptr |= (*hptr << (128 - count)) != 0 || low != 0; + } + *hptr = 0; + } else { + low = *lptr; + *lptr = low >> count | *hptr << (64 - count); + *lptr |= (low << (64 - count)) != 0; + *hptr = *hptr >> count; + } +} static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp y, enum maddf_flags flags) { int re; int rs; - u64 rm; unsigned lxm; unsigned hxm; unsigned lym; unsigned hym; u64 lrm; u64 hrm; + u64 lzm; + u64 hzm; u64 t; u64 at; int s; @@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - DPDNORMZ; - /* QNAN and ZERO cases are handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): - return ieee754dp_nanxcpt(y); - - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): + if (xc == IEEE754_CLASS_SNAN) return ieee754dp_nanxcpt(x); - - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): + if (yc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) return y; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; + if (zc == IEEE754_CLASS_DNORM) + DPDNORMZ; + /* ZERO z cases are handled separately below */ + switch (CLPAIR(xc, yc)) { /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754dp_indef(); @@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754dp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754dp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754dp_inf(1 ^ (xs ^ ys)); + else + return ieee754dp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); DPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); /* fall through to real computations */ } @@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, re = xe + ye; rs = xs ^ ys; - if (flags & maddf_negate_product) + if (flags & MADDF_NEGATE_PRODUCT) rs ^= 1; /* shunt to top of word */ @@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, ym <<= 64 - (DP_FBITS + 1); /* - * Multiply 64 bits xm, ym to give high 64 bits rm with stickness. + * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm. */ /* 32 * 32 => 64 */ @@ -195,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, hrm = hrm + (t >> 32); - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((s64) rm < 0) { - rm = (rm >> (64 - (DP_FBITS + 1 + 3))) | - ((rm << (DP_FBITS + 1 + 3)) != 0); + /* Put explicit bit at bit 126 if necessary */ + if ((int64_t)hrm < 0) { + lrm = (hrm << 63) | (lrm >> 1); + hrm = hrm >> 1; re++; - } else { - rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) | - ((rm << (DP_FBITS + 1 + 3 + 1)) != 0); } - assert(rm & (DP_HIDDEN_BIT << 3)); - if (zc == IEEE754_CLASS_ZERO) - return ieee754dp_format(rs, re, rm); + assert(hrm & (1 << 62)); - /* And now the addition */ - assert(zm & DP_HIDDEN_BIT); + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hrm, &lrm, (126 - 55)); + return ieee754dp_format(rs, re, lrm); + } - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + /* Move explicit bit from bit 52 to bit 126 */ + lzm = 0; + hzm = zm << 10; + assert(hzm & (1 << 62)); + /* Make the exponents the same */ if (ze > re) { /* * Have to shift y fraction right to align. */ s = ze - re; - rm = XDPSRS(rm, s); + srl128(&hrm, &lrm, s); re += s; } else if (re > ze) { /* * Have to shift x fraction right to align. */ s = re - ze; - zm = XDPSRS(zm, s); + srl128(&hzm, &lzm, s); ze += s; } assert(ze == re); assert(ze <= DP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in xm, xs and xe. + * Generate 128 bit result by adding two 127 bit numbers + * leaving result in hzm:lzm, zs and ze. */ - zm = zm + rm; - - if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */ - zm = XDPSRS1(zm); + hzm = hzm + hrm + (lzm > (lzm + lrm)); + lzm = lzm + lrm; + if ((int64_t)hzm < 0) { /* carry out */ + srl128(&hzm, &lzm, 1); ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (hzm > hrm || (hzm == hrm && lzm >= lrm)) { + hzm = hzm - hrm - (lzm < lrm); + lzm = lzm - lrm; } else { - zm = rm - zm; + hzm = hrm - hzm - (lrm < lzm); + lzm = lrm - lzm; zs = rs; } - if (zm == 0) + if (lzm == 0 && hzm == 0) return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize to rounding precision. + * Put explicit bit at bit 126 if necessary. */ - while ((zm >> (DP_FBITS + 3)) == 0) { - zm <<= 1; - ze--; + if (hzm == 0) { + /* left shift by 63 or 64 bits */ + if ((int64_t)lzm < 0) { + /* MSB of lzm is the explicit bit */ + hzm = lzm >> 1; + lzm = lzm << 63; + ze -= 63; + } else { + hzm = lzm; + lzm = 0; + ze -= 64; + } + } + + t = 0; + while ((hzm >> (62 - t)) == 0) + t++; + + assert(t <= 62); + if (t) { + hzm = hzm << t | lzm >> (64 - t); + lzm = lzm << t; + ze -= t; } } - return ieee754dp_format(zs, ze, zm); + /* + * Move explicit bit from bit 126 to bit 55 since the + * ieee754dp_format code expects the mantissa to be + * 56 bits wide (53 + 3 rounding bits). + */ + srl128(&hzm, &lzm, (126 - 55)); + + return ieee754dp_format(zs, ze, lzm); } union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, @@ -281,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x, union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x, union ieee754dp y) { - return _dp_maddf(z, x, y, maddf_negate_product); + return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c new file mode 100644 index 0000000..c3b9077 --- /dev/null +++ b/arch/mips/math-emu/dp_rint.c @@ -0,0 +1,89 @@ +/* IEEE754 floating point arithmetic + * double precision: common utilities + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754dp.h" + +union ieee754dp ieee754dp_rint(union ieee754dp x) +{ + union ieee754dp ret; + u64 residue; + int sticky; + int round; + int odd; + + COMPXDP; + + ieee754_clearcx(); + + EXPLODEXDP; + FLUSHXDP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754dp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= DP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (64 - DP_FBITS + xe); + round = (residue >> 63) != 0; + sticky = (residue << 1) != 0; + xm >>= DP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754dp_flong(xm); + DPSIGN(ret) = xs; + + return ret; +} diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h index d3be351..92dc8fa 100644 --- a/arch/mips/math-emu/ieee754.h +++ b/arch/mips/math-emu/ieee754.h @@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y); union ieee754sp ieee754sp_fint(int x); union ieee754sp ieee754sp_flong(s64 x); union ieee754sp ieee754sp_fdp(union ieee754dp x); +union ieee754sp ieee754sp_rint(union ieee754sp x); int ieee754sp_tint(union ieee754sp x); s64 ieee754sp_tlong(union ieee754sp x); @@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x); union ieee754dp ieee754dp_fint(int x); union ieee754dp ieee754dp_flong(s64 x); union ieee754dp ieee754dp_fsp(union ieee754sp x); +union ieee754dp ieee754dp_rint(union ieee754dp x); int ieee754dp_tint(union ieee754dp x); s64 ieee754dp_tlong(union ieee754dp x); diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h index 8bc2f69..dd2071f 100644 --- a/arch/mips/math-emu/ieee754int.h +++ b/arch/mips/math-emu/ieee754int.h @@ -26,6 +26,10 @@ #define CLPAIR(x, y) ((x)*6+(y)) +enum maddf_flags { + MADDF_NEGATE_PRODUCT = 1 << 0, +}; + static inline void ieee754_clearcx(void) { ieee754_csr.cx = 0; diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h index 8476067..0f63e42 100644 --- a/arch/mips/math-emu/ieee754sp.h +++ b/arch/mips/math-emu/ieee754sp.h @@ -45,6 +45,10 @@ static inline int ieee754sp_finite(union ieee754sp x) return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS; } +/* 64 bit right shift with rounding */ +#define XSPSRS64(v, rs) \ + (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0)) + /* 3bit extended single precision sticky right shift */ #define XSPSRS(v, rs) \ ((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0)) diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c index be650ed..8c0ec15 100644 --- a/arch/mips/math-emu/me-debugfs.c +++ b/arch/mips/math-emu/me-debugfs.c @@ -28,14 +28,190 @@ static int fpuemu_stat_get(void *data, u64 *val) } DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); +/* + * Used to obtain names for a debugfs instruction counter, given field name + * in fpuemustats structure. For example, for input "cmp_sueq_d", the output + * would be "cmp.sueq.d". This is needed since dots are not allowed to be + * used in structure field names, and are, on the other hand, desired to be + * used in debugfs item names to be clearly associated to corresponding + * MIPS FPU instructions. + */ +static void adjust_instruction_counter_name(char *out_name, char *in_name) +{ + int i = 0; + + strcpy(out_name, in_name); + while (in_name[i] != '\0') { + if (out_name[i] == '_') + out_name[i] = '.'; + i++; + } +} + +static int fpuemustats_clear_show(struct seq_file *s, void *unused) +{ + __this_cpu_write((fpuemustats).emulated, 0); + __this_cpu_write((fpuemustats).loads, 0); + __this_cpu_write((fpuemustats).stores, 0); + __this_cpu_write((fpuemustats).branches, 0); + __this_cpu_write((fpuemustats).cp1ops, 0); + __this_cpu_write((fpuemustats).cp1xops, 0); + __this_cpu_write((fpuemustats).errors, 0); + __this_cpu_write((fpuemustats).ieee754_inexact, 0); + __this_cpu_write((fpuemustats).ieee754_underflow, 0); + __this_cpu_write((fpuemustats).ieee754_overflow, 0); + __this_cpu_write((fpuemustats).ieee754_zerodiv, 0); + __this_cpu_write((fpuemustats).ieee754_invalidop, 0); + __this_cpu_write((fpuemustats).ds_emul, 0); + + __this_cpu_write((fpuemustats).abs_s, 0); + __this_cpu_write((fpuemustats).abs_d, 0); + __this_cpu_write((fpuemustats).add_s, 0); + __this_cpu_write((fpuemustats).add_d, 0); + __this_cpu_write((fpuemustats).bc1eqz, 0); + __this_cpu_write((fpuemustats).bc1nez, 0); + __this_cpu_write((fpuemustats).ceil_w_s, 0); + __this_cpu_write((fpuemustats).ceil_w_d, 0); + __this_cpu_write((fpuemustats).ceil_l_s, 0); + __this_cpu_write((fpuemustats).ceil_l_d, 0); + __this_cpu_write((fpuemustats).class_s, 0); + __this_cpu_write((fpuemustats).class_d, 0); + __this_cpu_write((fpuemustats).cmp_af_s, 0); + __this_cpu_write((fpuemustats).cmp_af_d, 0); + __this_cpu_write((fpuemustats).cmp_eq_s, 0); + __this_cpu_write((fpuemustats).cmp_eq_d, 0); + __this_cpu_write((fpuemustats).cmp_le_s, 0); + __this_cpu_write((fpuemustats).cmp_le_d, 0); + __this_cpu_write((fpuemustats).cmp_lt_s, 0); + __this_cpu_write((fpuemustats).cmp_lt_d, 0); + __this_cpu_write((fpuemustats).cmp_ne_s, 0); + __this_cpu_write((fpuemustats).cmp_ne_d, 0); + __this_cpu_write((fpuemustats).cmp_or_s, 0); + __this_cpu_write((fpuemustats).cmp_or_d, 0); + __this_cpu_write((fpuemustats).cmp_ueq_s, 0); + __this_cpu_write((fpuemustats).cmp_ueq_d, 0); + __this_cpu_write((fpuemustats).cmp_ule_s, 0); + __this_cpu_write((fpuemustats).cmp_ule_d, 0); + __this_cpu_write((fpuemustats).cmp_ult_s, 0); + __this_cpu_write((fpuemustats).cmp_ult_d, 0); + __this_cpu_write((fpuemustats).cmp_un_s, 0); + __this_cpu_write((fpuemustats).cmp_un_d, 0); + __this_cpu_write((fpuemustats).cmp_une_s, 0); + __this_cpu_write((fpuemustats).cmp_une_d, 0); + __this_cpu_write((fpuemustats).cmp_saf_s, 0); + __this_cpu_write((fpuemustats).cmp_saf_d, 0); + __this_cpu_write((fpuemustats).cmp_seq_s, 0); + __this_cpu_write((fpuemustats).cmp_seq_d, 0); + __this_cpu_write((fpuemustats).cmp_sle_s, 0); + __this_cpu_write((fpuemustats).cmp_sle_d, 0); + __this_cpu_write((fpuemustats).cmp_slt_s, 0); + __this_cpu_write((fpuemustats).cmp_slt_d, 0); + __this_cpu_write((fpuemustats).cmp_sne_s, 0); + __this_cpu_write((fpuemustats).cmp_sne_d, 0); + __this_cpu_write((fpuemustats).cmp_sor_s, 0); + __this_cpu_write((fpuemustats).cmp_sor_d, 0); + __this_cpu_write((fpuemustats).cmp_sueq_s, 0); + __this_cpu_write((fpuemustats).cmp_sueq_d, 0); + __this_cpu_write((fpuemustats).cmp_sule_s, 0); + __this_cpu_write((fpuemustats).cmp_sule_d, 0); + __this_cpu_write((fpuemustats).cmp_sult_s, 0); + __this_cpu_write((fpuemustats).cmp_sult_d, 0); + __this_cpu_write((fpuemustats).cmp_sun_s, 0); + __this_cpu_write((fpuemustats).cmp_sun_d, 0); + __this_cpu_write((fpuemustats).cmp_sune_s, 0); + __this_cpu_write((fpuemustats).cmp_sune_d, 0); + __this_cpu_write((fpuemustats).cvt_d_l, 0); + __this_cpu_write((fpuemustats).cvt_d_s, 0); + __this_cpu_write((fpuemustats).cvt_d_w, 0); + __this_cpu_write((fpuemustats).cvt_l_s, 0); + __this_cpu_write((fpuemustats).cvt_l_d, 0); + __this_cpu_write((fpuemustats).cvt_s_d, 0); + __this_cpu_write((fpuemustats).cvt_s_l, 0); + __this_cpu_write((fpuemustats).cvt_s_w, 0); + __this_cpu_write((fpuemustats).cvt_w_s, 0); + __this_cpu_write((fpuemustats).cvt_w_d, 0); + __this_cpu_write((fpuemustats).div_s, 0); + __this_cpu_write((fpuemustats).div_d, 0); + __this_cpu_write((fpuemustats).floor_w_s, 0); + __this_cpu_write((fpuemustats).floor_w_d, 0); + __this_cpu_write((fpuemustats).floor_l_s, 0); + __this_cpu_write((fpuemustats).floor_l_d, 0); + __this_cpu_write((fpuemustats).maddf_s, 0); + __this_cpu_write((fpuemustats).maddf_d, 0); + __this_cpu_write((fpuemustats).max_s, 0); + __this_cpu_write((fpuemustats).max_d, 0); + __this_cpu_write((fpuemustats).maxa_s, 0); + __this_cpu_write((fpuemustats).maxa_d, 0); + __this_cpu_write((fpuemustats).min_s, 0); + __this_cpu_write((fpuemustats).min_d, 0); + __this_cpu_write((fpuemustats).mina_s, 0); + __this_cpu_write((fpuemustats).mina_d, 0); + __this_cpu_write((fpuemustats).mov_s, 0); + __this_cpu_write((fpuemustats).mov_d, 0); + __this_cpu_write((fpuemustats).msubf_s, 0); + __this_cpu_write((fpuemustats).msubf_d, 0); + __this_cpu_write((fpuemustats).mul_s, 0); + __this_cpu_write((fpuemustats).mul_d, 0); + __this_cpu_write((fpuemustats).neg_s, 0); + __this_cpu_write((fpuemustats).neg_d, 0); + __this_cpu_write((fpuemustats).recip_s, 0); + __this_cpu_write((fpuemustats).recip_d, 0); + __this_cpu_write((fpuemustats).rint_s, 0); + __this_cpu_write((fpuemustats).rint_d, 0); + __this_cpu_write((fpuemustats).round_w_s, 0); + __this_cpu_write((fpuemustats).round_w_d, 0); + __this_cpu_write((fpuemustats).round_l_s, 0); + __this_cpu_write((fpuemustats).round_l_d, 0); + __this_cpu_write((fpuemustats).rsqrt_s, 0); + __this_cpu_write((fpuemustats).rsqrt_d, 0); + __this_cpu_write((fpuemustats).sel_s, 0); + __this_cpu_write((fpuemustats).sel_d, 0); + __this_cpu_write((fpuemustats).seleqz_s, 0); + __this_cpu_write((fpuemustats).seleqz_d, 0); + __this_cpu_write((fpuemustats).selnez_s, 0); + __this_cpu_write((fpuemustats).selnez_d, 0); + __this_cpu_write((fpuemustats).sqrt_s, 0); + __this_cpu_write((fpuemustats).sqrt_d, 0); + __this_cpu_write((fpuemustats).sub_s, 0); + __this_cpu_write((fpuemustats).sub_d, 0); + __this_cpu_write((fpuemustats).trunc_w_s, 0); + __this_cpu_write((fpuemustats).trunc_w_d, 0); + __this_cpu_write((fpuemustats).trunc_l_s, 0); + __this_cpu_write((fpuemustats).trunc_l_d, 0); + + return 0; +} + +static int fpuemustats_clear_open(struct inode *inode, struct file *file) +{ + return single_open(file, fpuemustats_clear_show, inode->i_private); +} + +static const struct file_operations fpuemustats_clear_fops = { + .open = fpuemustats_clear_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int __init debugfs_fpuemu(void) { - struct dentry *d, *dir; + struct dentry *fpuemu_debugfs_base_dir; + struct dentry *fpuemu_debugfs_inst_dir; + struct dentry *d, *reset_file; if (!mips_debugfs_dir) return -ENODEV; - dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); - if (!dir) + + fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats", + mips_debugfs_dir); + if (!fpuemu_debugfs_base_dir) + return -ENOMEM; + + reset_file = debugfs_create_file("fpuemustats_clear", 0444, + mips_debugfs_dir, NULL, + &fpuemustats_clear_fops); + if (!reset_file) return -ENOMEM; #define FPU_EMU_STAT_OFFSET(m) \ @@ -43,7 +219,7 @@ static int __init debugfs_fpuemu(void) #define FPU_STAT_CREATE(m) \ do { \ - d = debugfs_create_file(#m , S_IRUGO, dir, \ + d = debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir, \ (void *)FPU_EMU_STAT_OFFSET(m), \ &fops_fpuemu_stat); \ if (!d) \ @@ -53,6 +229,7 @@ do { \ FPU_STAT_CREATE(emulated); FPU_STAT_CREATE(loads); FPU_STAT_CREATE(stores); + FPU_STAT_CREATE(branches); FPU_STAT_CREATE(cp1ops); FPU_STAT_CREATE(cp1xops); FPU_STAT_CREATE(errors); @@ -63,6 +240,139 @@ do { \ FPU_STAT_CREATE(ieee754_invalidop); FPU_STAT_CREATE(ds_emul); + fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions", + fpuemu_debugfs_base_dir); + if (!fpuemu_debugfs_inst_dir) + return -ENOMEM; + +#define FPU_STAT_CREATE_EX(m) \ +do { \ + char name[32]; \ + \ + adjust_instruction_counter_name(name, #m); \ + \ + d = debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \ + (void *)FPU_EMU_STAT_OFFSET(m), \ + &fops_fpuemu_stat); \ + if (!d) \ + return -ENOMEM; \ +} while (0) + + FPU_STAT_CREATE_EX(abs_s); + FPU_STAT_CREATE_EX(abs_d); + FPU_STAT_CREATE_EX(add_s); + FPU_STAT_CREATE_EX(add_d); + FPU_STAT_CREATE_EX(bc1eqz); + FPU_STAT_CREATE_EX(bc1nez); + FPU_STAT_CREATE_EX(ceil_w_s); + FPU_STAT_CREATE_EX(ceil_w_d); + FPU_STAT_CREATE_EX(ceil_l_s); + FPU_STAT_CREATE_EX(ceil_l_d); + FPU_STAT_CREATE_EX(class_s); + FPU_STAT_CREATE_EX(class_d); + FPU_STAT_CREATE_EX(cmp_af_s); + FPU_STAT_CREATE_EX(cmp_af_d); + FPU_STAT_CREATE_EX(cmp_eq_s); + FPU_STAT_CREATE_EX(cmp_eq_d); + FPU_STAT_CREATE_EX(cmp_le_s); + FPU_STAT_CREATE_EX(cmp_le_d); + FPU_STAT_CREATE_EX(cmp_lt_s); + FPU_STAT_CREATE_EX(cmp_lt_d); + FPU_STAT_CREATE_EX(cmp_ne_s); + FPU_STAT_CREATE_EX(cmp_ne_d); + FPU_STAT_CREATE_EX(cmp_or_s); + FPU_STAT_CREATE_EX(cmp_or_d); + FPU_STAT_CREATE_EX(cmp_ueq_s); + FPU_STAT_CREATE_EX(cmp_ueq_d); + FPU_STAT_CREATE_EX(cmp_ule_s); + FPU_STAT_CREATE_EX(cmp_ule_d); + FPU_STAT_CREATE_EX(cmp_ult_s); + FPU_STAT_CREATE_EX(cmp_ult_d); + FPU_STAT_CREATE_EX(cmp_un_s); + FPU_STAT_CREATE_EX(cmp_un_d); + FPU_STAT_CREATE_EX(cmp_une_s); + FPU_STAT_CREATE_EX(cmp_une_d); + FPU_STAT_CREATE_EX(cmp_saf_s); + FPU_STAT_CREATE_EX(cmp_saf_d); + FPU_STAT_CREATE_EX(cmp_seq_s); + FPU_STAT_CREATE_EX(cmp_seq_d); + FPU_STAT_CREATE_EX(cmp_sle_s); + FPU_STAT_CREATE_EX(cmp_sle_d); + FPU_STAT_CREATE_EX(cmp_slt_s); + FPU_STAT_CREATE_EX(cmp_slt_d); + FPU_STAT_CREATE_EX(cmp_sne_s); + FPU_STAT_CREATE_EX(cmp_sne_d); + FPU_STAT_CREATE_EX(cmp_sor_s); + FPU_STAT_CREATE_EX(cmp_sor_d); + FPU_STAT_CREATE_EX(cmp_sueq_s); + FPU_STAT_CREATE_EX(cmp_sueq_d); + FPU_STAT_CREATE_EX(cmp_sule_s); + FPU_STAT_CREATE_EX(cmp_sule_d); + FPU_STAT_CREATE_EX(cmp_sult_s); + FPU_STAT_CREATE_EX(cmp_sult_d); + FPU_STAT_CREATE_EX(cmp_sun_s); + FPU_STAT_CREATE_EX(cmp_sun_d); + FPU_STAT_CREATE_EX(cmp_sune_s); + FPU_STAT_CREATE_EX(cmp_sune_d); + FPU_STAT_CREATE_EX(cvt_d_l); + FPU_STAT_CREATE_EX(cvt_d_s); + FPU_STAT_CREATE_EX(cvt_d_w); + FPU_STAT_CREATE_EX(cvt_l_s); + FPU_STAT_CREATE_EX(cvt_l_d); + FPU_STAT_CREATE_EX(cvt_s_d); + FPU_STAT_CREATE_EX(cvt_s_l); + FPU_STAT_CREATE_EX(cvt_s_w); + FPU_STAT_CREATE_EX(cvt_w_s); + FPU_STAT_CREATE_EX(cvt_w_d); + FPU_STAT_CREATE_EX(div_s); + FPU_STAT_CREATE_EX(div_d); + FPU_STAT_CREATE_EX(floor_w_s); + FPU_STAT_CREATE_EX(floor_w_d); + FPU_STAT_CREATE_EX(floor_l_s); + FPU_STAT_CREATE_EX(floor_l_d); + FPU_STAT_CREATE_EX(maddf_s); + FPU_STAT_CREATE_EX(maddf_d); + FPU_STAT_CREATE_EX(max_s); + FPU_STAT_CREATE_EX(max_d); + FPU_STAT_CREATE_EX(maxa_s); + FPU_STAT_CREATE_EX(maxa_d); + FPU_STAT_CREATE_EX(min_s); + FPU_STAT_CREATE_EX(min_d); + FPU_STAT_CREATE_EX(mina_s); + FPU_STAT_CREATE_EX(mina_d); + FPU_STAT_CREATE_EX(mov_s); + FPU_STAT_CREATE_EX(mov_d); + FPU_STAT_CREATE_EX(msubf_s); + FPU_STAT_CREATE_EX(msubf_d); + FPU_STAT_CREATE_EX(mul_s); + FPU_STAT_CREATE_EX(mul_d); + FPU_STAT_CREATE_EX(neg_s); + FPU_STAT_CREATE_EX(neg_d); + FPU_STAT_CREATE_EX(recip_s); + FPU_STAT_CREATE_EX(recip_d); + FPU_STAT_CREATE_EX(rint_s); + FPU_STAT_CREATE_EX(rint_d); + FPU_STAT_CREATE_EX(round_w_s); + FPU_STAT_CREATE_EX(round_w_d); + FPU_STAT_CREATE_EX(round_l_s); + FPU_STAT_CREATE_EX(round_l_d); + FPU_STAT_CREATE_EX(rsqrt_s); + FPU_STAT_CREATE_EX(rsqrt_d); + FPU_STAT_CREATE_EX(sel_s); + FPU_STAT_CREATE_EX(sel_d); + FPU_STAT_CREATE_EX(seleqz_s); + FPU_STAT_CREATE_EX(seleqz_d); + FPU_STAT_CREATE_EX(selnez_s); + FPU_STAT_CREATE_EX(selnez_d); + FPU_STAT_CREATE_EX(sqrt_s); + FPU_STAT_CREATE_EX(sqrt_d); + FPU_STAT_CREATE_EX(sub_s); + FPU_STAT_CREATE_EX(sub_d); + FPU_STAT_CREATE_EX(trunc_w_s); + FPU_STAT_CREATE_EX(trunc_w_d); + FPU_STAT_CREATE_EX(trunc_l_s); + FPU_STAT_CREATE_EX(trunc_l_d); + return 0; } arch_initcall(debugfs_fpuemu); diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 4d00084..74a5a00 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) return ys ? x : y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) else if (xs < ys) return x; - /* Compare exponent */ - if (xe > ye) - return x; - else if (xe < ye) - return y; + /* Signs of inputs are equal, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } else { + /* Inputs are both negative */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return y; + return x; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return y; - return x; + return x; + return y; } union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) @@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs & ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): @@ -171,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): return x; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): @@ -180,9 +220,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs & ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) return y; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) return y; - return x; + else if (xm > ym) + return x; + else if (xs == 0) + return x; + return y; } diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 4eb1bb9..c51385f 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) return ys ? y : x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) else if (xs < ys) return y; - /* Compare exponent */ - if (xe > ye) - return y; - else if (xe < ye) - return x; + /* Signs of inputs are the same, let's compare exponents */ + if (xs == 0) { + /* Inputs are both positive */ + if (xe > ye) + return y; + else if (xe < ye) + return x; + } else { + /* Inputs are both negative */ + if (xe > ye) + return x; + else if (xe < ye) + return y; + } - /* Compare mantissa */ + /* Signs and exponents of inputs are equal, let's compare mantissas */ + if (xs == 0) { + /* Inputs are both positive, with equal signs and exponents */ + if (xm <= ym) + return x; + return y; + } + /* Inputs are both negative, with equal signs and exponents */ if (xm <= ym) - return x; - return y; + return y; + return x; } union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) @@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): return ieee754sp_nanxcpt(x); - /* numbers are preferred to NaNs */ + /* + * Quiet NaN handling + */ + + /* + * The case of both inputs quiet NaNs + */ + case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): + return x; + + /* + * The cases of exactly one input quiet NaN (numbers + * are here preferred as returned values to NaNs) + */ case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): return x; - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): @@ -164,25 +202,25 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) /* * Infinity and zero handling */ + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): + return ieee754sp_inf(xs | ys); + case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): - return x; + return y; - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM): - return y; + return x; case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): - if (xs == ys) - return x; - return ieee754sp_zero(1); + return ieee754sp_zero(xs | ys); case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; @@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) return x; /* Compare mantissa */ - if (xm <= ym) + if (xm < ym) + return x; + else if (xm > ym) + return y; + else if (xs == 1) return x; return y; } diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index c91d5e5..7195fe7 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -14,9 +14,6 @@ #include "ieee754sp.h" -enum maddf_flags { - maddf_negate_product = 1 << 0, -}; static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp y, enum maddf_flags flags) @@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, int re; int rs; unsigned rm; - unsigned short lxm; - unsigned short hxm; - unsigned short lym; - unsigned short hym; - unsigned lrm; - unsigned hrm; - unsigned t; - unsigned at; + uint64_t rm64; + uint64_t zm64; int s; COMPXSP; @@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, ieee754_clearcx(); - switch (zc) { - case IEEE754_CLASS_SNAN: - ieee754_setcx(IEEE754_INVALID_OPERATION); + /* + * Handle the cases when at least one of x, y or z is a NaN. + * Order of precedence is sNaN, qNaN and z, x, y. + */ + if (zc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(z); - case IEEE754_CLASS_DNORM: - SPDNORMZ; - /* QNAN and ZERO cases are handled separately below */ - } - - switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN): + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + if (yc == IEEE754_CLASS_SNAN) return ieee754sp_nanxcpt(y); + if (zc == IEEE754_CLASS_QNAN) + return z; + if (xc == IEEE754_CLASS_QNAN) + return x; + if (yc == IEEE754_CLASS_QNAN) + return y; - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF): - return ieee754sp_nanxcpt(x); + if (zc == IEEE754_CLASS_DNORM) + SPDNORMZ; + /* ZERO z cases are handled separately below */ - case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN): - return y; + switch (CLPAIR(xc, yc)) { - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM): - case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF): - return x; /* * Infinity handling */ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; ieee754_setcx(IEEE754_INVALID_OPERATION); return ieee754sp_indef(); @@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM): case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF): - if (zc == IEEE754_CLASS_QNAN) - return z; - return ieee754sp_inf(xs ^ ys); + if ((zc == IEEE754_CLASS_INF) && + ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) { + /* + * Cases of addition of infinities with opposite signs + * or subtraction of infinities with same signs. + */ + ieee754_setcx(IEEE754_INVALID_OPERATION); + return ieee754sp_indef(); + } + /* + * z is here either not an infinity, or an infinity having the + * same sign as product (x*y) (in case of MADDF.D instruction) + * or product -(x*y) (in MSUBF.D case). The result must be an + * infinity, and its sign is determined only by the value of + * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y. + */ + if (flags & MADDF_NEGATE_PRODUCT) + return ieee754sp_inf(1 ^ (xs ^ ys)); + else + return ieee754sp_inf(xs ^ ys); case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO): case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM): @@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); - /* Multiplication is 0 so just return z */ + if (zc == IEEE754_CLASS_ZERO) { + /* Handle cases +0 + (-0) and similar ones. */ + if ((!(flags & MADDF_NEGATE_PRODUCT) + && (zs == (xs ^ ys))) || + ((flags & MADDF_NEGATE_PRODUCT) + && (zs != (xs ^ ys)))) + /* + * Cases of addition of zeros of equal signs + * or subtraction of zeroes of opposite signs. + * The sign of the resulting zero is in any + * such case determined only by the sign of z. + */ + return z; + + return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); + } + /* x*y is here 0, and z is not 0, so just return z */ return z; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMY; break; case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); SPDNORMX; break; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM): - if (zc == IEEE754_CLASS_QNAN) - return z; - else if (zc == IEEE754_CLASS_INF) + if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); /* fall through to real computations */ } @@ -158,111 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, re = xe + ye; rs = xs ^ ys; - if (flags & maddf_negate_product) + if (flags & MADDF_NEGATE_PRODUCT) rs ^= 1; - /* shunt to top of word */ - xm <<= 32 - (SP_FBITS + 1); - ym <<= 32 - (SP_FBITS + 1); - - /* - * Multiply 32 bits xm, ym to give high 32 bits rm with stickness. - */ - lxm = xm & 0xffff; - hxm = xm >> 16; - lym = ym & 0xffff; - hym = ym >> 16; - - lrm = lxm * lym; /* 16 * 16 => 32 */ - hrm = hxm * hym; /* 16 * 16 => 32 */ - - t = lxm * hym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); + /* Multiple 24 bit xm and ym to give 48 bit results */ + rm64 = (uint64_t)xm * ym; - t = hxm * lym; /* 16 * 16 => 32 */ - at = lrm + (t << 16); - hrm += at < lrm; - lrm = at; - hrm = hrm + (t >> 16); + /* Shunt to top of word */ + rm64 = rm64 << 16; - rm = hrm | (lrm != 0); - - /* - * Sticky shift down to normal rounding precision. - */ - if ((int) rm < 0) { - rm = (rm >> (32 - (SP_FBITS + 1 + 3))) | - ((rm << (SP_FBITS + 1 + 3)) != 0); + /* Put explicit bit at bit 62 if necessary */ + if ((int64_t) rm64 < 0) { + rm64 = rm64 >> 1; re++; - } else { - rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) | - ((rm << (SP_FBITS + 1 + 3 + 1)) != 0); } - assert(rm & (SP_HIDDEN_BIT << 3)); - - if (zc == IEEE754_CLASS_ZERO) - return ieee754sp_format(rs, re, rm); - /* And now the addition */ + assert(rm64 & (1 << 62)); - assert(zm & SP_HIDDEN_BIT); + if (zc == IEEE754_CLASS_ZERO) { + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + rm = XSPSRS64(rm64, (62 - 26)); + return ieee754sp_format(rs, re, rm); + } - /* - * Provide guard,round and stick bit space. - */ - zm <<= 3; + /* Move explicit bit from bit 23 to bit 62 */ + zm64 = (uint64_t)zm << (62 - 23); + assert(zm64 & (1 << 62)); + /* Make the exponents the same */ if (ze > re) { /* * Have to shift r fraction right to align. */ s = ze - re; - rm = XSPSRS(rm, s); + rm64 = XSPSRS64(rm64, s); re += s; } else if (re > ze) { /* * Have to shift z fraction right to align. */ s = re - ze; - zm = XSPSRS(zm, s); + zm64 = XSPSRS64(zm64, s); ze += s; } assert(ze == re); assert(ze <= SP_EMAX); + /* Do the addition */ if (zs == rs) { /* - * Generate 28 bit result of adding two 27 bit numbers - * leaving result in zm, zs and ze. + * Generate 64 bit result by adding two 63 bit numbers + * leaving result in zm64, zs and ze. */ - zm = zm + rm; - - if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */ - zm = XSPSRS1(zm); + zm64 = zm64 + rm64; + if ((int64_t)zm64 < 0) { /* carry out */ + zm64 = XSPSRS1(zm64); ze++; } } else { - if (zm >= rm) { - zm = zm - rm; + if (zm64 >= rm64) { + zm64 = zm64 - rm64; } else { - zm = rm - zm; + zm64 = rm64 - zm64; zs = rs; } - if (zm == 0) + if (zm64 == 0) return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD); /* - * Normalize in extended single precision + * Put explicit bit at bit 62 if necessary. */ - while ((zm >> (SP_MBITS + 3)) == 0) { - zm <<= 1; + while ((zm64 >> 62) == 0) { + zm64 <<= 1; ze--; } - } + + /* + * Move explicit bit from bit 62 to bit 26 since the + * ieee754sp_format code expects the mantissa to be + * 27 bits wide (24 + 3 rounding bits). + */ + zm = XSPSRS64(zm64, (62 - 26)); + return ieee754sp_format(zs, ze, zm); } @@ -275,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x, union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x, union ieee754sp y) { - return _sp_maddf(z, x, y, maddf_negate_product); + return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT); } diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c new file mode 100644 index 0000000..70765b1 --- /dev/null +++ b/arch/mips/math-emu/sp_rint.c @@ -0,0 +1,90 @@ +/* IEEE754 floating point arithmetic + * single precision + */ +/* + * MIPS floating point support + * Copyright (C) 1994-2000 Algorithmics Ltd. + * Copyright (C) 2017 Imagination Technologies, Ltd. + * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com> + * + * This program is free software; you can distribute it and/or modify it + * under the terms of the GNU General Public License (Version 2) as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. + */ + +#include "ieee754sp.h" + +union ieee754sp ieee754sp_rint(union ieee754sp x) +{ + union ieee754sp ret; + u32 residue; + int sticky; + int round; + int odd; + + COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */ + + ieee754_clearcx(); + + EXPLODEXSP; + FLUSHXSP; + + if (xc == IEEE754_CLASS_SNAN) + return ieee754sp_nanxcpt(x); + + if ((xc == IEEE754_CLASS_QNAN) || + (xc == IEEE754_CLASS_INF) || + (xc == IEEE754_CLASS_ZERO)) + return x; + + if (xe >= SP_FBITS) + return x; + + if (xe < -1) { + residue = xm; + round = 0; + sticky = residue != 0; + xm = 0; + } else { + residue = xm << (xe + 1); + residue <<= 31 - SP_FBITS; + round = (residue >> 31) != 0; + sticky = (residue << 1) != 0; + xm >>= SP_FBITS - xe; + } + + odd = (xm & 0x1) != 0x0; + + switch (ieee754_csr.rm) { + case FPU_CSR_RN: /* toward nearest */ + if (round && (sticky || odd)) + xm++; + break; + case FPU_CSR_RZ: /* toward zero */ + break; + case FPU_CSR_RU: /* toward +infinity */ + if ((round || sticky) && !xs) + xm++; + break; + case FPU_CSR_RD: /* toward -infinity */ + if ((round || sticky) && xs) + xm++; + break; + } + + if (round || sticky) + ieee754_setcx(IEEE754_INEXACT); + + ret = ieee754sp_flong(xm); + SPSIGN(ret) = xs; + + return ret; +} |