summaryrefslogtreecommitdiffstats
path: root/arch/mips/math-emu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/math-emu')
-rw-r--r--arch/mips/math-emu/Makefile6
-rw-r--r--arch/mips/math-emu/cp1emu.c284
-rw-r--r--arch/mips/math-emu/dp_fmax.c84
-rw-r--r--arch/mips/math-emu/dp_fmin.c86
-rw-r--r--arch/mips/math-emu/dp_maddf.c246
-rw-r--r--arch/mips/math-emu/dp_rint.c89
-rw-r--r--arch/mips/math-emu/ieee754.h2
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/math-emu/ieee754sp.h4
-rw-r--r--arch/mips/math-emu/me-debugfs.c318
-rw-r--r--arch/mips/math-emu/sp_fmax.c84
-rw-r--r--arch/mips/math-emu/sp_fmin.c86
-rw-r--r--arch/mips/math-emu/sp_maddf.c229
-rw-r--r--arch/mips/math-emu/sp_rint.c90
14 files changed, 1294 insertions, 318 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index e9bbc2a..e9f10b8 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -4,9 +4,11 @@
obj-y += cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
- dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \
+ dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \
+ dp_fmax.o \
sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
- sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \
+ sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \
+ sp_fmax.o \
dsemul.o
lib-y += ieee754d.o \
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index f08a7b4..192542d 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -58,7 +58,7 @@ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *,
mips_instruction);
static int fpux_emu(struct pt_regs *,
- struct mips_fpu_struct *, mips_instruction, void *__user *);
+ struct mips_fpu_struct *, mips_instruction, void __user **);
/* Control registers */
@@ -830,12 +830,12 @@ do { \
} while (0)
#define DIFROMREG(di, x) \
- ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+ ((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0))
#define DITOREG(di, x) \
do { \
unsigned fpr, i; \
- fpr = (x) & ~(cop1_64bit(xcp) == 0); \
+ fpr = (x) & ~(cop1_64bit(xcp) ^ 1); \
set_fpr64(&ctx->fpr[fpr], 0, di); \
for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++) \
set_fpr64(&ctx->fpr[fpr], i, 0); \
@@ -973,7 +973,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
*/
static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
- struct mm_decoded_insn dec_insn, void *__user *fault_addr)
+ struct mm_decoded_insn dec_insn, void __user **fault_addr)
{
unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc;
unsigned int cond, cbit, bit0;
@@ -1195,9 +1195,11 @@ emul:
bit0 = get_fpr32(fpr, 0) & 0x1;
switch (MIPSInst_RS(ir)) {
case bc1eqz_op:
+ MIPS_FPU_EMU_INC_STATS(bc1eqz);
cond = bit0 == 0;
break;
case bc1nez_op:
+ MIPS_FPU_EMU_INC_STATS(bc1nez);
cond = bit0 != 0;
break;
}
@@ -1230,6 +1232,7 @@ emul:
break;
}
branch_common:
+ MIPS_FPU_EMU_INC_STATS(branches);
set_delay_slot(xcp);
if (cond) {
/*
@@ -1460,7 +1463,7 @@ DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg);
DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
- mips_instruction ir, void *__user *fault_addr)
+ mips_instruction ir, void __user **fault_addr)
{
unsigned rcsr = 0; /* resulting csr */
@@ -1682,15 +1685,19 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
switch (MIPSInst_FUNC(ir)) {
/* binary ops */
case fadd_op:
+ MIPS_FPU_EMU_INC_STATS(add_s);
handler.b = ieee754sp_add;
goto scopbop;
case fsub_op:
+ MIPS_FPU_EMU_INC_STATS(sub_s);
handler.b = ieee754sp_sub;
goto scopbop;
case fmul_op:
+ MIPS_FPU_EMU_INC_STATS(mul_s);
handler.b = ieee754sp_mul;
goto scopbop;
case fdiv_op:
+ MIPS_FPU_EMU_INC_STATS(div_s);
handler.b = ieee754sp_div;
goto scopbop;
@@ -1699,6 +1706,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sqrt_s);
handler.u = ieee754sp_sqrt;
goto scopuop;
@@ -1711,6 +1719,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rsqrt_s);
handler.u = fpemu_sp_rsqrt;
goto scopuop;
@@ -1718,6 +1727,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(recip_s);
handler.u = fpemu_sp_recip;
goto scopuop;
@@ -1754,6 +1764,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(seleqz_s);
SPFROMREG(rv.s, MIPSInst_FT(ir));
if (rv.w & 0x1)
rv.w = 0;
@@ -1765,6 +1776,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(selnez_s);
SPFROMREG(rv.s, MIPSInst_FT(ir));
if (rv.w & 0x1)
SPFROMREG(rv.s, MIPSInst_FS(ir));
@@ -1778,6 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maddf_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1791,6 +1804,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(msubf_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1804,9 +1818,9 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rint_s);
SPFROMREG(fs, MIPSInst_FS(ir));
- rv.l = ieee754sp_tlong(fs);
- rv.s = ieee754sp_flong(rv.l);
+ rv.s = ieee754sp_rint(fs);
goto copcsr;
}
@@ -1816,6 +1830,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(class_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_2008class(fs);
rfmt = w_fmt;
@@ -1828,6 +1843,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(min_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmin(fs, ft);
@@ -1840,6 +1856,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(mina_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmina(fs, ft);
@@ -1852,6 +1869,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(max_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmax(fs, ft);
@@ -1864,6 +1882,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maxa_s);
SPFROMREG(ft, MIPSInst_FT(ir));
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fmaxa(fs, ft);
@@ -1871,15 +1890,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
}
case fabs_op:
+ MIPS_FPU_EMU_INC_STATS(abs_s);
handler.u = ieee754sp_abs;
goto scopuop;
case fneg_op:
+ MIPS_FPU_EMU_INC_STATS(neg_s);
handler.u = ieee754sp_neg;
goto scopuop;
case fmov_op:
/* an easy one */
+ MIPS_FPU_EMU_INC_STATS(mov_s);
SPFROMREG(rv.s, MIPSInst_FS(ir));
goto copcsr;
@@ -1922,12 +1944,14 @@ copcsr:
return SIGILL; /* not defined */
case fcvtd_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_d_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fsp(fs);
rfmt = d_fmt;
goto copcsr;
case fcvtw_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_w_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754sp_tint(fs);
rfmt = w_fmt;
@@ -1940,6 +1964,15 @@ copcsr:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceil_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_w_s);
+ if (MIPSInst_FUNC(ir) == ffloor_op)
+ MIPS_FPU_EMU_INC_STATS(floor_w_s);
+ if (MIPSInst_FUNC(ir) == fround_op)
+ MIPS_FPU_EMU_INC_STATS(round_w_s);
+ if (MIPSInst_FUNC(ir) == ftrunc_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_w_s);
+
oldrm = ieee754_csr.rm;
SPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -1952,6 +1985,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sel_s);
SPFROMREG(fd, MIPSInst_FD(ir));
if (fd.bits & 0x1)
SPFROMREG(rv.s, MIPSInst_FT(ir));
@@ -1963,6 +1997,7 @@ copcsr:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(cvt_l_s);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.l = ieee754sp_tlong(fs);
rfmt = l_fmt;
@@ -1975,6 +2010,15 @@ copcsr:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceill_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_l_s);
+ if (MIPSInst_FUNC(ir) == ffloorl_op)
+ MIPS_FPU_EMU_INC_STATS(floor_l_s);
+ if (MIPSInst_FUNC(ir) == froundl_op)
+ MIPS_FPU_EMU_INC_STATS(round_l_s);
+ if (MIPSInst_FUNC(ir) == ftruncl_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_l_s);
+
oldrm = ieee754_csr.rm;
SPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2016,15 +2060,19 @@ copcsr:
switch (MIPSInst_FUNC(ir)) {
/* binary ops */
case fadd_op:
+ MIPS_FPU_EMU_INC_STATS(add_d);
handler.b = ieee754dp_add;
goto dcopbop;
case fsub_op:
+ MIPS_FPU_EMU_INC_STATS(sub_d);
handler.b = ieee754dp_sub;
goto dcopbop;
case fmul_op:
+ MIPS_FPU_EMU_INC_STATS(mul_d);
handler.b = ieee754dp_mul;
goto dcopbop;
case fdiv_op:
+ MIPS_FPU_EMU_INC_STATS(div_d);
handler.b = ieee754dp_div;
goto dcopbop;
@@ -2033,6 +2081,7 @@ copcsr:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sqrt_d);
handler.u = ieee754dp_sqrt;
goto dcopuop;
/*
@@ -2044,12 +2093,14 @@ copcsr:
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rsqrt_d);
handler.u = fpemu_dp_rsqrt;
goto dcopuop;
case frecip_op:
if (!cpu_has_mips_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(recip_d);
handler.u = fpemu_dp_recip;
goto dcopuop;
case fmovc_op:
@@ -2083,6 +2134,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(seleqz_d);
DPFROMREG(rv.d, MIPSInst_FT(ir));
if (rv.l & 0x1)
rv.l = 0;
@@ -2094,6 +2146,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(selnez_d);
DPFROMREG(rv.d, MIPSInst_FT(ir));
if (rv.l & 0x1)
DPFROMREG(rv.d, MIPSInst_FS(ir));
@@ -2107,6 +2160,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maddf_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2120,6 +2174,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(msubf_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2133,9 +2188,9 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(rint_d);
DPFROMREG(fs, MIPSInst_FS(ir));
- rv.l = ieee754dp_tlong(fs);
- rv.d = ieee754dp_flong(rv.l);
+ rv.d = ieee754dp_rint(fs);
goto copcsr;
}
@@ -2145,9 +2200,10 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(class_d);
DPFROMREG(fs, MIPSInst_FS(ir));
- rv.w = ieee754dp_2008class(fs);
- rfmt = w_fmt;
+ rv.l = ieee754dp_2008class(fs);
+ rfmt = l_fmt;
break;
}
@@ -2157,6 +2213,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(min_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmin(fs, ft);
@@ -2169,6 +2226,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(mina_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmina(fs, ft);
@@ -2181,6 +2239,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(max_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmax(fs, ft);
@@ -2193,6 +2252,7 @@ copcsr:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(maxa_d);
DPFROMREG(ft, MIPSInst_FT(ir));
DPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fmaxa(fs, ft);
@@ -2200,15 +2260,18 @@ copcsr:
}
case fabs_op:
+ MIPS_FPU_EMU_INC_STATS(abs_d);
handler.u = ieee754dp_abs;
goto dcopuop;
case fneg_op:
+ MIPS_FPU_EMU_INC_STATS(neg_d);
handler.u = ieee754dp_neg;
goto dcopuop;
case fmov_op:
/* an easy one */
+ MIPS_FPU_EMU_INC_STATS(mov_d);
DPFROMREG(rv.d, MIPSInst_FS(ir));
goto copcsr;
@@ -2228,6 +2291,7 @@ dcopuop:
* unary conv ops
*/
case fcvts_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_s_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fdp(fs);
rfmt = s_fmt;
@@ -2237,6 +2301,7 @@ dcopuop:
return SIGILL; /* not defined */
case fcvtw_op:
+ MIPS_FPU_EMU_INC_STATS(cvt_w_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.w = ieee754dp_tint(fs); /* wrong */
rfmt = w_fmt;
@@ -2249,6 +2314,15 @@ dcopuop:
if (!cpu_has_mips_2_3_4_5_r)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceil_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_w_d);
+ if (MIPSInst_FUNC(ir) == ffloor_op)
+ MIPS_FPU_EMU_INC_STATS(floor_w_d);
+ if (MIPSInst_FUNC(ir) == fround_op)
+ MIPS_FPU_EMU_INC_STATS(round_w_d);
+ if (MIPSInst_FUNC(ir) == ftrunc_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_w_d);
+
oldrm = ieee754_csr.rm;
DPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2261,6 +2335,7 @@ dcopuop:
if (!cpu_has_mips_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(sel_d);
DPFROMREG(fd, MIPSInst_FD(ir));
if (fd.bits & 0x1)
DPFROMREG(rv.d, MIPSInst_FT(ir));
@@ -2272,6 +2347,7 @@ dcopuop:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ MIPS_FPU_EMU_INC_STATS(cvt_l_d);
DPFROMREG(fs, MIPSInst_FS(ir));
rv.l = ieee754dp_tlong(fs);
rfmt = l_fmt;
@@ -2284,6 +2360,15 @@ dcopuop:
if (!cpu_has_mips_3_4_5_64_r2_r6)
return SIGILL;
+ if (MIPSInst_FUNC(ir) == fceill_op)
+ MIPS_FPU_EMU_INC_STATS(ceil_l_d);
+ if (MIPSInst_FUNC(ir) == ffloorl_op)
+ MIPS_FPU_EMU_INC_STATS(floor_l_d);
+ if (MIPSInst_FUNC(ir) == froundl_op)
+ MIPS_FPU_EMU_INC_STATS(round_l_d);
+ if (MIPSInst_FUNC(ir) == ftruncl_op)
+ MIPS_FPU_EMU_INC_STATS(trunc_l_d);
+
oldrm = ieee754_csr.rm;
DPFROMREG(fs, MIPSInst_FS(ir));
ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2325,12 +2410,14 @@ dcopuop:
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert word to single precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_s_w);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.s = ieee754sp_fint(fs.bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert word to double precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_d_w);
SPFROMREG(fs, MIPSInst_FS(ir));
rv.d = ieee754dp_fint(fs.bits);
rfmt = d_fmt;
@@ -2350,6 +2437,90 @@ dcopuop:
(MIPSInst_FUNC(ir) & 0x20))
return SIGILL;
+ if (!sig) {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_af_s);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_un_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_eq_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ueq_s);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_lt_s);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_ult_s);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_le_s);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_ule_s);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_or_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_une_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ne_s);
+ break;
+ }
+ }
+ } else {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_saf_s);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sun_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_seq_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sueq_s);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_slt_s);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_sult_s);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_sle_s);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_sule_s);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sor_s);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_sune_s);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sne_s);
+ break;
+ }
+ }
+ }
+
/* fmt is w_fmt for single precision so fix it */
rfmt = s_fmt;
/* default to false */
@@ -2394,6 +2565,7 @@ dcopuop:
break;
}
}
+ break;
}
case l_fmt:
@@ -2406,11 +2578,13 @@ dcopuop:
switch (MIPSInst_FUNC(ir)) {
case fcvts_op:
/* convert long to single precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_s_l);
rv.s = ieee754sp_flong(bits);
rfmt = s_fmt;
goto copcsr;
case fcvtd_op:
/* convert long to double precision real */
+ MIPS_FPU_EMU_INC_STATS(cvt_d_l);
rv.d = ieee754dp_flong(bits);
rfmt = d_fmt;
goto copcsr;
@@ -2424,6 +2598,90 @@ dcopuop:
(MIPSInst_FUNC(ir) & 0x20))
return SIGILL;
+ if (!sig) {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_af_d);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_un_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_eq_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ueq_d);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_lt_d);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_ult_d);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_le_d);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_ule_d);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_or_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_une_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_ne_d);
+ break;
+ }
+ }
+ } else {
+ if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+ switch (cmpop) {
+ case 0:
+ MIPS_FPU_EMU_INC_STATS(cmp_saf_d);
+ break;
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sun_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_seq_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sueq_d);
+ break;
+ case 4:
+ MIPS_FPU_EMU_INC_STATS(cmp_slt_d);
+ break;
+ case 5:
+ MIPS_FPU_EMU_INC_STATS(cmp_sult_d);
+ break;
+ case 6:
+ MIPS_FPU_EMU_INC_STATS(cmp_sle_d);
+ break;
+ case 7:
+ MIPS_FPU_EMU_INC_STATS(cmp_sule_d);
+ break;
+ }
+ } else {
+ switch (cmpop) {
+ case 1:
+ MIPS_FPU_EMU_INC_STATS(cmp_sor_d);
+ break;
+ case 2:
+ MIPS_FPU_EMU_INC_STATS(cmp_sune_d);
+ break;
+ case 3:
+ MIPS_FPU_EMU_INC_STATS(cmp_sne_d);
+ break;
+ }
+ }
+ }
+
/* fmt is l_fmt for double precision so fix it */
rfmt = d_fmt;
/* default to false */
@@ -2468,6 +2726,8 @@ dcopuop:
break;
}
}
+ break;
+
default:
return SIGILL;
}
@@ -2553,7 +2813,7 @@ dcopuop:
* For simplicity we always terminate upon an ISA mode switch.
*/
int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
- int has_fpu, void *__user *fault_addr)
+ int has_fpu, void __user **fault_addr)
{
unsigned long oldepc, prevepc;
struct mm_decoded_insn dec_insn;
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index fd71b8d..5bec64f 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
return ys ? x : y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
else if (xs < ys)
return x;
- /* Compare exponent */
- if (xe > ye)
- return x;
- else if (xe < ye)
- return y;
+ /* Signs of inputs are equal, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return y;
+ return x;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return y;
- return x;
+ return x;
+ return y;
}
union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754dp_inf(xs & ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
return x;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
return y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
return y;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
return y;
- return x;
+ else if (xm > ym)
+ return x;
+ else if (xs == 0)
+ return x;
+ return y;
}
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index c1072b0..a287b23 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
return ys ? y : x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
else if (xs < ys)
return y;
- /* Compare exponent */
- if (xe > ye)
- return y;
- else if (xe < ye)
- return x;
+ /* Signs of inputs are the same, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return x;
+ return y;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return x;
- return y;
+ return y;
+ return x;
}
union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754dp_inf(xs | ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- return x;
+ return y;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- return y;
+ return x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
return x;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
+ return x;
+ else if (xm > ym)
+ return y;
+ else if (xs == 1)
return x;
return y;
}
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index caa62f2..e0d9be5 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,22 +14,45 @@
#include "ieee754dp.h"
-enum maddf_flags {
- maddf_negate_product = 1 << 0,
-};
+
+/* 128 bits shift right logical with rounding. */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+ u64 low;
+
+ if (count >= 128) {
+ *lptr = *hptr != 0 || *lptr != 0;
+ *hptr = 0;
+ } else if (count >= 64) {
+ if (count == 64) {
+ *lptr = *hptr | (*lptr != 0);
+ } else {
+ low = *lptr;
+ *lptr = *hptr >> (count - 64);
+ *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+ }
+ *hptr = 0;
+ } else {
+ low = *lptr;
+ *lptr = low >> count | *hptr << (64 - count);
+ *lptr |= (low << (64 - count)) != 0;
+ *hptr = *hptr >> count;
+ }
+}
static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp y, enum maddf_flags flags)
{
int re;
int rs;
- u64 rm;
unsigned lxm;
unsigned hxm;
unsigned lym;
unsigned hym;
u64 lrm;
u64 hrm;
+ u64 lzm;
+ u64 hzm;
u64 t;
u64 at;
int s;
@@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- DPDNORMZ;
- /* QNAN and ZERO cases are handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
- return ieee754dp_nanxcpt(y);
-
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
+ if (xc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(x);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
+ if (yc == IEEE754_CLASS_SNAN)
+ return ieee754dp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
return y;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
+ if (zc == IEEE754_CLASS_DNORM)
+ DPDNORMZ;
+ /* ZERO z cases are handled separately below */
+ switch (CLPAIR(xc, yc)) {
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754dp_indef();
@@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754dp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754dp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754dp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754dp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
/* fall through to real computations */
}
@@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
re = xe + ye;
rs = xs ^ ys;
- if (flags & maddf_negate_product)
+ if (flags & MADDF_NEGATE_PRODUCT)
rs ^= 1;
/* shunt to top of word */
@@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ym <<= 64 - (DP_FBITS + 1);
/*
- * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+ * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
*/
/* 32 * 32 => 64 */
@@ -195,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
hrm = hrm + (t >> 32);
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((s64) rm < 0) {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
- ((rm << (DP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 126 if necessary */
+ if ((int64_t)hrm < 0) {
+ lrm = (hrm << 63) | (lrm >> 1);
+ hrm = hrm >> 1;
re++;
- } else {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
- ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (DP_HIDDEN_BIT << 3));
- if (zc == IEEE754_CLASS_ZERO)
- return ieee754dp_format(rs, re, rm);
+ assert(hrm & (1 << 62));
- /* And now the addition */
- assert(zm & DP_HIDDEN_BIT);
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hrm, &lrm, (126 - 55));
+ return ieee754dp_format(rs, re, lrm);
+ }
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ /* Move explicit bit from bit 52 to bit 126 */
+ lzm = 0;
+ hzm = zm << 10;
+ assert(hzm & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift y fraction right to align.
*/
s = ze - re;
- rm = XDPSRS(rm, s);
+ srl128(&hrm, &lrm, s);
re += s;
} else if (re > ze) {
/*
* Have to shift x fraction right to align.
*/
s = re - ze;
- zm = XDPSRS(zm, s);
+ srl128(&hzm, &lzm, s);
ze += s;
}
assert(ze == re);
assert(ze <= DP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in xm, xs and xe.
+ * Generate 128 bit result by adding two 127 bit numbers
+ * leaving result in hzm:lzm, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
- zm = XDPSRS1(zm);
+ hzm = hzm + hrm + (lzm > (lzm + lrm));
+ lzm = lzm + lrm;
+ if ((int64_t)hzm < 0) { /* carry out */
+ srl128(&hzm, &lzm, 1);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+ hzm = hzm - hrm - (lzm < lrm);
+ lzm = lzm - lrm;
} else {
- zm = rm - zm;
+ hzm = hrm - hzm - (lrm < lzm);
+ lzm = lrm - lzm;
zs = rs;
}
- if (zm == 0)
+ if (lzm == 0 && hzm == 0)
return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize to rounding precision.
+ * Put explicit bit at bit 126 if necessary.
*/
- while ((zm >> (DP_FBITS + 3)) == 0) {
- zm <<= 1;
- ze--;
+ if (hzm == 0) {
+ /* left shift by 63 or 64 bits */
+ if ((int64_t)lzm < 0) {
+ /* MSB of lzm is the explicit bit */
+ hzm = lzm >> 1;
+ lzm = lzm << 63;
+ ze -= 63;
+ } else {
+ hzm = lzm;
+ lzm = 0;
+ ze -= 64;
+ }
+ }
+
+ t = 0;
+ while ((hzm >> (62 - t)) == 0)
+ t++;
+
+ assert(t <= 62);
+ if (t) {
+ hzm = hzm << t | lzm >> (64 - t);
+ lzm = lzm << t;
+ ze -= t;
}
}
- return ieee754dp_format(zs, ze, zm);
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hzm, &lzm, (126 - 55));
+
+ return ieee754dp_format(zs, ze, lzm);
}
union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
@@ -281,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
union ieee754dp y)
{
- return _dp_maddf(z, x, y, maddf_negate_product);
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c
new file mode 100644
index 0000000..c3b9077
--- /dev/null
+++ b/arch/mips/math-emu/dp_rint.c
@@ -0,0 +1,89 @@
+/* IEEE754 floating point arithmetic
+ * double precision: common utilities
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.
+ */
+
+#include "ieee754dp.h"
+
+union ieee754dp ieee754dp_rint(union ieee754dp x)
+{
+ union ieee754dp ret;
+ u64 residue;
+ int sticky;
+ int round;
+ int odd;
+
+ COMPXDP;
+
+ ieee754_clearcx();
+
+ EXPLODEXDP;
+ FLUSHXDP;
+
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754dp_nanxcpt(x);
+
+ if ((xc == IEEE754_CLASS_QNAN) ||
+ (xc == IEEE754_CLASS_INF) ||
+ (xc == IEEE754_CLASS_ZERO))
+ return x;
+
+ if (xe >= DP_FBITS)
+ return x;
+
+ if (xe < -1) {
+ residue = xm;
+ round = 0;
+ sticky = residue != 0;
+ xm = 0;
+ } else {
+ residue = xm << (64 - DP_FBITS + xe);
+ round = (residue >> 63) != 0;
+ sticky = (residue << 1) != 0;
+ xm >>= DP_FBITS - xe;
+ }
+
+ odd = (xm & 0x1) != 0x0;
+
+ switch (ieee754_csr.rm) {
+ case FPU_CSR_RN: /* toward nearest */
+ if (round && (sticky || odd))
+ xm++;
+ break;
+ case FPU_CSR_RZ: /* toward zero */
+ break;
+ case FPU_CSR_RU: /* toward +infinity */
+ if ((round || sticky) && !xs)
+ xm++;
+ break;
+ case FPU_CSR_RD: /* toward -infinity */
+ if ((round || sticky) && xs)
+ xm++;
+ break;
+ }
+
+ if (round || sticky)
+ ieee754_setcx(IEEE754_INEXACT);
+
+ ret = ieee754dp_flong(xm);
+ DPSIGN(ret) = xs;
+
+ return ret;
+}
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index d3be351..92dc8fa 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y);
union ieee754sp ieee754sp_fint(int x);
union ieee754sp ieee754sp_flong(s64 x);
union ieee754sp ieee754sp_fdp(union ieee754dp x);
+union ieee754sp ieee754sp_rint(union ieee754sp x);
int ieee754sp_tint(union ieee754sp x);
s64 ieee754sp_tlong(union ieee754sp x);
@@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x);
union ieee754dp ieee754dp_fint(int x);
union ieee754dp ieee754dp_flong(s64 x);
union ieee754dp ieee754dp_fsp(union ieee754sp x);
+union ieee754dp ieee754dp_rint(union ieee754dp x);
int ieee754dp_tint(union ieee754dp x);
s64 ieee754dp_tlong(union ieee754dp x);
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 8bc2f69..dd2071f 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -26,6 +26,10 @@
#define CLPAIR(x, y) ((x)*6+(y))
+enum maddf_flags {
+ MADDF_NEGATE_PRODUCT = 1 << 0,
+};
+
static inline void ieee754_clearcx(void)
{
ieee754_csr.cx = 0;
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 8476067..0f63e42 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -45,6 +45,10 @@ static inline int ieee754sp_finite(union ieee754sp x)
return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
}
+/* 64 bit right shift with rounding */
+#define XSPSRS64(v, rs) \
+ (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0))
+
/* 3bit extended single precision sticky right shift */
#define XSPSRS(v, rs) \
((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0))
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index be650ed..8c0ec15 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -28,14 +28,190 @@ static int fpuemu_stat_get(void *data, u64 *val)
}
DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
+/*
+ * Used to obtain names for a debugfs instruction counter, given field name
+ * in fpuemustats structure. For example, for input "cmp_sueq_d", the output
+ * would be "cmp.sueq.d". This is needed since dots are not allowed to be
+ * used in structure field names, and are, on the other hand, desired to be
+ * used in debugfs item names to be clearly associated to corresponding
+ * MIPS FPU instructions.
+ */
+static void adjust_instruction_counter_name(char *out_name, char *in_name)
+{
+ int i = 0;
+
+ strcpy(out_name, in_name);
+ while (in_name[i] != '\0') {
+ if (out_name[i] == '_')
+ out_name[i] = '.';
+ i++;
+ }
+}
+
+static int fpuemustats_clear_show(struct seq_file *s, void *unused)
+{
+ __this_cpu_write((fpuemustats).emulated, 0);
+ __this_cpu_write((fpuemustats).loads, 0);
+ __this_cpu_write((fpuemustats).stores, 0);
+ __this_cpu_write((fpuemustats).branches, 0);
+ __this_cpu_write((fpuemustats).cp1ops, 0);
+ __this_cpu_write((fpuemustats).cp1xops, 0);
+ __this_cpu_write((fpuemustats).errors, 0);
+ __this_cpu_write((fpuemustats).ieee754_inexact, 0);
+ __this_cpu_write((fpuemustats).ieee754_underflow, 0);
+ __this_cpu_write((fpuemustats).ieee754_overflow, 0);
+ __this_cpu_write((fpuemustats).ieee754_zerodiv, 0);
+ __this_cpu_write((fpuemustats).ieee754_invalidop, 0);
+ __this_cpu_write((fpuemustats).ds_emul, 0);
+
+ __this_cpu_write((fpuemustats).abs_s, 0);
+ __this_cpu_write((fpuemustats).abs_d, 0);
+ __this_cpu_write((fpuemustats).add_s, 0);
+ __this_cpu_write((fpuemustats).add_d, 0);
+ __this_cpu_write((fpuemustats).bc1eqz, 0);
+ __this_cpu_write((fpuemustats).bc1nez, 0);
+ __this_cpu_write((fpuemustats).ceil_w_s, 0);
+ __this_cpu_write((fpuemustats).ceil_w_d, 0);
+ __this_cpu_write((fpuemustats).ceil_l_s, 0);
+ __this_cpu_write((fpuemustats).ceil_l_d, 0);
+ __this_cpu_write((fpuemustats).class_s, 0);
+ __this_cpu_write((fpuemustats).class_d, 0);
+ __this_cpu_write((fpuemustats).cmp_af_s, 0);
+ __this_cpu_write((fpuemustats).cmp_af_d, 0);
+ __this_cpu_write((fpuemustats).cmp_eq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_eq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_le_s, 0);
+ __this_cpu_write((fpuemustats).cmp_le_d, 0);
+ __this_cpu_write((fpuemustats).cmp_lt_s, 0);
+ __this_cpu_write((fpuemustats).cmp_lt_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ne_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ne_d, 0);
+ __this_cpu_write((fpuemustats).cmp_or_s, 0);
+ __this_cpu_write((fpuemustats).cmp_or_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ueq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ueq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ule_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ule_d, 0);
+ __this_cpu_write((fpuemustats).cmp_ult_s, 0);
+ __this_cpu_write((fpuemustats).cmp_ult_d, 0);
+ __this_cpu_write((fpuemustats).cmp_un_s, 0);
+ __this_cpu_write((fpuemustats).cmp_un_d, 0);
+ __this_cpu_write((fpuemustats).cmp_une_s, 0);
+ __this_cpu_write((fpuemustats).cmp_une_d, 0);
+ __this_cpu_write((fpuemustats).cmp_saf_s, 0);
+ __this_cpu_write((fpuemustats).cmp_saf_d, 0);
+ __this_cpu_write((fpuemustats).cmp_seq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_seq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sle_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sle_d, 0);
+ __this_cpu_write((fpuemustats).cmp_slt_s, 0);
+ __this_cpu_write((fpuemustats).cmp_slt_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sne_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sne_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sor_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sor_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sueq_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sueq_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sule_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sule_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sult_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sult_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sun_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sun_d, 0);
+ __this_cpu_write((fpuemustats).cmp_sune_s, 0);
+ __this_cpu_write((fpuemustats).cmp_sune_d, 0);
+ __this_cpu_write((fpuemustats).cvt_d_l, 0);
+ __this_cpu_write((fpuemustats).cvt_d_s, 0);
+ __this_cpu_write((fpuemustats).cvt_d_w, 0);
+ __this_cpu_write((fpuemustats).cvt_l_s, 0);
+ __this_cpu_write((fpuemustats).cvt_l_d, 0);
+ __this_cpu_write((fpuemustats).cvt_s_d, 0);
+ __this_cpu_write((fpuemustats).cvt_s_l, 0);
+ __this_cpu_write((fpuemustats).cvt_s_w, 0);
+ __this_cpu_write((fpuemustats).cvt_w_s, 0);
+ __this_cpu_write((fpuemustats).cvt_w_d, 0);
+ __this_cpu_write((fpuemustats).div_s, 0);
+ __this_cpu_write((fpuemustats).div_d, 0);
+ __this_cpu_write((fpuemustats).floor_w_s, 0);
+ __this_cpu_write((fpuemustats).floor_w_d, 0);
+ __this_cpu_write((fpuemustats).floor_l_s, 0);
+ __this_cpu_write((fpuemustats).floor_l_d, 0);
+ __this_cpu_write((fpuemustats).maddf_s, 0);
+ __this_cpu_write((fpuemustats).maddf_d, 0);
+ __this_cpu_write((fpuemustats).max_s, 0);
+ __this_cpu_write((fpuemustats).max_d, 0);
+ __this_cpu_write((fpuemustats).maxa_s, 0);
+ __this_cpu_write((fpuemustats).maxa_d, 0);
+ __this_cpu_write((fpuemustats).min_s, 0);
+ __this_cpu_write((fpuemustats).min_d, 0);
+ __this_cpu_write((fpuemustats).mina_s, 0);
+ __this_cpu_write((fpuemustats).mina_d, 0);
+ __this_cpu_write((fpuemustats).mov_s, 0);
+ __this_cpu_write((fpuemustats).mov_d, 0);
+ __this_cpu_write((fpuemustats).msubf_s, 0);
+ __this_cpu_write((fpuemustats).msubf_d, 0);
+ __this_cpu_write((fpuemustats).mul_s, 0);
+ __this_cpu_write((fpuemustats).mul_d, 0);
+ __this_cpu_write((fpuemustats).neg_s, 0);
+ __this_cpu_write((fpuemustats).neg_d, 0);
+ __this_cpu_write((fpuemustats).recip_s, 0);
+ __this_cpu_write((fpuemustats).recip_d, 0);
+ __this_cpu_write((fpuemustats).rint_s, 0);
+ __this_cpu_write((fpuemustats).rint_d, 0);
+ __this_cpu_write((fpuemustats).round_w_s, 0);
+ __this_cpu_write((fpuemustats).round_w_d, 0);
+ __this_cpu_write((fpuemustats).round_l_s, 0);
+ __this_cpu_write((fpuemustats).round_l_d, 0);
+ __this_cpu_write((fpuemustats).rsqrt_s, 0);
+ __this_cpu_write((fpuemustats).rsqrt_d, 0);
+ __this_cpu_write((fpuemustats).sel_s, 0);
+ __this_cpu_write((fpuemustats).sel_d, 0);
+ __this_cpu_write((fpuemustats).seleqz_s, 0);
+ __this_cpu_write((fpuemustats).seleqz_d, 0);
+ __this_cpu_write((fpuemustats).selnez_s, 0);
+ __this_cpu_write((fpuemustats).selnez_d, 0);
+ __this_cpu_write((fpuemustats).sqrt_s, 0);
+ __this_cpu_write((fpuemustats).sqrt_d, 0);
+ __this_cpu_write((fpuemustats).sub_s, 0);
+ __this_cpu_write((fpuemustats).sub_d, 0);
+ __this_cpu_write((fpuemustats).trunc_w_s, 0);
+ __this_cpu_write((fpuemustats).trunc_w_d, 0);
+ __this_cpu_write((fpuemustats).trunc_l_s, 0);
+ __this_cpu_write((fpuemustats).trunc_l_d, 0);
+
+ return 0;
+}
+
+static int fpuemustats_clear_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, fpuemustats_clear_show, inode->i_private);
+}
+
+static const struct file_operations fpuemustats_clear_fops = {
+ .open = fpuemustats_clear_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static int __init debugfs_fpuemu(void)
{
- struct dentry *d, *dir;
+ struct dentry *fpuemu_debugfs_base_dir;
+ struct dentry *fpuemu_debugfs_inst_dir;
+ struct dentry *d, *reset_file;
if (!mips_debugfs_dir)
return -ENODEV;
- dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
- if (!dir)
+
+ fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats",
+ mips_debugfs_dir);
+ if (!fpuemu_debugfs_base_dir)
+ return -ENOMEM;
+
+ reset_file = debugfs_create_file("fpuemustats_clear", 0444,
+ mips_debugfs_dir, NULL,
+ &fpuemustats_clear_fops);
+ if (!reset_file)
return -ENOMEM;
#define FPU_EMU_STAT_OFFSET(m) \
@@ -43,7 +219,7 @@ static int __init debugfs_fpuemu(void)
#define FPU_STAT_CREATE(m) \
do { \
- d = debugfs_create_file(#m , S_IRUGO, dir, \
+ d = debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir, \
(void *)FPU_EMU_STAT_OFFSET(m), \
&fops_fpuemu_stat); \
if (!d) \
@@ -53,6 +229,7 @@ do { \
FPU_STAT_CREATE(emulated);
FPU_STAT_CREATE(loads);
FPU_STAT_CREATE(stores);
+ FPU_STAT_CREATE(branches);
FPU_STAT_CREATE(cp1ops);
FPU_STAT_CREATE(cp1xops);
FPU_STAT_CREATE(errors);
@@ -63,6 +240,139 @@ do { \
FPU_STAT_CREATE(ieee754_invalidop);
FPU_STAT_CREATE(ds_emul);
+ fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions",
+ fpuemu_debugfs_base_dir);
+ if (!fpuemu_debugfs_inst_dir)
+ return -ENOMEM;
+
+#define FPU_STAT_CREATE_EX(m) \
+do { \
+ char name[32]; \
+ \
+ adjust_instruction_counter_name(name, #m); \
+ \
+ d = debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir, \
+ (void *)FPU_EMU_STAT_OFFSET(m), \
+ &fops_fpuemu_stat); \
+ if (!d) \
+ return -ENOMEM; \
+} while (0)
+
+ FPU_STAT_CREATE_EX(abs_s);
+ FPU_STAT_CREATE_EX(abs_d);
+ FPU_STAT_CREATE_EX(add_s);
+ FPU_STAT_CREATE_EX(add_d);
+ FPU_STAT_CREATE_EX(bc1eqz);
+ FPU_STAT_CREATE_EX(bc1nez);
+ FPU_STAT_CREATE_EX(ceil_w_s);
+ FPU_STAT_CREATE_EX(ceil_w_d);
+ FPU_STAT_CREATE_EX(ceil_l_s);
+ FPU_STAT_CREATE_EX(ceil_l_d);
+ FPU_STAT_CREATE_EX(class_s);
+ FPU_STAT_CREATE_EX(class_d);
+ FPU_STAT_CREATE_EX(cmp_af_s);
+ FPU_STAT_CREATE_EX(cmp_af_d);
+ FPU_STAT_CREATE_EX(cmp_eq_s);
+ FPU_STAT_CREATE_EX(cmp_eq_d);
+ FPU_STAT_CREATE_EX(cmp_le_s);
+ FPU_STAT_CREATE_EX(cmp_le_d);
+ FPU_STAT_CREATE_EX(cmp_lt_s);
+ FPU_STAT_CREATE_EX(cmp_lt_d);
+ FPU_STAT_CREATE_EX(cmp_ne_s);
+ FPU_STAT_CREATE_EX(cmp_ne_d);
+ FPU_STAT_CREATE_EX(cmp_or_s);
+ FPU_STAT_CREATE_EX(cmp_or_d);
+ FPU_STAT_CREATE_EX(cmp_ueq_s);
+ FPU_STAT_CREATE_EX(cmp_ueq_d);
+ FPU_STAT_CREATE_EX(cmp_ule_s);
+ FPU_STAT_CREATE_EX(cmp_ule_d);
+ FPU_STAT_CREATE_EX(cmp_ult_s);
+ FPU_STAT_CREATE_EX(cmp_ult_d);
+ FPU_STAT_CREATE_EX(cmp_un_s);
+ FPU_STAT_CREATE_EX(cmp_un_d);
+ FPU_STAT_CREATE_EX(cmp_une_s);
+ FPU_STAT_CREATE_EX(cmp_une_d);
+ FPU_STAT_CREATE_EX(cmp_saf_s);
+ FPU_STAT_CREATE_EX(cmp_saf_d);
+ FPU_STAT_CREATE_EX(cmp_seq_s);
+ FPU_STAT_CREATE_EX(cmp_seq_d);
+ FPU_STAT_CREATE_EX(cmp_sle_s);
+ FPU_STAT_CREATE_EX(cmp_sle_d);
+ FPU_STAT_CREATE_EX(cmp_slt_s);
+ FPU_STAT_CREATE_EX(cmp_slt_d);
+ FPU_STAT_CREATE_EX(cmp_sne_s);
+ FPU_STAT_CREATE_EX(cmp_sne_d);
+ FPU_STAT_CREATE_EX(cmp_sor_s);
+ FPU_STAT_CREATE_EX(cmp_sor_d);
+ FPU_STAT_CREATE_EX(cmp_sueq_s);
+ FPU_STAT_CREATE_EX(cmp_sueq_d);
+ FPU_STAT_CREATE_EX(cmp_sule_s);
+ FPU_STAT_CREATE_EX(cmp_sule_d);
+ FPU_STAT_CREATE_EX(cmp_sult_s);
+ FPU_STAT_CREATE_EX(cmp_sult_d);
+ FPU_STAT_CREATE_EX(cmp_sun_s);
+ FPU_STAT_CREATE_EX(cmp_sun_d);
+ FPU_STAT_CREATE_EX(cmp_sune_s);
+ FPU_STAT_CREATE_EX(cmp_sune_d);
+ FPU_STAT_CREATE_EX(cvt_d_l);
+ FPU_STAT_CREATE_EX(cvt_d_s);
+ FPU_STAT_CREATE_EX(cvt_d_w);
+ FPU_STAT_CREATE_EX(cvt_l_s);
+ FPU_STAT_CREATE_EX(cvt_l_d);
+ FPU_STAT_CREATE_EX(cvt_s_d);
+ FPU_STAT_CREATE_EX(cvt_s_l);
+ FPU_STAT_CREATE_EX(cvt_s_w);
+ FPU_STAT_CREATE_EX(cvt_w_s);
+ FPU_STAT_CREATE_EX(cvt_w_d);
+ FPU_STAT_CREATE_EX(div_s);
+ FPU_STAT_CREATE_EX(div_d);
+ FPU_STAT_CREATE_EX(floor_w_s);
+ FPU_STAT_CREATE_EX(floor_w_d);
+ FPU_STAT_CREATE_EX(floor_l_s);
+ FPU_STAT_CREATE_EX(floor_l_d);
+ FPU_STAT_CREATE_EX(maddf_s);
+ FPU_STAT_CREATE_EX(maddf_d);
+ FPU_STAT_CREATE_EX(max_s);
+ FPU_STAT_CREATE_EX(max_d);
+ FPU_STAT_CREATE_EX(maxa_s);
+ FPU_STAT_CREATE_EX(maxa_d);
+ FPU_STAT_CREATE_EX(min_s);
+ FPU_STAT_CREATE_EX(min_d);
+ FPU_STAT_CREATE_EX(mina_s);
+ FPU_STAT_CREATE_EX(mina_d);
+ FPU_STAT_CREATE_EX(mov_s);
+ FPU_STAT_CREATE_EX(mov_d);
+ FPU_STAT_CREATE_EX(msubf_s);
+ FPU_STAT_CREATE_EX(msubf_d);
+ FPU_STAT_CREATE_EX(mul_s);
+ FPU_STAT_CREATE_EX(mul_d);
+ FPU_STAT_CREATE_EX(neg_s);
+ FPU_STAT_CREATE_EX(neg_d);
+ FPU_STAT_CREATE_EX(recip_s);
+ FPU_STAT_CREATE_EX(recip_d);
+ FPU_STAT_CREATE_EX(rint_s);
+ FPU_STAT_CREATE_EX(rint_d);
+ FPU_STAT_CREATE_EX(round_w_s);
+ FPU_STAT_CREATE_EX(round_w_d);
+ FPU_STAT_CREATE_EX(round_l_s);
+ FPU_STAT_CREATE_EX(round_l_d);
+ FPU_STAT_CREATE_EX(rsqrt_s);
+ FPU_STAT_CREATE_EX(rsqrt_d);
+ FPU_STAT_CREATE_EX(sel_s);
+ FPU_STAT_CREATE_EX(sel_d);
+ FPU_STAT_CREATE_EX(seleqz_s);
+ FPU_STAT_CREATE_EX(seleqz_d);
+ FPU_STAT_CREATE_EX(selnez_s);
+ FPU_STAT_CREATE_EX(selnez_d);
+ FPU_STAT_CREATE_EX(sqrt_s);
+ FPU_STAT_CREATE_EX(sqrt_d);
+ FPU_STAT_CREATE_EX(sub_s);
+ FPU_STAT_CREATE_EX(sub_d);
+ FPU_STAT_CREATE_EX(trunc_w_s);
+ FPU_STAT_CREATE_EX(trunc_w_d);
+ FPU_STAT_CREATE_EX(trunc_l_s);
+ FPU_STAT_CREATE_EX(trunc_l_d);
+
return 0;
}
arch_initcall(debugfs_fpuemu);
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 4d00084..74a5a00 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
return ys ? x : y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
else if (xs < ys)
return x;
- /* Compare exponent */
- if (xe > ye)
- return x;
- else if (xe < ye)
- return y;
+ /* Signs of inputs are equal, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return y;
+ return x;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return y;
- return x;
+ return x;
+ return y;
}
union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754sp_inf(xs & ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
return x;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
return y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
return y;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
return y;
- return x;
+ else if (xm > ym)
+ return x;
+ else if (xs == 0)
+ return x;
+ return y;
}
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index 4eb1bb9..c51385f 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
return ys ? y : x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
else if (xs < ys)
return y;
- /* Compare exponent */
- if (xe > ye)
- return y;
- else if (xe < ye)
- return x;
+ /* Signs of inputs are the same, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return x;
+ return y;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return x;
- return y;
+ return y;
+ return x;
}
union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754sp_inf(xs | ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- return x;
+ return y;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- return y;
+ return x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
return x;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
+ return x;
+ else if (xm > ym)
+ return y;
+ else if (xs == 1)
return x;
return y;
}
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index c91d5e5..7195fe7 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,9 +14,6 @@
#include "ieee754sp.h"
-enum maddf_flags {
- maddf_negate_product = 1 << 0,
-};
static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp y, enum maddf_flags flags)
@@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
int re;
int rs;
unsigned rm;
- unsigned short lxm;
- unsigned short hxm;
- unsigned short lym;
- unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
+ uint64_t rm64;
+ uint64_t zm64;
int s;
COMPXSP;
@@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- SPDNORMZ;
- /* QNAN and ZERO cases are handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+ if (yc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
+ return y;
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754sp_nanxcpt(x);
+ if (zc == IEEE754_CLASS_DNORM)
+ SPDNORMZ;
+ /* ZERO z cases are handled separately below */
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
+ switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754sp_indef();
@@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754sp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754sp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754sp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754sp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
/* fall through to real computations */
}
@@ -158,111 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
re = xe + ye;
rs = xs ^ ys;
- if (flags & maddf_negate_product)
+ if (flags & MADDF_NEGATE_PRODUCT)
rs ^= 1;
- /* shunt to top of word */
- xm <<= 32 - (SP_FBITS + 1);
- ym <<= 32 - (SP_FBITS + 1);
-
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
- lxm = xm & 0xffff;
- hxm = xm >> 16;
- lym = ym & 0xffff;
- hym = ym >> 16;
-
- lrm = lxm * lym; /* 16 * 16 => 32 */
- hrm = hxm * hym; /* 16 * 16 => 32 */
-
- t = lxm * hym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
+ /* Multiple 24 bit xm and ym to give 48 bit results */
+ rm64 = (uint64_t)xm * ym;
- t = hxm * lym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
+ /* Shunt to top of word */
+ rm64 = rm64 << 16;
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((int) rm < 0) {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
- ((rm << (SP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 62 if necessary */
+ if ((int64_t) rm64 < 0) {
+ rm64 = rm64 >> 1;
re++;
- } else {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
- ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (SP_HIDDEN_BIT << 3));
-
- if (zc == IEEE754_CLASS_ZERO)
- return ieee754sp_format(rs, re, rm);
- /* And now the addition */
+ assert(rm64 & (1 << 62));
- assert(zm & SP_HIDDEN_BIT);
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ rm = XSPSRS64(rm64, (62 - 26));
+ return ieee754sp_format(rs, re, rm);
+ }
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ /* Move explicit bit from bit 23 to bit 62 */
+ zm64 = (uint64_t)zm << (62 - 23);
+ assert(zm64 & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift r fraction right to align.
*/
s = ze - re;
- rm = XSPSRS(rm, s);
+ rm64 = XSPSRS64(rm64, s);
re += s;
} else if (re > ze) {
/*
* Have to shift z fraction right to align.
*/
s = re - ze;
- zm = XSPSRS(zm, s);
+ zm64 = XSPSRS64(zm64, s);
ze += s;
}
assert(ze == re);
assert(ze <= SP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in zm, zs and ze.
+ * Generate 64 bit result by adding two 63 bit numbers
+ * leaving result in zm64, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
- zm = XSPSRS1(zm);
+ zm64 = zm64 + rm64;
+ if ((int64_t)zm64 < 0) { /* carry out */
+ zm64 = XSPSRS1(zm64);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (zm64 >= rm64) {
+ zm64 = zm64 - rm64;
} else {
- zm = rm - zm;
+ zm64 = rm64 - zm64;
zs = rs;
}
- if (zm == 0)
+ if (zm64 == 0)
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize in extended single precision
+ * Put explicit bit at bit 62 if necessary.
*/
- while ((zm >> (SP_MBITS + 3)) == 0) {
- zm <<= 1;
+ while ((zm64 >> 62) == 0) {
+ zm64 <<= 1;
ze--;
}
-
}
+
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ zm = XSPSRS64(zm64, (62 - 26));
+
return ieee754sp_format(zs, ze, zm);
}
@@ -275,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
union ieee754sp y)
{
- return _sp_maddf(z, x, y, maddf_negate_product);
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c
new file mode 100644
index 0000000..70765b1
--- /dev/null
+++ b/arch/mips/math-emu/sp_rint.c
@@ -0,0 +1,90 @@
+/* IEEE754 floating point arithmetic
+ * single precision
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.
+ */
+
+#include "ieee754sp.h"
+
+union ieee754sp ieee754sp_rint(union ieee754sp x)
+{
+ union ieee754sp ret;
+ u32 residue;
+ int sticky;
+ int round;
+ int odd;
+
+ COMPXDP; /* <-- DP needed for 64-bit mantissa tmp */
+
+ ieee754_clearcx();
+
+ EXPLODEXSP;
+ FLUSHXSP;
+
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+
+ if ((xc == IEEE754_CLASS_QNAN) ||
+ (xc == IEEE754_CLASS_INF) ||
+ (xc == IEEE754_CLASS_ZERO))
+ return x;
+
+ if (xe >= SP_FBITS)
+ return x;
+
+ if (xe < -1) {
+ residue = xm;
+ round = 0;
+ sticky = residue != 0;
+ xm = 0;
+ } else {
+ residue = xm << (xe + 1);
+ residue <<= 31 - SP_FBITS;
+ round = (residue >> 31) != 0;
+ sticky = (residue << 1) != 0;
+ xm >>= SP_FBITS - xe;
+ }
+
+ odd = (xm & 0x1) != 0x0;
+
+ switch (ieee754_csr.rm) {
+ case FPU_CSR_RN: /* toward nearest */
+ if (round && (sticky || odd))
+ xm++;
+ break;
+ case FPU_CSR_RZ: /* toward zero */
+ break;
+ case FPU_CSR_RU: /* toward +infinity */
+ if ((round || sticky) && !xs)
+ xm++;
+ break;
+ case FPU_CSR_RD: /* toward -infinity */
+ if ((round || sticky) && xs)
+ xm++;
+ break;
+ }
+
+ if (round || sticky)
+ ieee754_setcx(IEEE754_INEXACT);
+
+ ret = ieee754sp_flong(xm);
+ SPSIGN(ret) = xs;
+
+ return ret;
+}
OpenPOWER on IntegriCloud