14 files changed, 1294 insertions, 318 deletions
diff --git a/arch/mips/math-emu/Makefile b/arch/mips/math-emu/Makefile
index e9bbc2a..e9f10b8 100644
--- a/arch/mips/math-emu/Makefile
+++ b/arch/mips/math-emu/Makefile
@@ -4,9 +4,11 @@
 
 obj-y	+= cp1emu.o ieee754dp.o ieee754sp.o ieee754.o \
 	   dp_div.o dp_mul.o dp_sub.o dp_add.o dp_fsp.o dp_cmp.o dp_simple.o \
-	   dp_tint.o dp_fint.o dp_maddf.o dp_2008class.o dp_fmin.o dp_fmax.o \
+	   dp_tint.o dp_fint.o dp_rint.o dp_maddf.o dp_2008class.o dp_fmin.o \
+	   dp_fmax.o							     \
 	   sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_simple.o \
-	   sp_tint.o sp_fint.o sp_maddf.o sp_2008class.o sp_fmin.o sp_fmax.o \
+	   sp_tint.o sp_fint.o sp_rint.o sp_maddf.o sp_2008class.o sp_fmin.o \
+	   sp_fmax.o							     \
 	   dsemul.o
 
 lib-y	+= ieee754d.o \
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index f08a7b4..192542d 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -58,7 +58,7 @@ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *,
 	mips_instruction);
 
 static int fpux_emu(struct pt_regs *,
-	struct mips_fpu_struct *, mips_instruction, void *__user *);
+	struct mips_fpu_struct *, mips_instruction, void __user **);
 
 /* Control registers */
 
@@ -830,12 +830,12 @@ do {									\
 } while (0)
 
 #define DIFROMREG(di, x)						\
-	((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) == 0)], 0))
+	((di) = get_fpr64(&ctx->fpr[(x) & ~(cop1_64bit(xcp) ^ 1)], 0))
 
 #define DITOREG(di, x)							\
 do {									\
 	unsigned fpr, i;						\
-	fpr = (x) & ~(cop1_64bit(xcp) == 0);				\
+	fpr = (x) & ~(cop1_64bit(xcp) ^ 1);				\
 	set_fpr64(&ctx->fpr[fpr], 0, di);				\
 	for (i = 1; i < ARRAY_SIZE(ctx->fpr[x].val64); i++)		\
 		set_fpr64(&ctx->fpr[fpr], i, 0);			\
@@ -973,7 +973,7 @@ static inline void cop1_ctc(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
  */
 
 static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
-		struct mm_decoded_insn dec_insn, void *__user *fault_addr)
+		struct mm_decoded_insn dec_insn, void __user **fault_addr)
 {
 	unsigned long contpc = xcp->cp0_epc + dec_insn.pc_inc;
 	unsigned int cond, cbit, bit0;
@@ -1195,9 +1195,11 @@ emul:
 			bit0 = get_fpr32(fpr, 0) & 0x1;
 			switch (MIPSInst_RS(ir)) {
 			case bc1eqz_op:
+				MIPS_FPU_EMU_INC_STATS(bc1eqz);
 				cond = bit0 == 0;
 				break;
 			case bc1nez_op:
+				MIPS_FPU_EMU_INC_STATS(bc1nez);
 				cond = bit0 != 0;
 				break;
 			}
@@ -1230,6 +1232,7 @@ emul:
 				break;
 			}
 branch_common:
+			MIPS_FPU_EMU_INC_STATS(branches);
 			set_delay_slot(xcp);
 			if (cond) {
 				/*
@@ -1460,7 +1463,7 @@ DEF3OP(nmadd, dp, ieee754dp_mul, ieee754dp_add, ieee754dp_neg);
 DEF3OP(nmsub, dp, ieee754dp_mul, ieee754dp_sub, ieee754dp_neg);
 
 static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
-	mips_instruction ir, void *__user *fault_addr)
+	mips_instruction ir, void __user **fault_addr)
 {
 	unsigned rcsr = 0;	/* resulting csr */
 
@@ -1682,15 +1685,19 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 		switch (MIPSInst_FUNC(ir)) {
 			/* binary ops */
 		case fadd_op:
+			MIPS_FPU_EMU_INC_STATS(add_s);
 			handler.b = ieee754sp_add;
 			goto scopbop;
 		case fsub_op:
+			MIPS_FPU_EMU_INC_STATS(sub_s);
 			handler.b = ieee754sp_sub;
 			goto scopbop;
 		case fmul_op:
+			MIPS_FPU_EMU_INC_STATS(mul_s);
 			handler.b = ieee754sp_mul;
 			goto scopbop;
 		case fdiv_op:
+			MIPS_FPU_EMU_INC_STATS(div_s);
 			handler.b = ieee754sp_div;
 			goto scopbop;
 
@@ -1699,6 +1706,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(sqrt_s);
 			handler.u = ieee754sp_sqrt;
 			goto scopuop;
 
@@ -1711,6 +1719,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(rsqrt_s);
 			handler.u = fpemu_sp_rsqrt;
 			goto scopuop;
 
@@ -1718,6 +1727,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(recip_s);
 			handler.u = fpemu_sp_recip;
 			goto scopuop;
 
@@ -1754,6 +1764,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(seleqz_s);
 			SPFROMREG(rv.s, MIPSInst_FT(ir));
 			if (rv.w & 0x1)
 				rv.w = 0;
@@ -1765,6 +1776,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(selnez_s);
 			SPFROMREG(rv.s, MIPSInst_FT(ir));
 			if (rv.w & 0x1)
 				SPFROMREG(rv.s, MIPSInst_FS(ir));
@@ -1778,6 +1790,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(maddf_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1791,6 +1804,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(msubf_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			SPFROMREG(fd, MIPSInst_FD(ir));
@@ -1804,9 +1818,9 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(rint_s);
 			SPFROMREG(fs, MIPSInst_FS(ir));
-			rv.l = ieee754sp_tlong(fs);
-			rv.s = ieee754sp_flong(rv.l);
+			rv.s = ieee754sp_rint(fs);
 			goto copcsr;
 		}
 
@@ -1816,6 +1830,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(class_s);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754sp_2008class(fs);
 			rfmt = w_fmt;
@@ -1828,6 +1843,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(min_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmin(fs, ft);
@@ -1840,6 +1856,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(mina_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmina(fs, ft);
@@ -1852,6 +1869,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(max_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmax(fs, ft);
@@ -1864,6 +1882,7 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(maxa_s);
 			SPFROMREG(ft, MIPSInst_FT(ir));
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fmaxa(fs, ft);
@@ -1871,15 +1890,18 @@ static int fpu_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
 		}
 
 		case fabs_op:
+			MIPS_FPU_EMU_INC_STATS(abs_s);
 			handler.u = ieee754sp_abs;
 			goto scopuop;
 
 		case fneg_op:
+			MIPS_FPU_EMU_INC_STATS(neg_s);
 			handler.u = ieee754sp_neg;
 			goto scopuop;
 
 		case fmov_op:
 			/* an easy one */
+			MIPS_FPU_EMU_INC_STATS(mov_s);
 			SPFROMREG(rv.s, MIPSInst_FS(ir));
 			goto copcsr;
 
@@ -1922,12 +1944,14 @@ copcsr:
 			return SIGILL;	/* not defined */
 
 		case fcvtd_op:
+			MIPS_FPU_EMU_INC_STATS(cvt_d_s);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fsp(fs);
 			rfmt = d_fmt;
 			goto copcsr;
 
 		case fcvtw_op:
+			MIPS_FPU_EMU_INC_STATS(cvt_w_s);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754sp_tint(fs);
 			rfmt = w_fmt;
@@ -1940,6 +1964,15 @@ copcsr:
 			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
+			if (MIPSInst_FUNC(ir) == fceil_op)
+				MIPS_FPU_EMU_INC_STATS(ceil_w_s);
+			if (MIPSInst_FUNC(ir) == ffloor_op)
+				MIPS_FPU_EMU_INC_STATS(floor_w_s);
+			if (MIPSInst_FUNC(ir) == fround_op)
+				MIPS_FPU_EMU_INC_STATS(round_w_s);
+			if (MIPSInst_FUNC(ir) == ftrunc_op)
+				MIPS_FPU_EMU_INC_STATS(trunc_w_s);
+
 			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -1952,6 +1985,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(sel_s);
 			SPFROMREG(fd, MIPSInst_FD(ir));
 			if (fd.bits & 0x1)
 				SPFROMREG(rv.s, MIPSInst_FT(ir));
@@ -1963,6 +1997,7 @@ copcsr:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(cvt_l_s);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.l = ieee754sp_tlong(fs);
 			rfmt = l_fmt;
@@ -1975,6 +2010,15 @@ copcsr:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
+			if (MIPSInst_FUNC(ir) == fceill_op)
+				MIPS_FPU_EMU_INC_STATS(ceil_l_s);
+			if (MIPSInst_FUNC(ir) == ffloorl_op)
+				MIPS_FPU_EMU_INC_STATS(floor_l_s);
+			if (MIPSInst_FUNC(ir) == froundl_op)
+				MIPS_FPU_EMU_INC_STATS(round_l_s);
+			if (MIPSInst_FUNC(ir) == ftruncl_op)
+				MIPS_FPU_EMU_INC_STATS(trunc_l_s);
+
 			oldrm = ieee754_csr.rm;
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2016,15 +2060,19 @@ copcsr:
 		switch (MIPSInst_FUNC(ir)) {
 			/* binary ops */
 		case fadd_op:
+			MIPS_FPU_EMU_INC_STATS(add_d);
 			handler.b = ieee754dp_add;
 			goto dcopbop;
 		case fsub_op:
+			MIPS_FPU_EMU_INC_STATS(sub_d);
 			handler.b = ieee754dp_sub;
 			goto dcopbop;
 		case fmul_op:
+			MIPS_FPU_EMU_INC_STATS(mul_d);
 			handler.b = ieee754dp_mul;
 			goto dcopbop;
 		case fdiv_op:
+			MIPS_FPU_EMU_INC_STATS(div_d);
 			handler.b = ieee754dp_div;
 			goto dcopbop;
 
@@ -2033,6 +2081,7 @@ copcsr:
 			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(sqrt_d);
 			handler.u = ieee754dp_sqrt;
 			goto dcopuop;
 		/*
@@ -2044,12 +2093,14 @@ copcsr:
 			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(rsqrt_d);
 			handler.u = fpemu_dp_rsqrt;
 			goto dcopuop;
 		case frecip_op:
 			if (!cpu_has_mips_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(recip_d);
 			handler.u = fpemu_dp_recip;
 			goto dcopuop;
 		case fmovc_op:
@@ -2083,6 +2134,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(seleqz_d);
 			DPFROMREG(rv.d, MIPSInst_FT(ir));
 			if (rv.l & 0x1)
 				rv.l = 0;
@@ -2094,6 +2146,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(selnez_d);
 			DPFROMREG(rv.d, MIPSInst_FT(ir));
 			if (rv.l & 0x1)
 				DPFROMREG(rv.d, MIPSInst_FS(ir));
@@ -2107,6 +2160,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(maddf_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2120,6 +2174,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(msubf_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			DPFROMREG(fd, MIPSInst_FD(ir));
@@ -2133,9 +2188,9 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(rint_d);
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			rv.l = ieee754dp_tlong(fs);
-			rv.d = ieee754dp_flong(rv.l);
+			rv.d = ieee754dp_rint(fs);
 			goto copcsr;
 		}
 
@@ -2145,9 +2200,10 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(class_d);
 			DPFROMREG(fs, MIPSInst_FS(ir));
-			rv.w = ieee754dp_2008class(fs);
-			rfmt = w_fmt;
+			rv.l = ieee754dp_2008class(fs);
+			rfmt = l_fmt;
 			break;
 		}
 
@@ -2157,6 +2213,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(min_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmin(fs, ft);
@@ -2169,6 +2226,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(mina_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmina(fs, ft);
@@ -2181,6 +2239,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(max_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmax(fs, ft);
@@ -2193,6 +2252,7 @@ copcsr:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(maxa_d);
 			DPFROMREG(ft, MIPSInst_FT(ir));
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fmaxa(fs, ft);
@@ -2200,15 +2260,18 @@ copcsr:
 		}
 
 		case fabs_op:
+			MIPS_FPU_EMU_INC_STATS(abs_d);
 			handler.u = ieee754dp_abs;
 			goto dcopuop;
 
 		case fneg_op:
+			MIPS_FPU_EMU_INC_STATS(neg_d);
 			handler.u = ieee754dp_neg;
 			goto dcopuop;
 
 		case fmov_op:
 			/* an easy one */
+			MIPS_FPU_EMU_INC_STATS(mov_d);
 			DPFROMREG(rv.d, MIPSInst_FS(ir));
 			goto copcsr;
 
@@ -2228,6 +2291,7 @@ dcopuop:
 		 * unary conv ops
 		 */
 		case fcvts_op:
+			MIPS_FPU_EMU_INC_STATS(cvt_s_d);
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fdp(fs);
 			rfmt = s_fmt;
@@ -2237,6 +2301,7 @@ dcopuop:
 			return SIGILL;	/* not defined */
 
 		case fcvtw_op:
+			MIPS_FPU_EMU_INC_STATS(cvt_w_d);
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.w = ieee754dp_tint(fs);	/* wrong */
 			rfmt = w_fmt;
@@ -2249,6 +2314,15 @@ dcopuop:
 			if (!cpu_has_mips_2_3_4_5_r)
 				return SIGILL;
 
+			if (MIPSInst_FUNC(ir) == fceil_op)
+				MIPS_FPU_EMU_INC_STATS(ceil_w_d);
+			if (MIPSInst_FUNC(ir) == ffloor_op)
+				MIPS_FPU_EMU_INC_STATS(floor_w_d);
+			if (MIPSInst_FUNC(ir) == fround_op)
+				MIPS_FPU_EMU_INC_STATS(round_w_d);
+			if (MIPSInst_FUNC(ir) == ftrunc_op)
+				MIPS_FPU_EMU_INC_STATS(trunc_w_d);
+
 			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2261,6 +2335,7 @@ dcopuop:
 			if (!cpu_has_mips_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(sel_d);
 			DPFROMREG(fd, MIPSInst_FD(ir));
 			if (fd.bits & 0x1)
 				DPFROMREG(rv.d, MIPSInst_FT(ir));
@@ -2272,6 +2347,7 @@ dcopuop:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
+			MIPS_FPU_EMU_INC_STATS(cvt_l_d);
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			rv.l = ieee754dp_tlong(fs);
 			rfmt = l_fmt;
@@ -2284,6 +2360,15 @@ dcopuop:
 			if (!cpu_has_mips_3_4_5_64_r2_r6)
 				return SIGILL;
 
+			if (MIPSInst_FUNC(ir) == fceill_op)
+				MIPS_FPU_EMU_INC_STATS(ceil_l_d);
+			if (MIPSInst_FUNC(ir) == ffloorl_op)
+				MIPS_FPU_EMU_INC_STATS(floor_l_d);
+			if (MIPSInst_FUNC(ir) == froundl_op)
+				MIPS_FPU_EMU_INC_STATS(round_l_d);
+			if (MIPSInst_FUNC(ir) == ftruncl_op)
+				MIPS_FPU_EMU_INC_STATS(trunc_l_d);
+
 			oldrm = ieee754_csr.rm;
 			DPFROMREG(fs, MIPSInst_FS(ir));
 			ieee754_csr.rm = MIPSInst_FUNC(ir);
@@ -2325,12 +2410,14 @@ dcopuop:
 		switch (MIPSInst_FUNC(ir)) {
 		case fcvts_op:
 			/* convert word to single precision real */
+			MIPS_FPU_EMU_INC_STATS(cvt_s_w);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.s = ieee754sp_fint(fs.bits);
 			rfmt = s_fmt;
 			goto copcsr;
 		case fcvtd_op:
 			/* convert word to double precision real */
+			MIPS_FPU_EMU_INC_STATS(cvt_d_w);
 			SPFROMREG(fs, MIPSInst_FS(ir));
 			rv.d = ieee754dp_fint(fs.bits);
 			rfmt = d_fmt;
@@ -2350,6 +2437,90 @@ dcopuop:
 			    (MIPSInst_FUNC(ir) & 0x20))
 				return SIGILL;
 
+			if (!sig) {
+				if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+					switch (cmpop) {
+					case 0:
+					MIPS_FPU_EMU_INC_STATS(cmp_af_s);
+					break;
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_un_s);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_eq_s);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_ueq_s);
+					break;
+					case 4:
+					MIPS_FPU_EMU_INC_STATS(cmp_lt_s);
+					break;
+					case 5:
+					MIPS_FPU_EMU_INC_STATS(cmp_ult_s);
+					break;
+					case 6:
+					MIPS_FPU_EMU_INC_STATS(cmp_le_s);
+					break;
+					case 7:
+					MIPS_FPU_EMU_INC_STATS(cmp_ule_s);
+					break;
+					}
+				} else {
+					switch (cmpop) {
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_or_s);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_une_s);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_ne_s);
+					break;
+					}
+				}
+			} else {
+				if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+					switch (cmpop) {
+					case 0:
+					MIPS_FPU_EMU_INC_STATS(cmp_saf_s);
+					break;
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_sun_s);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_seq_s);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_sueq_s);
+					break;
+					case 4:
+					MIPS_FPU_EMU_INC_STATS(cmp_slt_s);
+					break;
+					case 5:
+					MIPS_FPU_EMU_INC_STATS(cmp_sult_s);
+					break;
+					case 6:
+					MIPS_FPU_EMU_INC_STATS(cmp_sle_s);
+					break;
+					case 7:
+					MIPS_FPU_EMU_INC_STATS(cmp_sule_s);
+					break;
+					}
+				} else {
+					switch (cmpop) {
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_sor_s);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_sune_s);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_sne_s);
+					break;
+					}
+				}
+			}
+
 			/* fmt is w_fmt for single precision so fix it */
 			rfmt = s_fmt;
 			/* default to false */
@@ -2394,6 +2565,7 @@ dcopuop:
 			break;
 			}
 		}
+		break;
 	}
 
 	case l_fmt:
@@ -2406,11 +2578,13 @@ dcopuop:
 		switch (MIPSInst_FUNC(ir)) {
 		case fcvts_op:
 			/* convert long to single precision real */
+			MIPS_FPU_EMU_INC_STATS(cvt_s_l);
 			rv.s = ieee754sp_flong(bits);
 			rfmt = s_fmt;
 			goto copcsr;
 		case fcvtd_op:
 			/* convert long to double precision real */
+			MIPS_FPU_EMU_INC_STATS(cvt_d_l);
 			rv.d = ieee754dp_flong(bits);
 			rfmt = d_fmt;
 			goto copcsr;
@@ -2424,6 +2598,90 @@ dcopuop:
 			    (MIPSInst_FUNC(ir) & 0x20))
 				return SIGILL;
 
+			if (!sig) {
+				if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+					switch (cmpop) {
+					case 0:
+					MIPS_FPU_EMU_INC_STATS(cmp_af_d);
+					break;
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_un_d);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_eq_d);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_ueq_d);
+					break;
+					case 4:
+					MIPS_FPU_EMU_INC_STATS(cmp_lt_d);
+					break;
+					case 5:
+					MIPS_FPU_EMU_INC_STATS(cmp_ult_d);
+					break;
+					case 6:
+					MIPS_FPU_EMU_INC_STATS(cmp_le_d);
+					break;
+					case 7:
+					MIPS_FPU_EMU_INC_STATS(cmp_ule_d);
+					break;
+					}
+				} else {
+					switch (cmpop) {
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_or_d);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_une_d);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_ne_d);
+					break;
+					}
+				}
+			} else {
+				if (!(MIPSInst_FUNC(ir) & PREDICATE_BIT)) {
+					switch (cmpop) {
+					case 0:
+					MIPS_FPU_EMU_INC_STATS(cmp_saf_d);
+					break;
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_sun_d);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_seq_d);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_sueq_d);
+					break;
+					case 4:
+					MIPS_FPU_EMU_INC_STATS(cmp_slt_d);
+					break;
+					case 5:
+					MIPS_FPU_EMU_INC_STATS(cmp_sult_d);
+					break;
+					case 6:
+					MIPS_FPU_EMU_INC_STATS(cmp_sle_d);
+					break;
+					case 7:
+					MIPS_FPU_EMU_INC_STATS(cmp_sule_d);
+					break;
+					}
+				} else {
+					switch (cmpop) {
+					case 1:
+					MIPS_FPU_EMU_INC_STATS(cmp_sor_d);
+					break;
+					case 2:
+					MIPS_FPU_EMU_INC_STATS(cmp_sune_d);
+					break;
+					case 3:
+					MIPS_FPU_EMU_INC_STATS(cmp_sne_d);
+					break;
+					}
+				}
+			}
+
 			/* fmt is l_fmt for double precision so fix it */
 			rfmt = d_fmt;
 			/* default to false */
@@ -2468,6 +2726,8 @@ dcopuop:
 			break;
 			}
 		}
+		break;
+
 	default:
 		return SIGILL;
 	}
@@ -2553,7 +2813,7 @@ dcopuop:
  * For simplicity we always terminate upon an ISA mode switch.
  */
 int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
-	int has_fpu, void *__user *fault_addr)
+	int has_fpu, void __user **fault_addr)
 {
 	unsigned long oldepc, prevepc;
 	struct mm_decoded_insn dec_insn;
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index fd71b8d..5bec64f 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754dp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
 		return ys ? x : y;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754dp_zero(1);
+		return ieee754dp_zero(xs & ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
 	else if (xs < ys)
 		return x;
 
-	/* Compare exponent */
-	if (xe > ye)
-		return x;
-	else if (xe < ye)
-		return y;
+	/* Signs of inputs are equal, let's compare exponents */
+	if (xs == 0) {
+		/* Inputs are both positive */
+		if (xe > ye)
+			return x;
+		else if (xe < ye)
+			return y;
+	} else {
+		/* Inputs are both negative */
+		if (xe > ye)
+			return y;
+		else if (xe < ye)
+			return x;
+	}
 
-	/* Compare mantissa */
+	/* Signs and exponents of inputs are equal, let's compare mantissas */
+	if (xs == 0) {
+		/* Inputs are both positive, with equal signs and exponents */
+		if (xm <= ym)
+			return y;
+		return x;
+	}
+	/* Inputs are both negative, with equal signs and exponents */
 	if (xm <= ym)
-		return y;
-	return x;
+		return x;
+	return y;
 }
 
 union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754dp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
 	/*
 	 * Infinity and zero handling
 	 */
+	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+		return ieee754dp_inf(xs & ys);
+
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
 		return y;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754dp_zero(1);
+		return ieee754dp_zero(xs & ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
 		return y;
 
 	/* Compare mantissa */
-	if (xm <= ym)
+	if (xm < ym)
 		return y;
-	return x;
+	else if (xm > ym)
+		return x;
+	else if (xs == 0)
+		return x;
+	return y;
 }
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index c1072b0..a287b23 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754dp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
 		return ys ? y : x;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754dp_zero(1);
+		return ieee754dp_zero(xs | ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
 	else if (xs < ys)
 		return y;
 
-	/* Compare exponent */
-	if (xe > ye)
-		return y;
-	else if (xe < ye)
-		return x;
+	/* Signs of inputs are the same, let's compare exponents */
+	if (xs == 0) {
+		/* Inputs are both positive */
+		if (xe > ye)
+			return y;
+		else if (xe < ye)
+			return x;
+	} else {
+		/* Inputs are both negative */
+		if (xe > ye)
+			return x;
+		else if (xe < ye)
+			return y;
+	}
 
-	/* Compare mantissa */
+	/* Signs and exponents of inputs are equal, let's compare mantissas */
+	if (xs == 0) {
+		/* Inputs are both positive, with equal signs and exponents */
+		if (xm <= ym)
+			return x;
+		return y;
+	}
+	/* Inputs are both negative, with equal signs and exponents */
 	if (xm <= ym)
-		return x;
-	return y;
+		return y;
+	return x;
 }
 
 union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754dp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
 	/*
 	 * Infinity and zero handling
 	 */
+	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+		return ieee754dp_inf(xs | ys);
+
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		return x;
+		return y;
 
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
-		return y;
+		return x;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754dp_zero(1);
+		return ieee754dp_zero(xs | ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
 		return x;
 
 	/* Compare mantissa */
-	if (xm <= ym)
+	if (xm < ym)
+		return x;
+	else if (xm > ym)
+		return y;
+	else if (xs == 1)
 		return x;
 	return y;
 }
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index caa62f2..e0d9be5 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,22 +14,45 @@
 
 #include "ieee754dp.h"
 
-enum maddf_flags {
-	maddf_negate_product	= 1 << 0,
-};
+
+/* 128 bits shift right logical with rounding. */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+	u64 low;
+
+	if (count >= 128) {
+		*lptr = *hptr != 0 || *lptr != 0;
+		*hptr = 0;
+	} else if (count >= 64) {
+		if (count == 64) {
+			*lptr = *hptr | (*lptr != 0);
+		} else {
+			low = *lptr;
+			*lptr = *hptr >> (count - 64);
+			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+		}
+		*hptr = 0;
+	} else {
+		low = *lptr;
+		*lptr = low >> count | *hptr << (64 - count);
+		*lptr |= (low << (64 - count)) != 0;
+		*hptr = *hptr >> count;
+	}
+}
 
 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 				 union ieee754dp y, enum maddf_flags flags)
 {
 	int re;
 	int rs;
-	u64 rm;
 	unsigned lxm;
 	unsigned hxm;
 	unsigned lym;
 	unsigned hym;
 	u64 lrm;
 	u64 hrm;
+	u64 lzm;
+	u64 hzm;
 	u64 t;
 	u64 at;
 	int s;
@@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 
 	ieee754_clearcx();
 
-	switch (zc) {
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
+	/*
+	 * Handle the cases when at least one of x, y or z is a NaN.
+	 * Order of precedence is sNaN, qNaN and z, x, y.
+	 */
+	if (zc == IEEE754_CLASS_SNAN)
 		return ieee754dp_nanxcpt(z);
-	case IEEE754_CLASS_DNORM:
-		DPDNORMZ;
-	/* QNAN and ZERO cases are handled separately below */
-	}
-
-	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
-		return ieee754dp_nanxcpt(y);
-
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
+	if (xc == IEEE754_CLASS_SNAN)
 		return ieee754dp_nanxcpt(x);
-
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
+	if (yc == IEEE754_CLASS_SNAN)
+		return ieee754dp_nanxcpt(y);
+	if (zc == IEEE754_CLASS_QNAN)
+		return z;
+	if (xc == IEEE754_CLASS_QNAN)
+		return x;
+	if (yc == IEEE754_CLASS_QNAN)
 		return y;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
-		return x;
+	if (zc == IEEE754_CLASS_DNORM)
+		DPDNORMZ;
+	/* ZERO z cases are handled separately below */
 
+	switch (CLPAIR(xc, yc)) {
 
 	/*
 	 * Infinity handling
 	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
 		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		return ieee754dp_indef();
 
@@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		return ieee754dp_inf(xs ^ ys);
+		if ((zc == IEEE754_CLASS_INF) &&
+		    ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+		     ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+			/*
+			 * Cases of addition of infinities with opposite signs
+			 * or subtraction of infinities with same signs.
+			 */
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754dp_indef();
+		}
+		/*
+		 * z is here either not an infinity, or an infinity having the
+		 * same sign as product (x*y) (in case of MADDF.D instruction)
+		 * or product -(x*y) (in MSUBF.D case). The result must be an
+		 * infinity, and its sign is determined only by the value of
+		 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+		 */
+		if (flags & MADDF_NEGATE_PRODUCT)
+			return ieee754dp_inf(1 ^ (xs ^ ys));
+		else
+			return ieee754dp_inf(xs ^ ys);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 		if (zc == IEEE754_CLASS_INF)
 			return ieee754dp_inf(zs);
-		/* Multiplication is 0 so just return z */
+		if (zc == IEEE754_CLASS_ZERO) {
+			/* Handle cases +0 + (-0) and similar ones. */
+			if ((!(flags & MADDF_NEGATE_PRODUCT)
+					&& (zs == (xs ^ ys))) ||
+			    ((flags & MADDF_NEGATE_PRODUCT)
+					&& (zs != (xs ^ ys))))
+				/*
+				 * Cases of addition of zeros of equal signs
+				 * or subtraction of zeroes of opposite signs.
+				 * The sign of the resulting zero is in any
+				 * such case determined only by the sign of z.
+				 */
+				return z;
+
+			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
+		}
+		/* x*y is here 0, and z is not 0, so just return z */
 		return z;
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		DPDNORMX;
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754dp_inf(zs);
 		DPDNORMY;
 		break;
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754dp_inf(zs);
 		DPDNORMX;
 		break;
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754dp_inf(zs);
 		/* fall through to real computations */
 	}
@@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 
 	re = xe + ye;
 	rs = xs ^ ys;
-	if (flags & maddf_negate_product)
+	if (flags & MADDF_NEGATE_PRODUCT)
 		rs ^= 1;
 
 	/* shunt to top of word */
@@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 	ym <<= 64 - (DP_FBITS + 1);
 
 	/*
-	 * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+	 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
 	 */
 
 	/* 32 * 32 => 64 */
@@ -195,81 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
 
 	hrm = hrm + (t >> 32);
 
-	rm = hrm | (lrm != 0);
-
-	/*
-	 * Sticky shift down to normal rounding precision.
-	 */
-	if ((s64) rm < 0) {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
-		     ((rm << (DP_FBITS + 1 + 3)) != 0);
+	/* Put explicit bit at bit 126 if necessary */
+	if ((int64_t)hrm < 0) {
+		lrm = (hrm << 63) | (lrm >> 1);
+		hrm = hrm >> 1;
 		re++;
-	} else {
-		rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
-		     ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
 	}
-	assert(rm & (DP_HIDDEN_BIT << 3));
 
-	if (zc == IEEE754_CLASS_ZERO)
-		return ieee754dp_format(rs, re, rm);
+	assert(hrm & (1 << 62));
 
-	/* And now the addition */
-	assert(zm & DP_HIDDEN_BIT);
+	if (zc == IEEE754_CLASS_ZERO) {
+		/*
+		 * Move explicit bit from bit 126 to bit 55 since the
+		 * ieee754dp_format code expects the mantissa to be
+		 * 56 bits wide (53 + 3 rounding bits).
+		 */
+		srl128(&hrm, &lrm, (126 - 55));
+		return ieee754dp_format(rs, re, lrm);
+	}
 
-	/*
-	 * Provide guard,round and stick bit space.
-	 */
-	zm <<= 3;
+	/* Move explicit bit from bit 52 to bit 126 */
+	lzm = 0;
+	hzm = zm << 10;
+	assert(hzm & (1 << 62));
 
+	/* Make the exponents the same */
 	if (ze > re) {
 		/*
 		 * Have to shift y fraction right to align.
 		 */
 		s = ze - re;
-		rm = XDPSRS(rm, s);
+		srl128(&hrm, &lrm, s);
 		re += s;
 	} else if (re > ze) {
 		/*
 		 * Have to shift x fraction right to align.
 		 */
 		s = re - ze;
-		zm = XDPSRS(zm, s);
+		srl128(&hzm, &lzm, s);
 		ze += s;
 	}
 	assert(ze == re);
 	assert(ze <= DP_EMAX);
 
+	/* Do the addition */
 	if (zs == rs) {
 		/*
-		 * Generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in xm, xs and xe.
+		 * Generate 128 bit result by adding two 127 bit numbers
+		 * leaving result in hzm:lzm, zs and ze.
 		 */
-		zm = zm + rm;
-
-		if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
-			zm = XDPSRS1(zm);
+		hzm = hzm + hrm + (lzm > (lzm + lrm));
+		lzm = lzm + lrm;
+		if ((int64_t)hzm < 0) {        /* carry out */
+			srl128(&hzm, &lzm, 1);
 			ze++;
 		}
 	} else {
-		if (zm >= rm) {
-			zm = zm - rm;
+		if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+			hzm = hzm - hrm - (lzm < lrm);
+			lzm = lzm - lrm;
 		} else {
-			zm = rm - zm;
+			hzm = hrm - hzm - (lrm < lzm);
+			lzm = lrm - lzm;
 			zs = rs;
 		}
-		if (zm == 0)
+		if (lzm == 0 && hzm == 0)
 			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 		/*
-		 * Normalize to rounding precision.
+		 * Put explicit bit at bit 126 if necessary.
 		 */
-		while ((zm >> (DP_FBITS + 3)) == 0) {
-			zm <<= 1;
-			ze--;
+		if (hzm == 0) {
+			/* left shift by 63 or 64 bits */
+			if ((int64_t)lzm < 0) {
+				/* MSB of lzm is the explicit bit */
+				hzm = lzm >> 1;
+				lzm = lzm << 63;
+				ze -= 63;
+			} else {
+				hzm = lzm;
+				lzm = 0;
+				ze -= 64;
+			}
+		}
+
+		t = 0;
+		while ((hzm >> (62 - t)) == 0)
+			t++;
+
+		assert(t <= 62);
+		if (t) {
+			hzm = hzm << t | lzm >> (64 - t);
+			lzm = lzm << t;
+			ze -= t;
 		}
 	}
 
-	return ieee754dp_format(zs, ze, zm);
+	/*
+	 * Move explicit bit from bit 126 to bit 55 since the
+	 * ieee754dp_format code expects the mantissa to be
+	 * 56 bits wide (53 + 3 rounding bits).
+	 */
+	srl128(&hzm, &lzm, (126 - 55));
+
+	return ieee754dp_format(zs, ze, lzm);
 }
 
 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
@@ -281,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
 				union ieee754dp y)
 {
-	return _dp_maddf(z, x, y, maddf_negate_product);
+	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 }
diff --git a/arch/mips/math-emu/dp_rint.c b/arch/mips/math-emu/dp_rint.c
new file mode 100644
index 0000000..c3b9077
--- /dev/null
+++ b/arch/mips/math-emu/dp_rint.c
@@ -0,0 +1,89 @@
+/* IEEE754 floating point arithmetic
+ * double precision: common utilities
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program.
+ */
+
+#include "ieee754dp.h"
+
+union ieee754dp ieee754dp_rint(union ieee754dp x)
+{
+	union ieee754dp ret;
+	u64 residue;
+	int sticky;
+	int round;
+	int odd;
+
+	COMPXDP;
+
+	ieee754_clearcx();
+
+	EXPLODEXDP;
+	FLUSHXDP;
+
+	if (xc == IEEE754_CLASS_SNAN)
+		return ieee754dp_nanxcpt(x);
+
+	if ((xc == IEEE754_CLASS_QNAN) ||
+	    (xc == IEEE754_CLASS_INF) ||
+	    (xc == IEEE754_CLASS_ZERO))
+		return x;
+
+	if (xe >= DP_FBITS)
+		return x;
+
+	if (xe < -1) {
+		residue = xm;
+		round = 0;
+		sticky = residue != 0;
+		xm = 0;
+	} else {
+		residue = xm << (64 - DP_FBITS + xe);
+		round = (residue >> 63) != 0;
+		sticky = (residue << 1) != 0;
+		xm >>= DP_FBITS - xe;
+	}
+
+	odd = (xm & 0x1) != 0x0;
+
+	switch (ieee754_csr.rm) {
+	case FPU_CSR_RN:	/* toward nearest */
+		if (round && (sticky || odd))
+			xm++;
+		break;
+	case FPU_CSR_RZ:	/* toward zero */
+		break;
+	case FPU_CSR_RU:	/* toward +infinity */
+		if ((round || sticky) && !xs)
+			xm++;
+		break;
+	case FPU_CSR_RD:	/* toward -infinity */
+		if ((round || sticky) && xs)
+			xm++;
+		break;
+	}
+
+	if (round || sticky)
+		ieee754_setcx(IEEE754_INEXACT);
+
+	ret = ieee754dp_flong(xm);
+	DPSIGN(ret) = xs;
+
+	return ret;
+}
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index d3be351..92dc8fa 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -67,6 +67,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y);
 union ieee754sp ieee754sp_fint(int x);
 union ieee754sp ieee754sp_flong(s64 x);
 union ieee754sp ieee754sp_fdp(union ieee754dp x);
+union ieee754sp ieee754sp_rint(union ieee754sp x);
 
 int ieee754sp_tint(union ieee754sp x);
 s64 ieee754sp_tlong(union ieee754sp x);
@@ -101,6 +102,7 @@ union ieee754dp ieee754dp_neg(union ieee754dp x);
 union ieee754dp ieee754dp_fint(int x);
 union ieee754dp ieee754dp_flong(s64 x);
 union ieee754dp ieee754dp_fsp(union ieee754sp x);
+union ieee754dp ieee754dp_rint(union ieee754dp x);
 
 int ieee754dp_tint(union ieee754dp x);
 s64 ieee754dp_tlong(union ieee754dp x);
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 8bc2f69..dd2071f 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -26,6 +26,10 @@
 
 #define CLPAIR(x, y)	((x)*6+(y))
 
+enum maddf_flags {
+	MADDF_NEGATE_PRODUCT	= 1 << 0,
+};
+
 static inline void ieee754_clearcx(void)
 {
 	ieee754_csr.cx = 0;
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 8476067..0f63e42 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -45,6 +45,10 @@ static inline int ieee754sp_finite(union ieee754sp x)
 	return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
 }
 
+/* 64 bit right shift with rounding */
+#define XSPSRS64(v, rs)						\
+	(((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0))
+
 /* 3bit extended single precision sticky right shift */
 #define XSPSRS(v, rs)						\
 	((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0))
diff --git a/arch/mips/math-emu/me-debugfs.c b/arch/mips/math-emu/me-debugfs.c
index be650ed..8c0ec15 100644
--- a/arch/mips/math-emu/me-debugfs.c
+++ b/arch/mips/math-emu/me-debugfs.c
@@ -28,14 +28,190 @@ static int fpuemu_stat_get(void *data, u64 *val)
 }
 DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n");
 
+/*
+ * Used to obtain names for a debugfs instruction counter, given field name
+ * in fpuemustats structure. For example, for input "cmp_sueq_d", the output
+ * would be "cmp.sueq.d". This is needed since dots are not allowed to be
+ * used in structure field names, and are, on the other hand, desired to be
+ * used in debugfs item names to be clearly associated to corresponding
+ * MIPS FPU instructions.
+ */
+static void adjust_instruction_counter_name(char *out_name, char *in_name)
+{
+	int i = 0;
+
+	strcpy(out_name, in_name);
+	while (in_name[i] != '\0') {
+		if (out_name[i] == '_')
+			out_name[i] = '.';
+		i++;
+	}
+}
+
+static int fpuemustats_clear_show(struct seq_file *s, void *unused)
+{
+	__this_cpu_write((fpuemustats).emulated, 0);
+	__this_cpu_write((fpuemustats).loads, 0);
+	__this_cpu_write((fpuemustats).stores, 0);
+	__this_cpu_write((fpuemustats).branches, 0);
+	__this_cpu_write((fpuemustats).cp1ops, 0);
+	__this_cpu_write((fpuemustats).cp1xops, 0);
+	__this_cpu_write((fpuemustats).errors, 0);
+	__this_cpu_write((fpuemustats).ieee754_inexact, 0);
+	__this_cpu_write((fpuemustats).ieee754_underflow, 0);
+	__this_cpu_write((fpuemustats).ieee754_overflow, 0);
+	__this_cpu_write((fpuemustats).ieee754_zerodiv, 0);
+	__this_cpu_write((fpuemustats).ieee754_invalidop, 0);
+	__this_cpu_write((fpuemustats).ds_emul, 0);
+
+	__this_cpu_write((fpuemustats).abs_s, 0);
+	__this_cpu_write((fpuemustats).abs_d, 0);
+	__this_cpu_write((fpuemustats).add_s, 0);
+	__this_cpu_write((fpuemustats).add_d, 0);
+	__this_cpu_write((fpuemustats).bc1eqz, 0);
+	__this_cpu_write((fpuemustats).bc1nez, 0);
+	__this_cpu_write((fpuemustats).ceil_w_s, 0);
+	__this_cpu_write((fpuemustats).ceil_w_d, 0);
+	__this_cpu_write((fpuemustats).ceil_l_s, 0);
+	__this_cpu_write((fpuemustats).ceil_l_d, 0);
+	__this_cpu_write((fpuemustats).class_s, 0);
+	__this_cpu_write((fpuemustats).class_d, 0);
+	__this_cpu_write((fpuemustats).cmp_af_s, 0);
+	__this_cpu_write((fpuemustats).cmp_af_d, 0);
+	__this_cpu_write((fpuemustats).cmp_eq_s, 0);
+	__this_cpu_write((fpuemustats).cmp_eq_d, 0);
+	__this_cpu_write((fpuemustats).cmp_le_s, 0);
+	__this_cpu_write((fpuemustats).cmp_le_d, 0);
+	__this_cpu_write((fpuemustats).cmp_lt_s, 0);
+	__this_cpu_write((fpuemustats).cmp_lt_d, 0);
+	__this_cpu_write((fpuemustats).cmp_ne_s, 0);
+	__this_cpu_write((fpuemustats).cmp_ne_d, 0);
+	__this_cpu_write((fpuemustats).cmp_or_s, 0);
+	__this_cpu_write((fpuemustats).cmp_or_d, 0);
+	__this_cpu_write((fpuemustats).cmp_ueq_s, 0);
+	__this_cpu_write((fpuemustats).cmp_ueq_d, 0);
+	__this_cpu_write((fpuemustats).cmp_ule_s, 0);
+	__this_cpu_write((fpuemustats).cmp_ule_d, 0);
+	__this_cpu_write((fpuemustats).cmp_ult_s, 0);
+	__this_cpu_write((fpuemustats).cmp_ult_d, 0);
+	__this_cpu_write((fpuemustats).cmp_un_s, 0);
+	__this_cpu_write((fpuemustats).cmp_un_d, 0);
+	__this_cpu_write((fpuemustats).cmp_une_s, 0);
+	__this_cpu_write((fpuemustats).cmp_une_d, 0);
+	__this_cpu_write((fpuemustats).cmp_saf_s, 0);
+	__this_cpu_write((fpuemustats).cmp_saf_d, 0);
+	__this_cpu_write((fpuemustats).cmp_seq_s, 0);
+	__this_cpu_write((fpuemustats).cmp_seq_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sle_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sle_d, 0);
+	__this_cpu_write((fpuemustats).cmp_slt_s, 0);
+	__this_cpu_write((fpuemustats).cmp_slt_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sne_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sne_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sor_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sor_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sueq_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sueq_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sule_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sule_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sult_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sult_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sun_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sun_d, 0);
+	__this_cpu_write((fpuemustats).cmp_sune_s, 0);
+	__this_cpu_write((fpuemustats).cmp_sune_d, 0);
+	__this_cpu_write((fpuemustats).cvt_d_l, 0);
+	__this_cpu_write((fpuemustats).cvt_d_s, 0);
+	__this_cpu_write((fpuemustats).cvt_d_w, 0);
+	__this_cpu_write((fpuemustats).cvt_l_s, 0);
+	__this_cpu_write((fpuemustats).cvt_l_d, 0);
+	__this_cpu_write((fpuemustats).cvt_s_d, 0);
+	__this_cpu_write((fpuemustats).cvt_s_l, 0);
+	__this_cpu_write((fpuemustats).cvt_s_w, 0);
+	__this_cpu_write((fpuemustats).cvt_w_s, 0);
+	__this_cpu_write((fpuemustats).cvt_w_d, 0);
+	__this_cpu_write((fpuemustats).div_s, 0);
+	__this_cpu_write((fpuemustats).div_d, 0);
+	__this_cpu_write((fpuemustats).floor_w_s, 0);
+	__this_cpu_write((fpuemustats).floor_w_d, 0);
+	__this_cpu_write((fpuemustats).floor_l_s, 0);
+	__this_cpu_write((fpuemustats).floor_l_d, 0);
+	__this_cpu_write((fpuemustats).maddf_s, 0);
+	__this_cpu_write((fpuemustats).maddf_d, 0);
+	__this_cpu_write((fpuemustats).max_s, 0);
+	__this_cpu_write((fpuemustats).max_d, 0);
+	__this_cpu_write((fpuemustats).maxa_s, 0);
+	__this_cpu_write((fpuemustats).maxa_d, 0);
+	__this_cpu_write((fpuemustats).min_s, 0);
+	__this_cpu_write((fpuemustats).min_d, 0);
+	__this_cpu_write((fpuemustats).mina_s, 0);
+	__this_cpu_write((fpuemustats).mina_d, 0);
+	__this_cpu_write((fpuemustats).mov_s, 0);
+	__this_cpu_write((fpuemustats).mov_d, 0);
+	__this_cpu_write((fpuemustats).msubf_s, 0);
+	__this_cpu_write((fpuemustats).msubf_d, 0);
+	__this_cpu_write((fpuemustats).mul_s, 0);
+	__this_cpu_write((fpuemustats).mul_d, 0);
+	__this_cpu_write((fpuemustats).neg_s, 0);
+	__this_cpu_write((fpuemustats).neg_d, 0);
+	__this_cpu_write((fpuemustats).recip_s, 0);
+	__this_cpu_write((fpuemustats).recip_d, 0);
+	__this_cpu_write((fpuemustats).rint_s, 0);
+	__this_cpu_write((fpuemustats).rint_d, 0);
+	__this_cpu_write((fpuemustats).round_w_s, 0);
+	__this_cpu_write((fpuemustats).round_w_d, 0);
+	__this_cpu_write((fpuemustats).round_l_s, 0);
+	__this_cpu_write((fpuemustats).round_l_d, 0);
+	__this_cpu_write((fpuemustats).rsqrt_s, 0);
+	__this_cpu_write((fpuemustats).rsqrt_d, 0);
+	__this_cpu_write((fpuemustats).sel_s, 0);
+	__this_cpu_write((fpuemustats).sel_d, 0);
+	__this_cpu_write((fpuemustats).seleqz_s, 0);
+	__this_cpu_write((fpuemustats).seleqz_d, 0);
+	__this_cpu_write((fpuemustats).selnez_s, 0);
+	__this_cpu_write((fpuemustats).selnez_d, 0);
+	__this_cpu_write((fpuemustats).sqrt_s, 0);
+	__this_cpu_write((fpuemustats).sqrt_d, 0);
+	__this_cpu_write((fpuemustats).sub_s, 0);
+	__this_cpu_write((fpuemustats).sub_d, 0);
+	__this_cpu_write((fpuemustats).trunc_w_s, 0);
+	__this_cpu_write((fpuemustats).trunc_w_d, 0);
+	__this_cpu_write((fpuemustats).trunc_l_s, 0);
+	__this_cpu_write((fpuemustats).trunc_l_d, 0);
+
+	return 0;
+}
+
+static int fpuemustats_clear_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, fpuemustats_clear_show, inode->i_private);
+}
+
+static const struct file_operations fpuemustats_clear_fops = {
+	.open                   = fpuemustats_clear_open,
+	.read			= seq_read,
+	.llseek			= seq_lseek,
+	.release		= single_release,
+};
+
 static int __init debugfs_fpuemu(void)
 {
-	struct dentry *d, *dir;
+	struct dentry *fpuemu_debugfs_base_dir;
+	struct dentry *fpuemu_debugfs_inst_dir;
+	struct dentry *d, *reset_file;
 
 	if (!mips_debugfs_dir)
 		return -ENODEV;
-	dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir);
-	if (!dir)
+
+	fpuemu_debugfs_base_dir = debugfs_create_dir("fpuemustats",
+						     mips_debugfs_dir);
+	if (!fpuemu_debugfs_base_dir)
+		return -ENOMEM;
+
+	reset_file = debugfs_create_file("fpuemustats_clear", 0444,
+					 mips_debugfs_dir, NULL,
+					 &fpuemustats_clear_fops);
+	if (!reset_file)
 		return -ENOMEM;
 
 #define FPU_EMU_STAT_OFFSET(m)						\
@@ -43,7 +219,7 @@ static int __init debugfs_fpuemu(void)
 
 #define FPU_STAT_CREATE(m)						\
 do {									\
-	d = debugfs_create_file(#m , S_IRUGO, dir,			\
+	d = debugfs_create_file(#m, 0444, fpuemu_debugfs_base_dir,	\
 				(void *)FPU_EMU_STAT_OFFSET(m),		\
 				&fops_fpuemu_stat);			\
 	if (!d)								\
@@ -53,6 +229,7 @@ do {									\
 	FPU_STAT_CREATE(emulated);
 	FPU_STAT_CREATE(loads);
 	FPU_STAT_CREATE(stores);
+	FPU_STAT_CREATE(branches);
 	FPU_STAT_CREATE(cp1ops);
 	FPU_STAT_CREATE(cp1xops);
 	FPU_STAT_CREATE(errors);
@@ -63,6 +240,139 @@ do {									\
 	FPU_STAT_CREATE(ieee754_invalidop);
 	FPU_STAT_CREATE(ds_emul);
 
+	fpuemu_debugfs_inst_dir = debugfs_create_dir("instructions",
+						     fpuemu_debugfs_base_dir);
+	if (!fpuemu_debugfs_inst_dir)
+		return -ENOMEM;
+
+#define FPU_STAT_CREATE_EX(m)						\
+do {									\
+	char name[32];							\
+									\
+	adjust_instruction_counter_name(name, #m);			\
+									\
+	d = debugfs_create_file(name, 0444, fpuemu_debugfs_inst_dir,	\
+				(void *)FPU_EMU_STAT_OFFSET(m),		\
+				&fops_fpuemu_stat);			\
+	if (!d)								\
+		return -ENOMEM;						\
+} while (0)
+
+	FPU_STAT_CREATE_EX(abs_s);
+	FPU_STAT_CREATE_EX(abs_d);
+	FPU_STAT_CREATE_EX(add_s);
+	FPU_STAT_CREATE_EX(add_d);
+	FPU_STAT_CREATE_EX(bc1eqz);
+	FPU_STAT_CREATE_EX(bc1nez);
+	FPU_STAT_CREATE_EX(ceil_w_s);
+	FPU_STAT_CREATE_EX(ceil_w_d);
+	FPU_STAT_CREATE_EX(ceil_l_s);
+	FPU_STAT_CREATE_EX(ceil_l_d);
+	FPU_STAT_CREATE_EX(class_s);
+	FPU_STAT_CREATE_EX(class_d);
+	FPU_STAT_CREATE_EX(cmp_af_s);
+	FPU_STAT_CREATE_EX(cmp_af_d);
+	FPU_STAT_CREATE_EX(cmp_eq_s);
+	FPU_STAT_CREATE_EX(cmp_eq_d);
+	FPU_STAT_CREATE_EX(cmp_le_s);
+	FPU_STAT_CREATE_EX(cmp_le_d);
+	FPU_STAT_CREATE_EX(cmp_lt_s);
+	FPU_STAT_CREATE_EX(cmp_lt_d);
+	FPU_STAT_CREATE_EX(cmp_ne_s);
+	FPU_STAT_CREATE_EX(cmp_ne_d);
+	FPU_STAT_CREATE_EX(cmp_or_s);
+	FPU_STAT_CREATE_EX(cmp_or_d);
+	FPU_STAT_CREATE_EX(cmp_ueq_s);
+	FPU_STAT_CREATE_EX(cmp_ueq_d);
+	FPU_STAT_CREATE_EX(cmp_ule_s);
+	FPU_STAT_CREATE_EX(cmp_ule_d);
+	FPU_STAT_CREATE_EX(cmp_ult_s);
+	FPU_STAT_CREATE_EX(cmp_ult_d);
+	FPU_STAT_CREATE_EX(cmp_un_s);
+	FPU_STAT_CREATE_EX(cmp_un_d);
+	FPU_STAT_CREATE_EX(cmp_une_s);
+	FPU_STAT_CREATE_EX(cmp_une_d);
+	FPU_STAT_CREATE_EX(cmp_saf_s);
+	FPU_STAT_CREATE_EX(cmp_saf_d);
+	FPU_STAT_CREATE_EX(cmp_seq_s);
+	FPU_STAT_CREATE_EX(cmp_seq_d);
+	FPU_STAT_CREATE_EX(cmp_sle_s);
+	FPU_STAT_CREATE_EX(cmp_sle_d);
+	FPU_STAT_CREATE_EX(cmp_slt_s);
+	FPU_STAT_CREATE_EX(cmp_slt_d);
+	FPU_STAT_CREATE_EX(cmp_sne_s);
+	FPU_STAT_CREATE_EX(cmp_sne_d);
+	FPU_STAT_CREATE_EX(cmp_sor_s);
+	FPU_STAT_CREATE_EX(cmp_sor_d);
+	FPU_STAT_CREATE_EX(cmp_sueq_s);
+	FPU_STAT_CREATE_EX(cmp_sueq_d);
+	FPU_STAT_CREATE_EX(cmp_sule_s);
+	FPU_STAT_CREATE_EX(cmp_sule_d);
+	FPU_STAT_CREATE_EX(cmp_sult_s);
+	FPU_STAT_CREATE_EX(cmp_sult_d);
+	FPU_STAT_CREATE_EX(cmp_sun_s);
+	FPU_STAT_CREATE_EX(cmp_sun_d);
+	FPU_STAT_CREATE_EX(cmp_sune_s);
+	FPU_STAT_CREATE_EX(cmp_sune_d);
+	FPU_STAT_CREATE_EX(cvt_d_l);
+	FPU_STAT_CREATE_EX(cvt_d_s);
+	FPU_STAT_CREATE_EX(cvt_d_w);
+	FPU_STAT_CREATE_EX(cvt_l_s);
+	FPU_STAT_CREATE_EX(cvt_l_d);
+	FPU_STAT_CREATE_EX(cvt_s_d);
+	FPU_STAT_CREATE_EX(cvt_s_l);
+	FPU_STAT_CREATE_EX(cvt_s_w);
+	FPU_STAT_CREATE_EX(cvt_w_s);
+	FPU_STAT_CREATE_EX(cvt_w_d);
+	FPU_STAT_CREATE_EX(div_s);
+	FPU_STAT_CREATE_EX(div_d);
+	FPU_STAT_CREATE_EX(floor_w_s);
+	FPU_STAT_CREATE_EX(floor_w_d);
+	FPU_STAT_CREATE_EX(floor_l_s);
+	FPU_STAT_CREATE_EX(floor_l_d);
+	FPU_STAT_CREATE_EX(maddf_s);
+	FPU_STAT_CREATE_EX(maddf_d);
+	FPU_STAT_CREATE_EX(max_s);
+	FPU_STAT_CREATE_EX(max_d);
+	FPU_STAT_CREATE_EX(maxa_s);
+	FPU_STAT_CREATE_EX(maxa_d);
+	FPU_STAT_CREATE_EX(min_s);
+	FPU_STAT_CREATE_EX(min_d);
+	FPU_STAT_CREATE_EX(mina_s);
+	FPU_STAT_CREATE_EX(mina_d);
+	FPU_STAT_CREATE_EX(mov_s);
+	FPU_STAT_CREATE_EX(mov_d);
+	FPU_STAT_CREATE_EX(msubf_s);
+	FPU_STAT_CREATE_EX(msubf_d);
+	FPU_STAT_CREATE_EX(mul_s);
+	FPU_STAT_CREATE_EX(mul_d);
+	FPU_STAT_CREATE_EX(neg_s);
+	FPU_STAT_CREATE_EX(neg_d);
+	FPU_STAT_CREATE_EX(recip_s);
+	FPU_STAT_CREATE_EX(recip_d);
+	FPU_STAT_CREATE_EX(rint_s);
+	FPU_STAT_CREATE_EX(rint_d);
+	FPU_STAT_CREATE_EX(round_w_s);
+	FPU_STAT_CREATE_EX(round_w_d);
+	FPU_STAT_CREATE_EX(round_l_s);
+	FPU_STAT_CREATE_EX(round_l_d);
+	FPU_STAT_CREATE_EX(rsqrt_s);
+	FPU_STAT_CREATE_EX(rsqrt_d);
+	FPU_STAT_CREATE_EX(sel_s);
+	FPU_STAT_CREATE_EX(sel_d);
+	FPU_STAT_CREATE_EX(seleqz_s);
+	FPU_STAT_CREATE_EX(seleqz_d);
+	FPU_STAT_CREATE_EX(selnez_s);
+	FPU_STAT_CREATE_EX(selnez_d);
+	FPU_STAT_CREATE_EX(sqrt_s);
+	FPU_STAT_CREATE_EX(sqrt_d);
+	FPU_STAT_CREATE_EX(sub_s);
+	FPU_STAT_CREATE_EX(sub_d);
+	FPU_STAT_CREATE_EX(trunc_w_s);
+	FPU_STAT_CREATE_EX(trunc_w_d);
+	FPU_STAT_CREATE_EX(trunc_l_s);
+	FPU_STAT_CREATE_EX(trunc_l_d);
+
 	return 0;
 }
 arch_initcall(debugfs_fpuemu);
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 4d00084..74a5a00 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754sp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
 		return ys ? x : y;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754sp_zero(1);
+		return ieee754sp_zero(xs & ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
 	else if (xs < ys)
 		return x;
 
-	/* Compare exponent */
-	if (xe > ye)
-		return x;
-	else if (xe < ye)
-		return y;
+	/* Signs of inputs are equal, let's compare exponents */
+	if (xs == 0) {
+		/* Inputs are both positive */
+		if (xe > ye)
+			return x;
+		else if (xe < ye)
+			return y;
+	} else {
+		/* Inputs are both negative */
+		if (xe > ye)
+			return y;
+		else if (xe < ye)
+			return x;
+	}
 
-	/* Compare mantissa */
+	/* Signs and exponents of inputs are equal, let's compare mantissas */
+	if (xs == 0) {
+		/* Inputs are both positive, with equal signs and exponents */
+		if (xm <= ym)
+			return y;
+		return x;
+	}
+	/* Inputs are both negative, with equal signs and exponents */
 	if (xm <= ym)
-		return y;
-	return x;
+		return x;
+	return y;
 }
 
 union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754sp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
 	/*
 	 * Infinity and zero handling
 	 */
+	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+		return ieee754sp_inf(xs & ys);
+
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
 		return y;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754sp_zero(1);
+		return ieee754sp_zero(xs & ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
 		return y;
 
 	/* Compare mantissa */
-	if (xm <= ym)
+	if (xm < ym)
 		return y;
-	return x;
+	else if (xm > ym)
+		return x;
+	else if (xs == 0)
+		return x;
+	return y;
 }
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index 4eb1bb9..c51385f 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754sp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
 		return ys ? y : x;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754sp_zero(1);
+		return ieee754sp_zero(xs | ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
 	else if (xs < ys)
 		return y;
 
-	/* Compare exponent */
-	if (xe > ye)
-		return y;
-	else if (xe < ye)
-		return x;
+	/* Signs of inputs are the same, let's compare exponents */
+	if (xs == 0) {
+		/* Inputs are both positive */
+		if (xe > ye)
+			return y;
+		else if (xe < ye)
+			return x;
+	} else {
+		/* Inputs are both negative */
+		if (xe > ye)
+			return x;
+		else if (xe < ye)
+			return y;
+	}
 
-	/* Compare mantissa */
+	/* Signs and exponents of inputs are equal, let's compare mantissas */
+	if (xs == 0) {
+		/* Inputs are both positive, with equal signs and exponents */
+		if (xm <= ym)
+			return x;
+		return y;
+	}
+	/* Inputs are both negative, with equal signs and exponents */
 	if (xm <= ym)
-		return x;
-	return y;
+		return y;
+	return x;
 }
 
 union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
 		return ieee754sp_nanxcpt(x);
 
-	/* numbers are preferred to NaNs */
+	/*
+	 * Quiet NaN handling
+	 */
+
+	/*
+	 *    The case of both inputs quiet NaNs
+	 */
+	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+		return x;
+
+	/*
+	 *    The cases of exactly one input quiet NaN (numbers
+	 *    are here preferred as returned values to NaNs)
+	 */
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
 		return x;
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
 	/*
 	 * Infinity and zero handling
 	 */
+	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+		return ieee754sp_inf(xs | ys);
+
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
-		return x;
+		return y;
 
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
-		return y;
+		return x;
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
-		if (xs == ys)
-			return x;
-		return ieee754sp_zero(1);
+		return ieee754sp_zero(xs | ys);
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
 		return x;
 
 	/* Compare mantissa */
-	if (xm <= ym)
+	if (xm < ym)
+		return x;
+	else if (xm > ym)
+		return y;
+	else if (xs == 1)
 		return x;
 	return y;
 }
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index c91d5e5..7195fe7 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,9 +14,6 @@
 
 #include "ieee754sp.h"
 
-enum maddf_flags {
-	maddf_negate_product	= 1 << 0,
-};
 
 static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 				 union ieee754sp y, enum maddf_flags flags)
@@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 	int re;
 	int rs;
 	unsigned rm;
-	unsigned short lxm;
-	unsigned short hxm;
-	unsigned short lym;
-	unsigned short hym;
-	unsigned lrm;
-	unsigned hrm;
-	unsigned t;
-	unsigned at;
+	uint64_t rm64;
+	uint64_t zm64;
 	int s;
 
 	COMPXSP;
@@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 
 	ieee754_clearcx();
 
-	switch (zc) {
-	case IEEE754_CLASS_SNAN:
-		ieee754_setcx(IEEE754_INVALID_OPERATION);
+	/*
+	 * Handle the cases when at least one of x, y or z is a NaN.
+	 * Order of precedence is sNaN, qNaN and z, x, y.
+	 */
+	if (zc == IEEE754_CLASS_SNAN)
 		return ieee754sp_nanxcpt(z);
-	case IEEE754_CLASS_DNORM:
-		SPDNORMZ;
-	/* QNAN and ZERO cases are handled separately below */
-	}
-
-	switch (CLPAIR(xc, yc)) {
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+	if (xc == IEEE754_CLASS_SNAN)
+		return ieee754sp_nanxcpt(x);
+	if (yc == IEEE754_CLASS_SNAN)
 		return ieee754sp_nanxcpt(y);
+	if (zc == IEEE754_CLASS_QNAN)
+		return z;
+	if (xc == IEEE754_CLASS_QNAN)
+		return x;
+	if (yc == IEEE754_CLASS_QNAN)
+		return y;
 
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
-		return ieee754sp_nanxcpt(x);
+	if (zc == IEEE754_CLASS_DNORM)
+		SPDNORMZ;
+	/* ZERO z cases are handled separately below */
 
-	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
-		return y;
+	switch (CLPAIR(xc, yc)) {
 
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
-	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
-		return x;
 
 	/*
 	 * Infinity handling
 	 */
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
 		ieee754_setcx(IEEE754_INVALID_OPERATION);
 		return ieee754sp_indef();
 
@@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		return ieee754sp_inf(xs ^ ys);
+		if ((zc == IEEE754_CLASS_INF) &&
+		    ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+		     ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+			/*
+			 * Cases of addition of infinities with opposite signs
+			 * or subtraction of infinities with same signs.
+			 */
+			ieee754_setcx(IEEE754_INVALID_OPERATION);
+			return ieee754sp_indef();
+		}
+		/*
+		 * z is here either not an infinity, or an infinity having the
+		 * same sign as product (x*y) (in case of MADDF.D instruction)
+		 * or product -(x*y) (in MSUBF.D case). The result must be an
+		 * infinity, and its sign is determined only by the value of
+		 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+		 */
+		if (flags & MADDF_NEGATE_PRODUCT)
+			return ieee754sp_inf(1 ^ (xs ^ ys));
+		else
+			return ieee754sp_inf(xs ^ ys);
 
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 		if (zc == IEEE754_CLASS_INF)
 			return ieee754sp_inf(zs);
-		/* Multiplication is 0 so just return z */
+		if (zc == IEEE754_CLASS_ZERO) {
+			/* Handle cases +0 + (-0) and similar ones. */
+			if ((!(flags & MADDF_NEGATE_PRODUCT)
+					&& (zs == (xs ^ ys))) ||
+			    ((flags & MADDF_NEGATE_PRODUCT)
+					&& (zs != (xs ^ ys))))
+				/*
+				 * Cases of addition of zeros of equal signs
+				 * or subtraction of zeroes of opposite signs.
+				 * The sign of the resulting zero is in any
+				 * such case determined only by the sign of z.
+				 */
+				return z;
+
+			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
+		}
+		/* x*y is here 0, and z is not 0, so just return z */
 		return z;
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 		SPDNORMX;
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754sp_inf(zs);
 		SPDNORMY;
 		break;
 
 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754sp_inf(zs);
 		SPDNORMX;
 		break;
 
 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
-		if (zc == IEEE754_CLASS_QNAN)
-			return z;
-		else if (zc == IEEE754_CLASS_INF)
+		if (zc == IEEE754_CLASS_INF)
 			return ieee754sp_inf(zs);
 		/* fall through to real computations */
 	}
@@ -158,111 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
 
 	re = xe + ye;
 	rs = xs ^ ys;
-	if (flags & maddf_negate_product)
+	if (flags & MADDF_NEGATE_PRODUCT)
 		rs ^= 1;
 
-	/* shunt to top of word */
-	xm <<= 32 - (SP_FBITS + 1);
-	ym <<= 32 - (SP_FBITS + 1);
-
-	/*
-	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
-	 */
-	lxm = xm & 0xffff;
-	hxm = xm >> 16;
-	lym = ym & 0xffff;
-	hym = ym >> 16;
-
-	lrm = lxm * lym;	/* 16 * 16 => 32 */
-	hrm = hxm * hym;	/* 16 * 16 => 32 */
-
-	t = lxm * hym; /* 16 * 16 => 32 */
-	at = lrm + (t << 16);
-	hrm += at < lrm;
-	lrm = at;
-	hrm = hrm + (t >> 16);
+	/* Multiple 24 bit xm and ym to give 48 bit results */
+	rm64 = (uint64_t)xm * ym;
 
-	t = hxm * lym; /* 16 * 16 => 32 */
-	at = lrm + (t << 16);
-	hrm += at < lrm;
-	lrm = at;
-	hrm = hrm + (t >> 16);
+	/* Shunt to top of word */
+	rm64 = rm64 << 16;
 
-	rm = hrm | (lrm != 0);
-
-	/*
-	 * Sticky shift down to normal rounding precision.
-	 */
-	if ((int) rm < 0) {
-		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
-		    ((rm << (SP_FBITS + 1 + 3)) != 0);
+	/* Put explicit bit at bit 62 if necessary */
+	if ((int64_t) rm64 < 0) {
+		rm64 = rm64 >> 1;
 		re++;
-	} else {
-		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
-		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
 	}
-	assert(rm & (SP_HIDDEN_BIT << 3));
-
-	if (zc == IEEE754_CLASS_ZERO)
-		return ieee754sp_format(rs, re, rm);
 
-	/* And now the addition */
+	assert(rm64 & (1 << 62));
 
-	assert(zm & SP_HIDDEN_BIT);
+	if (zc == IEEE754_CLASS_ZERO) {
+		/*
+		 * Move explicit bit from bit 62 to bit 26 since the
+		 * ieee754sp_format code expects the mantissa to be
+		 * 27 bits wide (24 + 3 rounding bits).
+		 */
+		rm = XSPSRS64(rm64, (62 - 26));
+		return ieee754sp_format(rs, re, rm);
+	}
 
-	/*
-	 * Provide guard,round and stick bit space.
-	 */
-	zm <<= 3;
+	/* Move explicit bit from bit 23 to bit 62 */
+	zm64 = (uint64_t)zm << (62 - 23);
+	assert(zm64 & (1 << 62));
 
+	/* Make the exponents the same */
 	if (ze > re) {
 		/*
 		 * Have to shift r fraction right to align.
 		 */
 		s = ze - re;
-		rm = XSPSRS(rm, s);
+		rm64 = XSPSRS64(rm64, s);
 		re += s;
 	} else if (re > ze) {
 		/*
 		 * Have to shift z fraction right to align.
 		 */
 		s = re - ze;
-		zm = XSPSRS(zm, s);
+		zm64 = XSPSRS64(zm64, s);
 		ze += s;
 	}
 	assert(ze == re);
 	assert(ze <= SP_EMAX);
 
+	/* Do the addition */
 	if (zs == rs) {
 		/*
-		 * Generate 28 bit result of adding two 27 bit numbers
-		 * leaving result in zm, zs and ze.
+		 * Generate 64 bit result by adding two 63 bit numbers
+		 * leaving result in zm64, zs and ze.
 		 */
-		zm = zm + rm;
-
-		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
-			zm = XSPSRS1(zm);
+		zm64 = zm64 + rm64;
+		if ((int64_t)zm64 < 0) {	/* carry out */
+			zm64 = XSPSRS1(zm64);
 			ze++;
 		}
 	} else {
-		if (zm >= rm) {
-			zm = zm - rm;
+		if (zm64 >= rm64) {
+			zm64 = zm64 - rm64;
 		} else {
-			zm = rm - zm;
+			zm64 = rm64 - zm64;
 			zs = rs;
 		}
-		if (zm == 0)
+		if (zm64 == 0)
 			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 
 		/*
-		 * Normalize in extended single precision
+		 * Put explicit bit at bit 62 if necessary.
 		 */
-		while ((zm >> (SP_MBITS + 3)) == 0) {
-			zm <<= 1;
+		while ((zm64 >> 62) == 0) {
+			zm64 <<= 1;
 			ze--;
 		}
-
 	}
+
+	/*
+	 * Move explicit bit from bit 62 to bit 26 since the
+	 * ieee754sp_format code expects the mantissa to be
+	 * 27 bits wide (24 + 3 rounding bits).
+	 */
+	zm = XSPSRS64(zm64, (62 - 26));
+
 	return ieee754sp_format(zs, ze, zm);
 }
 
@@ -275,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
 union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
 				union ieee754sp y)
 {
-	return _sp_maddf(z, x, y, maddf_negate_product);
+	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 }
diff --git a/arch/mips/math-emu/sp_rint.c b/arch/mips/math-emu/sp_rint.c
new file mode 100644
index 0000000..70765b1
--- /dev/null
+++ b/arch/mips/math-emu/sp_rint.c
@@ -0,0 +1,90 @@
+/* IEEE754 floating point arithmetic
+ * single precision
+ */
+/*
+ * MIPS floating point support
+ * Copyright (C) 1994-2000 Algorithmics Ltd.
+ * Copyright (C) 2017 Imagination Technologies, Ltd.
+ * Author: Aleksandar Markovic <aleksandar.markovic@imgtec.com>
+ *
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program.
+ */
+
+#include "ieee754sp.h"
+
+union ieee754sp ieee754sp_rint(union ieee754sp x)
+{
+	union ieee754sp ret;
+	u32 residue;
+	int sticky;
+	int round;
+	int odd;
+
+	COMPXDP;		/* <-- DP needed for 64-bit mantissa tmp */
+
+	ieee754_clearcx();
+
+	EXPLODEXSP;
+	FLUSHXSP;
+
+	if (xc == IEEE754_CLASS_SNAN)
+		return ieee754sp_nanxcpt(x);
+
+	if ((xc == IEEE754_CLASS_QNAN) ||
+	    (xc == IEEE754_CLASS_INF) ||
+	    (xc == IEEE754_CLASS_ZERO))
+		return x;
+
+	if (xe >= SP_FBITS)
+		return x;
+
+	if (xe < -1) {
+		residue = xm;
+		round = 0;
+		sticky = residue != 0;
+		xm = 0;
+	} else {
+		residue = xm << (xe + 1);
+		residue <<= 31 - SP_FBITS;
+		round = (residue >> 31) != 0;
+		sticky = (residue << 1) != 0;
+		xm >>= SP_FBITS - xe;
+	}
+
+	odd = (xm & 0x1) != 0x0;
+
+	switch (ieee754_csr.rm) {
+	case FPU_CSR_RN:	/* toward nearest */
+		if (round && (sticky || odd))
+			xm++;
+		break;
+	case FPU_CSR_RZ:	/* toward zero */
+		break;
+	case FPU_CSR_RU:	/* toward +infinity */
+		if ((round || sticky) && !xs)
+			xm++;
+		break;
+	case FPU_CSR_RD:	/* toward -infinity */
+		if ((round || sticky) && xs)
+			xm++;
+		break;
+	}
+
+	if (round || sticky)
+		ieee754_setcx(IEEE754_INEXACT);
+
+	ret = ieee754sp_flong(xm);
+	SPSIGN(ret) = xs;
+
+	return ret;
+}