diff options
Diffstat (limited to 'libavcodec/mips/h264qpel_msa.c')
-rw-r--r-- | libavcodec/mips/h264qpel_msa.c | 138 |
1 files changed, 68 insertions, 70 deletions
diff --git a/libavcodec/mips/h264qpel_msa.c b/libavcodec/mips/h264qpel_msa.c index 9c779bd..df7e3e2 100644 --- a/libavcodec/mips/h264qpel_msa.c +++ b/libavcodec/mips/h264qpel_msa.c @@ -149,7 +149,7 @@ static void avc_luma_hv_qrt_4x4_msa(const uint8_t *src_x, const uint8_t *src_y, SAT_SH2_SH(out0, out1, 7); out = PCKEV_XORI128_UB(out0, out1); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y, @@ -220,7 +220,7 @@ static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y, SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12); @@ -256,8 +256,7 @@ static void avc_luma_hv_qrt_8x8_msa(const uint8_t *src_x, const uint8_t *src_y, SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); - ST8x4_UB(out0, out1, dst, stride); - dst += (4 * stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); } static void avc_luma_hv_qrt_16x16_msa(const uint8_t *src_x, @@ -337,7 +336,7 @@ static void avc_luma_hv_qrt_16x16_msa(const uint8_t *src_x, SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); - ST8x4_UB(tmp0, tmp1, dst, stride); + ST_D4(tmp0, tmp1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); src_vt0 = src_vt4; @@ -419,7 +418,7 @@ static void avc_luma_hv_qrt_and_aver_dst_4x4_msa(const uint8_t *src_x, res = PCKEV_XORI128_UB(res0, res1); dst0 = __msa_aver_u_b(res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x, @@ -498,7 +497,7 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src_y, stride, src_vt9, src_vt10, src_vt11, src_vt12); @@ -539,8 +538,7 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); - dst += (4 * stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); } static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x, @@ -627,7 +625,7 @@ static void avc_luma_hv_qrt_and_aver_dst_16x16_msa(const uint8_t *src_x, tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); src_vt0 = src_vt4; @@ -723,7 +721,7 @@ void ff_avg_h264_qpel8_mc00_msa(uint8_t *dst, const uint8_t *src, AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src, @@ -739,7 +737,7 @@ void ff_avg_h264_qpel4_mc00_msa(uint8_t *dst, const uint8_t *src, dst0 = __msa_aver_u_b(src0, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src, @@ -930,7 +928,7 @@ void ff_put_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src, tmp2 = __msa_aver_s_b(tmp2, src4); tmp3 = __msa_aver_s_b(tmp3, src5); XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3); - ST8x8_UB(tmp0, tmp1, tmp2, tmp3, dst, stride); + ST_D8(tmp0, tmp1, tmp2, tmp3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src, @@ -985,7 +983,7 @@ void ff_put_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src, tmp2 = __msa_aver_s_b(tmp2, src4); tmp3 = __msa_aver_s_b(tmp3, src5); XORI_B4_128_SB(tmp0, tmp1, tmp2, tmp3); - ST8x8_UB(tmp0, tmp1, tmp2, tmp3, dst, stride); + ST_D8(tmp0, tmp1, tmp2, tmp3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src, @@ -1016,7 +1014,7 @@ void ff_put_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src, src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1); res = __msa_aver_s_b(res, src0); res = (v16i8) __msa_xori_b((v16u8) res, 128); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src, @@ -1047,7 +1045,7 @@ void ff_put_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src, src0 = (v16i8) __msa_insve_d((v2i64) src0, 1, (v2i64) src1); res = __msa_aver_s_b(res, src0); res = (v16i8) __msa_xori_b((v16u8) res, 128); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src, @@ -1153,7 +1151,7 @@ void ff_put_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src, out1 = PCKEV_XORI128_UB(res2, res3); out2 = PCKEV_XORI128_UB(res4, res5); out3 = PCKEV_XORI128_UB(res6, res7); - ST8x8_UB(out0, out1, out2, out3, dst, stride); + ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src, @@ -1178,7 +1176,7 @@ void ff_put_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src, SRARI_H2_SH(res0, res1, 5); SAT_SH2_SH(res0, res1, 7); out = PCKEV_XORI128_UB(res0, res1); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src, @@ -1378,7 +1376,7 @@ void ff_put_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src, out2 = __msa_aver_s_b(out2, tmp2); out3 = __msa_aver_s_b(out3, tmp3); XORI_B4_128_SB(out0, out1, out2, out3); - ST8x8_UB(out0, out1, out2, out3, dst, stride); + ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src, @@ -1431,7 +1429,7 @@ void ff_put_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src, out2 = __msa_aver_s_b(out2, tmp2); out3 = __msa_aver_s_b(out3, tmp3); XORI_B4_128_SB(out0, out1, out2, out3); - ST8x8_UB(out0, out1, out2, out3, dst, stride); + ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src, @@ -1472,7 +1470,7 @@ void ff_put_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src, src54_r = (v16i8) __msa_insve_w((v4i32) src4, 1, (v4i32) src5); src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r); out = __msa_aver_u_b(out, (v16u8) src32_r); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src, @@ -1513,7 +1511,7 @@ void ff_put_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src, src54_r = (v16i8) __msa_insve_w((v4i32) src5, 1, (v4i32) src6); src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r); out = __msa_aver_u_b(out, (v16u8) src32_r); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src, @@ -1691,7 +1689,7 @@ void ff_put_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); hz_out0 = hz_out4; @@ -1804,7 +1802,7 @@ void ff_put_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); hz_out0 = hz_out4; @@ -1905,7 +1903,7 @@ void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src9, src10, src11, src12); @@ -1951,7 +1949,7 @@ void ff_put_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, @@ -2040,7 +2038,7 @@ void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src9, src10, src11, src12); @@ -2086,7 +2084,7 @@ void ff_put_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src, @@ -2150,7 +2148,7 @@ void ff_put_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src, dst1 = __msa_aver_s_h(dst1, hz_out4); res = PCKEV_XORI128_UB(dst0, dst1); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src, @@ -2215,7 +2213,7 @@ void ff_put_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src, dst1 = __msa_aver_s_h(dst1, hz_out1); res = PCKEV_XORI128_UB(dst0, dst1); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src, @@ -2332,7 +2330,7 @@ void ff_put_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src, out1 = PCKEV_XORI128_UB(out2_r, out3_r); out2 = PCKEV_XORI128_UB(out4_r, out5_r); out3 = PCKEV_XORI128_UB(out6_r, out7_r); - ST8x8_UB(out0, out1, out2, out3, dst, stride); + ST_D8(out0, out1, out2, out3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src, @@ -2369,7 +2367,7 @@ void ff_put_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src, SRARI_H2_SH(out10, out32, 5); SAT_SH2_SH(out10, out32, 7); out = PCKEV_XORI128_UB(out10, out32); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src, @@ -2601,7 +2599,7 @@ void ff_put_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src, dst0 = __msa_aver_s_h(dst2, dst0); dst1 = __msa_aver_s_h(dst3, dst1); out = PCKEV_XORI128_UB(dst0, dst1); - ST8x2_UB(out, dst, stride); + ST_D2(out, 0, 1, dst, stride); dst += (2 * stride); src0 = src2; @@ -2677,7 +2675,7 @@ void ff_put_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src, dst0 = __msa_aver_s_h(dst2, dst0); dst1 = __msa_aver_s_h(dst3, dst1); out = PCKEV_XORI128_UB(dst0, dst1); - ST8x2_UB(out, dst, stride); + ST_D2(out, 0, 1, dst, stride); dst += (2 * stride); src0 = src2; @@ -2777,7 +2775,7 @@ void ff_put_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src, PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2); out = PCKEV_XORI128_UB(dst0, dst2); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src, @@ -2873,7 +2871,7 @@ void ff_put_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src, PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2); out = PCKEV_XORI128_UB(dst0, dst2); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src, @@ -2961,7 +2959,7 @@ void ff_put_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); hz_out0 = hz_out4; @@ -3049,7 +3047,7 @@ void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src, dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0); out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src0, src1, src2, src3); @@ -3086,7 +3084,7 @@ void ff_put_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src, dst3 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0); out0 = PCKEV_XORI128_UB(dst0, dst1); out1 = PCKEV_XORI128_UB(dst2, dst3); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); } void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src, @@ -3141,7 +3139,7 @@ void ff_put_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src, filt2); dst1 = __msa_pckev_h((v8i16) tmp1, (v8i16) tmp0); res = PCKEV_XORI128_UB(dst0, dst1); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc10_msa(uint8_t *dst, const uint8_t *src, @@ -3350,7 +3348,7 @@ void ff_avg_h264_qpel8_mc10_msa(uint8_t *dst, const uint8_t *src, INSERT_D2_UB(tp2, tp3, dst3); AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1); AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src, @@ -3415,7 +3413,7 @@ void ff_avg_h264_qpel8_mc30_msa(uint8_t *dst, const uint8_t *src, INSERT_D2_UB(tp2, tp3, dst3); AVER_UB2_UB(tmp0, dst0, tmp1, dst1, dst0, dst1); AVER_UB2_UB(tmp2, dst2, tmp3, dst3, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src, @@ -3451,7 +3449,7 @@ void ff_avg_h264_qpel4_mc10_msa(uint8_t *dst, const uint8_t *src, LW4(dst, stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); dst0 = __msa_aver_u_b((v16u8) res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src, @@ -3487,7 +3485,7 @@ void ff_avg_h264_qpel4_mc30_msa(uint8_t *dst, const uint8_t *src, LW4(dst, stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); dst0 = __msa_aver_u_b((v16u8) res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc20_msa(uint8_t *dst, const uint8_t *src, @@ -3608,7 +3606,7 @@ void ff_avg_h264_qpel8_mc20_msa(uint8_t *dst, const uint8_t *src, INSERT_D2_UB(tp2, tp3, out7); AVER_UB2_UB(out0, out2, out1, out3, out0, out1); AVER_UB2_UB(out4, out6, out5, out7, out4, out5); - ST8x8_UB(out0, out1, out4, out5, dst, stride); + ST_D8(out0, out1, out4, out5, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src, @@ -3637,7 +3635,7 @@ void ff_avg_h264_qpel4_mc20_msa(uint8_t *dst, const uint8_t *src, LW4(dst, stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); res = __msa_aver_u_b(res, dst0); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc01_msa(uint8_t *dst, const uint8_t *src, @@ -3856,7 +3854,7 @@ void ff_avg_h264_qpel8_mc01_msa(uint8_t *dst, const uint8_t *src, XORI_B4_128_SB(out0, out1, out2, out3); AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src, @@ -3922,7 +3920,7 @@ void ff_avg_h264_qpel8_mc03_msa(uint8_t *dst, const uint8_t *src, XORI_B4_128_SB(out0, out1, out2, out3); AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src, @@ -3967,7 +3965,7 @@ void ff_avg_h264_qpel4_mc01_msa(uint8_t *dst, const uint8_t *src, res = PCKEV_XORI128_UB(out10, out32); res = __msa_aver_u_b(res, (v16u8) src32_r); dst0 = __msa_aver_u_b(res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src, @@ -4013,7 +4011,7 @@ void ff_avg_h264_qpel4_mc03_msa(uint8_t *dst, const uint8_t *src, src32_r = (v16i8) __msa_insve_d((v2i64) src32_r, 1, (v2i64) src54_r); res = __msa_aver_u_b(res, (v16u8) src32_r); dst0 = __msa_aver_u_b(res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc11_msa(uint8_t *dst, const uint8_t *src, @@ -4196,7 +4194,7 @@ void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(tmp0, tmp1); dst0 = __msa_aver_u_b(out0, dst0); - ST8x2_UB(dst0, dst, stride); + ST_D2(dst0, 0, 1, dst, stride); dst += (2 * stride); LD_SB2(src, stride, src7, src8); @@ -4232,7 +4230,7 @@ void ff_avg_h264_qpel16_mc21_msa(uint8_t *dst, const uint8_t *src, out1 = PCKEV_XORI128_UB(tmp2, tmp3); dst1 = __msa_aver_u_b(out1, dst1); - ST8x2_UB(dst1, dst, stride); + ST_D2(dst1, 0, 1, dst, stride); dst += (2 * stride); hz_out0 = hz_out4; @@ -4326,7 +4324,7 @@ void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src, INSERT_D2_UB(tp0, tp1, dst0); out0 = PCKEV_XORI128_UB(tmp0, tmp1); dst0 = __msa_aver_u_b(out0, dst0); - ST8x2_UB(dst0, dst, stride); + ST_D2(dst0, 0, 1, dst, stride); dst += (2 * stride); LD_SB2(src, stride, src7, src8); @@ -4361,7 +4359,7 @@ void ff_avg_h264_qpel16_mc23_msa(uint8_t *dst, const uint8_t *src, INSERT_D2_UB(tp2, tp3, dst1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); dst1 = __msa_aver_u_b(out1, dst1); - ST8x2_UB(dst1, dst, stride); + ST_D2(dst1, 0, 1, dst, stride); dst += (2 * stride); hz_out0 = hz_out4; @@ -4468,7 +4466,7 @@ void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src9, src10, src11, src12); @@ -4519,7 +4517,7 @@ void ff_avg_h264_qpel8_mc21_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, @@ -4614,7 +4612,7 @@ void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src9, src10, src11, src12); @@ -4665,7 +4663,7 @@ void ff_avg_h264_qpel8_mc23_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src, @@ -4732,7 +4730,7 @@ void ff_avg_h264_qpel4_mc21_msa(uint8_t *dst, const uint8_t *src, INSERT_W4_UB(tp0, tp1, tp2, tp3, out); res = PCKEV_XORI128_UB(dst0, dst1); res = __msa_aver_u_b(res, out); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src, @@ -4800,7 +4798,7 @@ void ff_avg_h264_qpel4_mc23_msa(uint8_t *dst, const uint8_t *src, INSERT_W4_UB(tp0, tp1, tp2, tp3, out); res = PCKEV_XORI128_UB(dst0, dst1); res = __msa_aver_u_b(res, out); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc02_msa(uint8_t *dst, const uint8_t *src, @@ -4936,7 +4934,7 @@ void ff_avg_h264_qpel8_mc02_msa(uint8_t *dst, const uint8_t *src, out3 = PCKEV_XORI128_UB(out6_r, out7_r); AVER_UB4_UB(out0, dst0, out1, dst1, out2, dst2, out3, dst3, dst0, dst1, dst2, dst3); - ST8x8_UB(dst0, dst1, dst2, dst3, dst, stride); + ST_D8(dst0, dst1, dst2, dst3, 0, 1, 0, 1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src, @@ -4977,7 +4975,7 @@ void ff_avg_h264_qpel4_mc02_msa(uint8_t *dst, const uint8_t *src, INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); res = PCKEV_XORI128_UB(out10, out32); dst0 = __msa_aver_u_b(res, dst0); - ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dst, stride); + ST_W4(dst0, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc12_msa(uint8_t *dst, const uint8_t *src, @@ -5217,7 +5215,7 @@ void ff_avg_h264_qpel8_mc12_msa(uint8_t *dst, const uint8_t *src, tmp1 = __msa_aver_s_h(tmp3, tmp1); out = PCKEV_XORI128_UB(tmp0, tmp1); out = __msa_aver_u_b(out, dst0); - ST8x2_UB(out, dst, stride); + ST_D2(out, 0, 1, dst, stride); dst += (2 * stride); src0 = src2; @@ -5297,7 +5295,7 @@ void ff_avg_h264_qpel8_mc32_msa(uint8_t *dst, const uint8_t *src, tmp1 = __msa_aver_s_h(tmp3, tmp1); out = PCKEV_XORI128_UB(tmp0, tmp1); out = __msa_aver_u_b(out, dst0); - ST8x2_UB(out, dst, stride); + ST_D2(out, 0, 1, dst, stride); dst += (2 * stride); src0 = src2; @@ -5401,7 +5399,7 @@ void ff_avg_h264_qpel4_mc12_msa(uint8_t *dst, const uint8_t *src, PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2); out = PCKEV_XORI128_UB(dst0, dst2); out = __msa_aver_u_b(out, dstv); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src, @@ -5500,7 +5498,7 @@ void ff_avg_h264_qpel4_mc32_msa(uint8_t *dst, const uint8_t *src, PCKEV_H2_SH(hz_res1, hz_res0, hz_res3, hz_res2, dst0, dst2); out = PCKEV_XORI128_UB(dst0, dst2); out = __msa_aver_u_b(out, dstv); - ST4x4_UB(out, out, 0, 1, 2, 3, dst, stride); + ST_W4(out, 0, 1, 2, 3, dst, stride); } void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src, @@ -5592,7 +5590,7 @@ void ff_avg_h264_qpel16_mc22_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(res0, res1); out1 = PCKEV_XORI128_UB(res2, res3); AVER_UB2_UB(out0, dst0, out1, dst1, out0, out1); - ST8x4_UB(out0, out1, dst, stride); + ST_D4(out0, out1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); hz_out0 = hz_out4; @@ -5685,7 +5683,7 @@ void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(res0, res1); out1 = PCKEV_XORI128_UB(res2, res3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); dst += (4 * stride); LD_SB4(src, stride, src0, src1, src2, src3); @@ -5726,7 +5724,7 @@ void ff_avg_h264_qpel8_mc22_msa(uint8_t *dst, const uint8_t *src, out0 = PCKEV_XORI128_UB(res0, res1); out1 = PCKEV_XORI128_UB(res2, res3); AVER_UB2_UB(out0, dst0, out1, dst1, dst0, dst1); - ST8x4_UB(dst0, dst1, dst, stride); + ST_D4(dst0, dst1, 0, 1, 0, 1, dst, stride); } void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src, @@ -5785,5 +5783,5 @@ void ff_avg_h264_qpel4_mc22_msa(uint8_t *dst, const uint8_t *src, INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); res = PCKEV_XORI128_UB(res0, res1); res = __msa_aver_u_b(res, dst0); - ST4x4_UB(res, res, 0, 1, 2, 3, dst, stride); + ST_W4(res, 0, 1, 2, 3, dst, stride); } |