summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/mips/hevc_lpf_sao_msa.c
diff options
context:
space:
mode:
author gxw <guxiwei-hf@loongson.cn> 2019-08-07 17:52:00 +0800
committer Michael Niedermayer <michael@niedermayer.cc> 2019-08-13 16:48:38 +0200
commit a3e572d96fd1dd6291f6b28e173db858c08ff8d8 (patch)
tree 85807f6ec1442cc362cf8946e67f564c92267e07 /libavcodec/mips/hevc_lpf_sao_msa.c
parent 8f92eb05e063e6c4d6e36521020620d4e6e1c21d (diff)
download ffmpeg-streaming-a3e572d96fd1dd6291f6b28e173db858c08ff8d8.zip
ffmpeg-streaming-a3e572d96fd1dd6291f6b28e173db858c08ff8d8.tar.gz
avutil/mips: refine msa macros CLIP_*.
Details of the change:
1. Remove the local variable 'out_m' in 'CLIP_SH' and store the result in the source vector.
2. Refine the implementation of the macros 'CLIP_SH_0_255' and 'CLIP_SW_0_255'.
   VP8 decoding is about 1.1% faster (from 7.03x to 7.11x).
   H264 decoding is about 0.5% faster (from 4.35x to 4.37x).
   Theora decoding is about 0.7% faster (from 5.79x to 5.83x).
3. Remove the redundant macros 'CLIP_SH/Wn_0_255_MAX_SATU' and use 'CLIP_SH/Wn_0_255' instead, because the two sets of macros have the same effect.
Reviewed-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/mips/hevc_lpf_sao_msa.c')
-rw-r--r-- libavcodec/mips/hevc_lpf_sao_msa.c 132
1 files changed, 66 insertions, 66 deletions
diff --git a/libavcodec/mips/hevc_lpf_sao_msa.c b/libavcodec/mips/hevc_lpf_sao_msa.c
index ac21806..7153fef 100644
--- a/libavcodec/mips/hevc_lpf_sao_msa.c
+++ b/libavcodec/mips/hevc_lpf_sao_msa.c
@@ -140,19 +140,19 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
@@ -165,19 +165,19 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
@@ -218,15 +218,15 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
@@ -252,9 +252,9 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
@@ -262,9 +262,9 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
@@ -298,19 +298,19 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
@@ -323,19 +323,19 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
@@ -362,15 +362,15 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
@@ -394,9 +394,9 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
@@ -404,9 +404,9 @@ static void hevc_loopfilter_luma_hor_msa(uint8_t *src, int32_t stride,
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
@@ -561,19 +561,19 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
@@ -585,19 +585,19 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
@@ -620,14 +620,14 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ CLIP_SH(delta0, tc_neg, tc_pos);
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
@@ -649,9 +649,9 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
@@ -659,9 +659,9 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
@@ -726,19 +726,19 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
@@ -750,19 +750,19 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
@@ -785,15 +785,15 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
@@ -815,9 +815,9 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
@@ -825,9 +825,9 @@ static void hevc_loopfilter_luma_ver_msa(uint8_t *src, int32_t stride,
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
delta1 = (v8i16) __msa_bmz_v((v16u8) delta1, (v16u8) p1_src,
@@ -955,15 +955,15 @@ static void hevc_loopfilter_chroma_hor_msa(uint8_t *src, int32_t stride,
temp0 <<= 2;
temp0 += temp1;
delta = __msa_srari_h((v8i16) temp0, 3);
- delta = CLIP_SH(delta, tc_neg, tc_pos);
+ CLIP_SH(delta, tc_neg, tc_pos);
temp0 = (v8i16) ((v8i16) p0 + delta);
- temp0 = CLIP_SH_0_255(temp0);
+ CLIP_SH_0_255(temp0);
temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
(v16u8) p_is_pcm_vec);
temp1 = (v8i16) ((v8i16) q0 - delta);
- temp1 = CLIP_SH_0_255(temp1);
+ CLIP_SH_0_255(temp1);
temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
(v16u8) q_is_pcm_vec);
@@ -1014,15 +1014,15 @@ static void hevc_loopfilter_chroma_ver_msa(uint8_t *src, int32_t stride,
temp0 <<= 2;
temp0 += temp1;
delta = __msa_srari_h((v8i16) temp0, 3);
- delta = CLIP_SH(delta, tc_neg, tc_pos);
+ CLIP_SH(delta, tc_neg, tc_pos);
temp0 = (v8i16) ((v8i16) p0 + delta);
- temp0 = CLIP_SH_0_255(temp0);
+ CLIP_SH_0_255(temp0);
temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
(v16u8) p_is_pcm_vec);
temp1 = (v8i16) ((v8i16) q0 - delta);
- temp1 = CLIP_SH_0_255(temp1);
+ CLIP_SH_0_255(temp1);
temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
(v16u8) q_is_pcm_vec);
OpenPOWER on IntegriCloud