diff options
author | Shivraj Patil <shivraj.patil@imgtec.com> | 2015-06-14 23:26:23 +0530 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2015-06-18 12:33:15 +0200 |
commit | 98eb1ac901276c1f348ad52f9eea8f11d66b511c (patch) | |
tree | 6ec8ef7c5e8dfc6a56efe5509f6f6c6d16513c18 /libavutil | |
parent | d1050d9950610aa2b27878b67bb2b902dd717e7c (diff) | |
download | ffmpeg-streaming-98eb1ac901276c1f348ad52f9eea8f11d66b511c.zip ffmpeg-streaming-98eb1ac901276c1f348ad52f9eea8f11d66b511c.tar.gz |
avcodec/mips: MSA (MIPS-SIMD-Arch) optimizations for qpel functions
This patch adds MSA (MIPS-SIMD-Arch) optimizations for the qpel functions in the new file qpeldsp_msa.c.
It also adds the new generic macros needed by this patch to libavutil/mips/generic_macros_msa.h.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/mips/generic_macros_msa.h | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h index 0ee88d2..e6e11e8 100644 --- a/libavutil/mips/generic_macros_msa.h +++ b/libavutil/mips/generic_macros_msa.h @@ -1027,6 +1027,27 @@ } #define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__) +/* Description : Dot product & addition of byte vector elements + Arguments : Inputs - mult0, mult1 + cnst0, cnst1 + Outputs - out0, out1 (in/out: read as accumulators, then overwritten) + Return Type - as per RTYPE (v8u16 for DPADD_UB2_UH) + Details : Unsigned byte elements from mult0 are multiplied with + unsigned byte elements from cnst0 producing a result + twice the size of input i.e. unsigned halfword. + Then these multiplication results of adjacent odd-even elements + are added to the out0 vector; mult1, cnst1 and out1 are handled alike + (2 unsigned halfword results) +*/ +#define DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + out0 = (RTYPE) __msa_dpadd_u_h((v8u16) out0, \ + (v16u8) mult0, (v16u8) cnst0); \ + out1 = (RTYPE) __msa_dpadd_u_h((v8u16) out1, \ + (v16u8) mult1, (v16u8) cnst1); \ +} +#define DPADD_UB2_UH(...) DPADD_UB2(v8u16, __VA_ARGS__) + /* Description : Dot product & addition of halfword vector elements Arguments : Inputs - mult0, mult1 cnst0, cnst1 |