diff options
author | Shiyou Yin <yinshiyou-hf@loongson.cn> | 2018-08-31 21:41:49 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2018-09-02 03:37:32 +0200 |
commit | df13b75aa18633f95761b34775ab5e6797d92c57 (patch) | |
tree | bdc8df903143764ab4fc756e628fd1b265720320 /libavutil | |
parent | 1124df0397372c4d1dd798dc2cfb7d4e0f2bb890 (diff) | |
download | ffmpeg-streaming-df13b75aa18633f95761b34775ab5e6797d92c57.zip ffmpeg-streaming-df13b75aa18633f95761b34775ab5e6797d92c57.tar.gz |
avcodec/mips: [loongson] reoptimize simple idct with mmi.
Performance of mpeg4 decoding improved by about 23% (from 128 fps to 158 fps, tested on Loongson 3A3000).
Reoptimized the following functions with MMI:
1. ff_simple_idct_put_8_mmi
2. ff_simple_idct_add_8_mmi
3. ff_simple_idct_8_mmi
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/mips/mmiutils.h | 49 |
1 files changed, 49 insertions, 0 deletions
diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h index 491579e..2b1a521 100644 --- a/libavutil/mips/mmiutils.h +++ b/libavutil/mips/mmiutils.h @@ -201,6 +201,55 @@ #endif /* HAVE_LOONGSON2 */ +/** + * backup register + */ +#define BACKUP_REG \ + double temp_backup_reg[8]; \ + if (_MIPS_SIM == _ABI64) \ + __asm__ volatile ( \ + "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ + "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ + "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \ + "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ + : \ + : [temp]"r"(temp_backup_reg) \ + : "memory" \ + ); \ + else \ + __asm__ volatile ( \ + "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ + "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ + "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ + : \ + : [temp]"r"(temp_backup_reg) \ + : "memory" \ + ); + +/** + * recover register + */ +#define RECOVER_REG \ + if (_MIPS_SIM == _ABI64) \ + __asm__ volatile ( \ + "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ + "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \ + "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \ + "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \ + : \ + : [temp]"r"(temp_backup_reg) \ + : "memory" \ + ); \ + else \ + __asm__ volatile ( \ + "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \ + "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \ + "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \ + : \ + : [temp]"r"(temp_backup_reg) \ + : "memory" \ + ); + #define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \ "li "#r1", 0x93 \n\t" \ "xor "#zero","#zero","#zero" \n\t" \ |