summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-07-18 22:27:46 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2012-07-18 22:34:48 +0200
commit: 204c4e953d895e15ab0908d715fd46181bf32add (patch)
tree: aebdffe23aa1133eeb1db55b810135ecdfe1c188 /libavcodec
parent: c66978e29ad9cae2c71da83857c6a3cec11bb9cb (diff)
parent: ffdd93a25e64db82c053577f415ea82c54fd5235 (diff)
download: ffmpeg-streaming-204c4e953d895e15ab0908d715fd46181bf32add.zip
download: ffmpeg-streaming-204c4e953d895e15ab0908d715fd46181bf32add.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  ppc: fix build with altivec disabled
  vp3: move idct and loop filter pointers to new vp3dsp context
  build: add CONFIG_VP3DSP, reduce repetition in OBJS lists
  tscc2: do not add/subtract 128 bias during DCT
  tscc2: fix typo in DCT
  configure: clarify external library section of help output
  configure: mark libfdk-aac as nonfree
  configure: cosmetics: drop some unnecessary backslashes
  os_support: K&R formatting cosmetics

Conflicts:
  configure
  libavcodec/vp3.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/Makefile 7
-rw-r--r-- libavcodec/arm/Makefile 5
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c 21
-rw-r--r-- libavcodec/arm/vp3dsp_init_arm.c 45
-rw-r--r-- libavcodec/arm/vp3dsp_neon.S 26
-rw-r--r-- libavcodec/dsputil.c 12
-rw-r--r-- libavcodec/dsputil.h 13
-rw-r--r-- libavcodec/ppc/Makefile 5
-rw-r--r-- libavcodec/ppc/dsputil_altivec.h 4
-rw-r--r-- libavcodec/ppc/dsputil_ppc.c 6
-rw-r--r-- libavcodec/ppc/vp3dsp_altivec.c 42
-rw-r--r-- libavcodec/tscc2.c 6
-rw-r--r-- libavcodec/vp3.c 22
-rw-r--r-- libavcodec/vp3dsp.c 37
-rw-r--r-- libavcodec/vp3dsp.h 40
-rw-r--r-- libavcodec/vp56.c 10
-rw-r--r-- libavcodec/vp56.h 2
-rw-r--r-- libavcodec/x86/Makefile 7
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 35
-rw-r--r-- libavcodec/x86/vp3dsp.asm 4
-rw-r--r-- libavcodec/x86/vp3dsp_init.c 65
21 files changed, 232 insertions, 182 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index bbe188a..45dee19 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -59,6 +59,7 @@ OBJS-$(CONFIG_SINEWIN) += sinewin.o
OBJS-$(CONFIG_VAAPI) += vaapi.o
OBJS-$(CONFIG_VDA) += vda.o
OBJS-$(CONFIG_VDPAU) += vdpau.o
+OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
# decoders/encoders/hardware accelerators
OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o
@@ -484,11 +485,11 @@ OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \
vorbis_data.o xiph.o
OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \
vorbis_data.o
-OBJS-$(CONFIG_VP3_DECODER) += vp3.o vp3dsp.o
+OBJS-$(CONFIG_VP3_DECODER) += vp3.o
OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56dsp.o \
- vp3dsp.o vp56rac.o
+ vp56rac.o
OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \
- vp3dsp.o vp6dsp.o vp56rac.o
+ vp6dsp.o vp56rac.o
OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56rac.o
OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o
OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index d2bdd50..a8e531c 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -11,6 +11,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
+OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o
OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o
OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o
OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o
@@ -75,13 +76,11 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_init_neon.o \
arm/rv40dsp_neon.o \
arm/h264cmc_neon.o \
-NEON-OBJS-$(CONFIG_VP3_DECODER) += arm/vp3dsp_neon.o
+NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o
NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \
- arm/vp3dsp_neon.o \
NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_neon.o \
- arm/vp3dsp_neon.o \
NEON-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_neon.o \
arm/vp8dsp_neon.o
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index ef5a8df..5533a28 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -29,11 +29,6 @@ void ff_simple_idct_neon(DCTELEM *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
-void ff_vp3_idct_neon(DCTELEM *data);
-void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
-void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
-void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data);
-
void ff_clear_block_neon(DCTELEM *block);
void ff_clear_blocks_neon(DCTELEM *blocks);
@@ -147,9 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
-void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
-void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
-
void ff_vector_fmul_window_neon(float *dst, const float *src0,
const float *src1, const float *win, int len);
void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
@@ -186,13 +178,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->idct_add = ff_simple_idct_add_neon;
c->idct = ff_simple_idct_neon;
c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
- } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
- CONFIG_VP6_DECODER) &&
- avctx->idct_algo == FF_IDCT_VP3) {
- c->idct_put = ff_vp3_idct_put_neon;
- c->idct_add = ff_vp3_idct_add_neon;
- c->idct = ff_vp3_idct_neon;
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
}
}
@@ -319,12 +304,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
}
- if (CONFIG_VP3_DECODER) {
- c->vp3_v_loop_filter = ff_vp3_v_loop_filter_neon;
- c->vp3_h_loop_filter = ff_vp3_h_loop_filter_neon;
- c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon;
- }
-
c->vector_fmul_window = ff_vector_fmul_window_neon;
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
c->butterflies_float = ff_butterflies_float_neon;
diff --git a/libavcodec/arm/vp3dsp_init_arm.c b/libavcodec/arm/vp3dsp_init_arm.c
new file mode 100644
index 0000000..90fc34b
--- /dev/null
+++ b/libavcodec/arm/vp3dsp_init_arm.c
@@ -0,0 +1,45 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/vp3dsp.h"
+
+void ff_vp3_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_vp3_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_vp3_idct_dc_add_neon(uint8_t *dest, int line_size, const DCTELEM *data);
+
+void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *);
+void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *);
+
+av_cold void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_neon(cpu_flags)) {
+ c->idct_put = ff_vp3_idct_put_neon;
+ c->idct_add = ff_vp3_idct_add_neon;
+ c->idct_dc_add = ff_vp3_idct_dc_add_neon;
+ c->v_loop_filter = ff_vp3_v_loop_filter_neon;
+ c->h_loop_filter = ff_vp3_h_loop_filter_neon;
+ c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
+ }
+}
diff --git a/libavcodec/arm/vp3dsp_neon.S b/libavcodec/arm/vp3dsp_neon.S
index 70cfd29..0c88562 100644
--- a/libavcodec/arm/vp3dsp_neon.S
+++ b/libavcodec/arm/vp3dsp_neon.S
@@ -260,32 +260,6 @@ endfunc
VP3_IDCT_END row
VP3_IDCT_END col
-function ff_vp3_idct_neon, export=1
- mov ip, lr
- mov r2, r0
- bl vp3_idct_start_neon
- bl vp3_idct_end_row_neon
- mov r3, #8
- bl vp3_idct_core_neon
- bl vp3_idct_end_col_neon
- mov lr, ip
- vpop {d8-d15}
-
- vshr.s16 q8, q8, #4
- vshr.s16 q9, q9, #4
- vshr.s16 q10, q10, #4
- vshr.s16 q11, q11, #4
- vshr.s16 q12, q12, #4
- vst1.64 {d16-d19}, [r0,:128]!
- vshr.s16 q13, q13, #4
- vshr.s16 q14, q14, #4
- vst1.64 {d20-d23}, [r0,:128]!
- vshr.s16 q15, q15, #4
- vst1.64 {d24-d27}, [r0,:128]!
- vst1.64 {d28-d31}, [r0,:128]!
- bx lr
-endfunc
-
function ff_vp3_idct_put_neon, export=1
mov ip, lr
bl vp3_idct_start_neon
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 3577926..ef31431 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2870,12 +2870,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->idct_add= ff_jref_idct_add;
c->idct = ff_j_rev_dct;
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
- }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
- avctx->idct_algo==FF_IDCT_VP3){
- c->idct_put= ff_vp3_idct_put_c;
- c->idct_add= ff_vp3_idct_add_c;
- c->idct = ff_vp3_idct_c;
- c->idct_permutation_type= FF_NO_IDCT_PERM;
}else if(avctx->idct_algo==FF_IDCT_WMV2){
c->idct_put= ff_wmv2_idct_put_c;
c->idct_add= ff_wmv2_idct_add_c;
@@ -3037,12 +3031,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->h263_v_loop_filter= h263_v_loop_filter_c;
}
- if (CONFIG_VP3_DECODER) {
- c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
- c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
- c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
- }
-
c->h261_loop_filter= h261_loop_filter_c;
c->try_8x8basis= try_8x8basis_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index af11048..7b533cc 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -108,15 +108,6 @@ PUTAVG_PIXELS(14)
#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
-/* VP3 DSP functions */
-void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
-void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
-
-void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
-
/* EA functions */
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
@@ -401,10 +392,6 @@ typedef struct DSPContext {
void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
- void (*vp3_idct_dc_add)(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/);
- void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
- void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
-
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 31f4fb8..e5d1d39 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -1,14 +1,13 @@
OBJS += ppc/dsputil_ppc.o \
+OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
+
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o
ALTIVEC-OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o \
$(FFT-OBJS-yes)
ALTIVEC-OBJS-$(CONFIG_H264DSP) += ppc/h264_altivec.o
ALTIVEC-OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodec_altivec.o
ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
-ALTIVEC-OBJS-$(CONFIG_VP3_DECODER) += ppc/vp3dsp_altivec.o
-ALTIVEC-OBJS-$(CONFIG_VP5_DECODER) += ppc/vp3dsp_altivec.o
-ALTIVEC-OBJS-$(CONFIG_VP6_DECODER) += ppc/vp3dsp_altivec.o
ALTIVEC-OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
ALTIVEC-OBJS += ppc/dsputil_altivec.o \
diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h
index 6c87782..0b5e404 100644
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@@ -36,10 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
-void ff_vp3_idct_altivec(DCTELEM *block);
-void ff_vp3_idct_put_altivec(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block);
-
void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 195aa20..c6fdc8e 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -193,12 +193,6 @@ void ff_dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->idct_put = ff_idct_put_altivec;
c->idct_add = ff_idct_add_altivec;
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
- }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
- avctx->idct_algo==FF_IDCT_VP3){
- c->idct_put = ff_vp3_idct_put_altivec;
- c->idct_add = ff_vp3_idct_add_altivec;
- c->idct = ff_vp3_idct_altivec;
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
}
}
diff --git a/libavcodec/ppc/vp3dsp_altivec.c b/libavcodec/ppc/vp3dsp_altivec.c
index 950e5c7..ac00c93 100644
--- a/libavcodec/ppc/vp3dsp_altivec.c
+++ b/libavcodec/ppc/vp3dsp_altivec.c
@@ -18,6 +18,13 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vp3dsp.h"
+
+#if HAVE_ALTIVEC
+
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/dsputil.h"
@@ -107,25 +114,7 @@ static inline vec_s16 M16(vec_s16 a, vec_s16 C)
#define ADD8(a) vec_add(a, eight)
#define SHIFT4(a) vec_sra(a, four)
-void ff_vp3_idct_altivec(DCTELEM block[64])
-{
- IDCT_START
-
- IDCT_1D(NOP, NOP)
- TRANSPOSE8(b0, b1, b2, b3, b4, b5, b6, b7);
- IDCT_1D(ADD8, SHIFT4)
-
- vec_st(b0, 0x00, block);
- vec_st(b1, 0x10, block);
- vec_st(b2, 0x20, block);
- vec_st(b3, 0x30, block);
- vec_st(b4, 0x40, block);
- vec_st(b5, 0x50, block);
- vec_st(b6, 0x60, block);
- vec_st(b7, 0x70, block);
-}
-
-void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
+static void vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
vec_u8 t;
IDCT_START
@@ -153,7 +142,7 @@ void ff_vp3_idct_put_altivec(uint8_t *dst, int stride, DCTELEM block[64])
PUT(b7)
}
-void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
+static void vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
{
LOAD_ZERO;
vec_u8 t, vdst;
@@ -183,3 +172,16 @@ void ff_vp3_idct_add_altivec(uint8_t *dst, int stride, DCTELEM block[64])
ADD(b6) dst += stride;
ADD(b7)
}
+
+#endif /* HAVE_ALTIVEC */
+
+av_cold void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags)
+{
+#if HAVE_ALTIVEC
+ if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+ c->idct_put = vp3_idct_put_altivec;
+ c->idct_add = vp3_idct_add_altivec;
+ c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
+ }
+#endif
+}
diff --git a/libavcodec/tscc2.c b/libavcodec/tscc2.c
index 9c8d234..a8fd652 100644
--- a/libavcodec/tscc2.c
+++ b/libavcodec/tscc2.c
@@ -91,11 +91,11 @@ static av_cold int init_vlcs(TSCC2Context *c)
#define DCT1D(d0, d1, d2, d3, s0, s1, s2, s3, OP) \
OP(d0, 5 * ((s0) + (s1) + (s2)) + 2 * (s3)); \
OP(d1, 5 * ((s0) - (s2) - (s3)) + 2 * (s1)); \
- OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s3)); \
+ OP(d2, 5 * ((s0) - (s2) + (s3)) - 2 * (s1)); \
OP(d3, 5 * ((s0) - (s1) + (s2)) - 2 * (s3)); \
#define COL_OP(a, b) a = b
-#define ROW_OP(a, b) a = (((b) + 0x20) >> 6) + 0x80
+#define ROW_OP(a, b) a = ((b) + 0x20) >> 6
static void tscc2_idct4_put(int *in, int q[3], uint8_t *dst, int stride)
{
@@ -158,7 +158,7 @@ static int tscc2_decode_mb(TSCC2Context *c, int *q, int vlc_set,
}
dc = (dc + prev_dc) & 0xFF;
prev_dc = dc;
- c->block[0] = dc - 0x80;
+ c->block[0] = dc;
nc = get_vlc2(gb, c->nc_vlc[vlc_set].table, 9, 1);
if (nc == -1)
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index 81f6b89..a3bfd74 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -40,6 +40,7 @@
#include "get_bits.h"
#include "vp3data.h"
+#include "vp3dsp.h"
#include "xiph.h"
#include "thread.h"
@@ -135,6 +136,7 @@ typedef struct Vp3DecodeContext {
AVFrame current_frame;
int keyframe;
DSPContext dsp;
+ VP3DSPContext vp3dsp;
int flipped_image;
int last_slice_end;
int skip_loop_filter;
@@ -1302,14 +1304,14 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
{
/* do not perform left edge filter for left columns frags */
if (x > 0) {
- s->dsp.vp3_h_loop_filter(
+ s->vp3dsp.h_loop_filter(
plane_data + 8*x,
stride, bounding_values);
}
/* do not perform top edge filter for top row fragments */
if (y > 0) {
- s->dsp.vp3_v_loop_filter(
+ s->vp3dsp.v_loop_filter(
plane_data + 8*x,
stride, bounding_values);
}
@@ -1319,7 +1321,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
* in this frame (it will be filtered in next iteration) */
if ((x < width - 1) &&
(s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
- s->dsp.vp3_h_loop_filter(
+ s->vp3dsp.h_loop_filter(
plane_data + 8*x + 8,
stride, bounding_values);
}
@@ -1329,7 +1331,7 @@ static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int ye
* in this frame (it will be filtered in the next row) */
if ((y < height - 1) &&
(s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
- s->dsp.vp3_v_loop_filter(
+ s->vp3dsp.v_loop_filter(
plane_data + 8*x + 8*stride,
stride, bounding_values);
}
@@ -1574,20 +1576,18 @@ static void render_slice(Vp3DecodeContext *s, int slice)
if (s->all_fragments[i].coding_method == MODE_INTRA) {
vp3_dequant(s, s->all_fragments + i, plane, 0, block);
- if(s->avctx->idct_algo!=FF_IDCT_VP3)
- block[0] += 128<<3;
- s->dsp.idct_put(
+ s->vp3dsp.idct_put(
output_plane + first_pixel,
stride,
block);
} else {
if (vp3_dequant(s, s->all_fragments + i, plane, 1, block)) {
- s->dsp.idct_add(
+ s->vp3dsp.idct_add(
output_plane + first_pixel,
stride,
block);
} else {
- s->dsp.vp3_idct_dc_add(output_plane + first_pixel, stride, block);
+ s->vp3dsp.idct_dc_add(output_plane + first_pixel, stride, block);
}
}
} else {
@@ -1670,10 +1670,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
if (avctx->codec_id != CODEC_ID_THEORA)
avctx->pix_fmt = PIX_FMT_YUV420P;
avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
- if(avctx->idct_algo==FF_IDCT_AUTO)
- avctx->idct_algo=FF_IDCT_VP3;
ff_dsputil_init(&s->dsp, avctx);
+ ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
+ ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
/* initialize to an impossible value which will force a recalculation
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index 9fded0f..4c5ff6a 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -24,8 +24,10 @@
* source code.
*/
+#include "libavutil/attributes.h"
#include "avcodec.h"
#include "dsputil.h"
+#include "vp3dsp.h"
#define IdctAdjustBeforeShift 8
#define xC1S7 64277
@@ -210,19 +212,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
}
}
-void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
- idct(NULL, 0, block, 0);
-}
-
-void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
+static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 1);
}
-void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
+static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
idct(dest, line_size, block, 2);
}
-void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM *block/*align 16*/){
+static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
+ const DCTELEM *block/*align 16*/){
int i, dc = (block[0] + 15) >> 5;
for(i = 0; i < 8; i++){
@@ -238,7 +237,8 @@ void ff_vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size, const DCTELEM
}
}
-void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
+static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
+ int *bounding_values)
{
unsigned char *end;
int filter_value;
@@ -254,7 +254,8 @@ void ff_vp3_v_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu
}
}
-void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_values)
+static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
+ int *bounding_values)
{
unsigned char *end;
int filter_value;
@@ -268,3 +269,21 @@ void ff_vp3_h_loop_filter_c(uint8_t *first_pixel, int stride, int *bounding_valu
first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
}
}
+
+av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
+{
+ c->idct_put = vp3_idct_put_c;
+ c->idct_add = vp3_idct_add_c;
+ c->idct_dc_add = vp3_idct_dc_add_c;
+ c->v_loop_filter = vp3_v_loop_filter_c;
+ c->h_loop_filter = vp3_h_loop_filter_c;
+
+ c->idct_perm = FF_NO_IDCT_PERM;
+
+ if (ARCH_ARM)
+ ff_vp3dsp_init_arm(c, flags);
+ if (ARCH_PPC)
+ ff_vp3dsp_init_ppc(c, flags);
+ if (ARCH_X86)
+ ff_vp3dsp_init_x86(c, flags);
+}
diff --git a/libavcodec/vp3dsp.h b/libavcodec/vp3dsp.h
new file mode 100644
index 0000000..a14dec1
--- /dev/null
+++ b/libavcodec/vp3dsp.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VP3DSP_H
+#define AVCODEC_VP3DSP_H
+
+#include <stdint.h>
+#include "dsputil.h"
+
+typedef struct VP3DSPContext {
+ void (*idct_put)(uint8_t *dest, int line_size, DCTELEM *block);
+ void (*idct_add)(uint8_t *dest, int line_size, DCTELEM *block);
+ void (*idct_dc_add)(uint8_t *dest, int line_size, const DCTELEM *block);
+ void (*v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+ void (*h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+
+ int idct_perm;
+} VP3DSPContext;
+
+void ff_vp3dsp_init(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_arm(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_ppc(VP3DSPContext *c, int flags);
+void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags);
+
+#endif /* AVCODEC_VP3DSP_H */
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index c6e32af..f98d22c 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -411,7 +411,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
case VP56_MB_INTRA:
for (b=0; b<b_max; b++) {
plane = ff_vp56_b2p[b+ab];
- s->dsp.idct_put(frame_current->data[plane] + s->block_offset[b],
+ s->vp3dsp.idct_put(frame_current->data[plane] + s->block_offset[b],
s->stride[plane], s->block_coeff[b]);
}
break;
@@ -424,7 +424,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
s->dsp.put_pixels_tab[1][0](frame_current->data[plane] + off,
frame_ref->data[plane] + off,
s->stride[plane], 8);
- s->dsp.idct_add(frame_current->data[plane] + off,
+ s->vp3dsp.idct_add(frame_current->data[plane] + off,
s->stride[plane], s->block_coeff[b]);
}
break;
@@ -442,7 +442,7 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
plane = ff_vp56_b2p[b+ab];
vp56_mc(s, b, plane, frame_ref->data[plane], s->stride[plane],
16*col+x_off, 16*row+y_off);
- s->dsp.idct_add(frame_current->data[plane] + s->block_offset[b],
+ s->vp3dsp.idct_add(frame_current->data[plane] + s->block_offset[b],
s->stride[plane], s->block_coeff[b]);
}
break;
@@ -666,10 +666,10 @@ av_cold void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
s->avctx = avctx;
avctx->pix_fmt = has_alpha ? PIX_FMT_YUVA420P : PIX_FMT_YUV420P;
- if (avctx->idct_algo == FF_IDCT_AUTO)
- avctx->idct_algo = FF_IDCT_VP3;
ff_dsputil_init(&s->dsp, avctx);
+ ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
+ ff_init_scantable_permutation(s->dsp.idct_permutation, s->vp3dsp.idct_perm);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
for (i=0; i<4; i++) {
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index e135718..12f9380 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -30,6 +30,7 @@
#include "dsputil.h"
#include "get_bits.h"
#include "bytestream.h"
+#include "vp3dsp.h"
#include "vp56dsp.h"
typedef struct vp56_context VP56Context;
@@ -91,6 +92,7 @@ typedef struct {
struct vp56_context {
AVCodecContext *avctx;
DSPContext dsp;
+ VP3DSPContext vp3dsp;
VP56DSPContext vp56dsp;
ScanTable scantable;
AVFrame frames[4];
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 8acbd07..e18074a 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -1,5 +1,6 @@
OBJS-$(CONFIG_MLP_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
+OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
MMX-OBJS += x86/dsputil_mmx.o \
@@ -67,10 +68,8 @@ YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
x86/rv40dsp.o
YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_yasm.o
-YASM-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp.o
-YASM-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp.o
-YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp.o \
- x86/vp56dsp.o
+YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
+YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
YASM-OBJS += x86/dsputil_yasm.o \
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index e829bbd..8049bde 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2574,20 +2574,6 @@ static void vector_clipf_sse(float *dst, const float *src,
);
}
-void ff_vp3_idct_mmx(int16_t *input_data);
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-
-void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size,
- const DCTELEM *block);
-
-void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-
-void ff_vp3_idct_sse2(int16_t *input_data);
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-
int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2,
int order);
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2,
@@ -2782,14 +2768,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
}
-
- if (CONFIG_VP3_DECODER && HAVE_YASM) {
- c->vp3_v_loop_filter = ff_vp3_v_loop_filter_mmx2;
- c->vp3_h_loop_filter = ff_vp3_h_loop_filter_mmx2;
- }
}
- if (CONFIG_VP3_DECODER && HAVE_YASM)
- c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 ||
avctx->codec_id == CODEC_ID_THEORA)) {
@@ -3165,20 +3144,6 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
}
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
#endif
- } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
- CONFIG_VP6_DECODER) &&
- idct_algo == FF_IDCT_VP3 && HAVE_YASM) {
- if (mm_flags & AV_CPU_FLAG_SSE2) {
- c->idct_put = ff_vp3_idct_put_sse2;
- c->idct_add = ff_vp3_idct_add_sse2;
- c->idct = ff_vp3_idct_sse2;
- c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
- } else {
- c->idct_put = ff_vp3_idct_put_mmx;
- c->idct_add = ff_vp3_idct_add_mmx;
- c->idct = ff_vp3_idct_mmx;
- c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
- }
} else if (idct_algo == FF_IDCT_CAVS) {
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
} else if (idct_algo == FF_IDCT_XVIDMMX) {
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 99621fb..0e0bd29 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -524,10 +524,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
%endmacro
%macro vp3_idct_funcs 3
-cglobal vp3_idct_%1, 1, 1, %2
- VP3_IDCT_%1 r0
- RET
-
cglobal vp3_idct_put_%1, 3, %3, %2
VP3_IDCT_%1 r2
%if ARCH_X86_64
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
new file mode 100644
index 0000000..3ae2a90
--- /dev/null
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -0,0 +1,65 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/vp3dsp.h"
+#include "config.h"
+
+void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+
+void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size,
+ const DCTELEM *block);
+
+void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+
+av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
+{
+#if HAVE_YASM
+ int cpuflags = av_get_cpu_flags();
+
+ if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) {
+ c->idct_put = ff_vp3_idct_put_mmx;
+ c->idct_add = ff_vp3_idct_add_mmx;
+ c->idct_perm = FF_PARTTRANS_IDCT_PERM;
+ }
+
+ if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
+ c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
+
+ if (!(flags & CODEC_FLAG_BITEXACT)) {
+ c->v_loop_filter = ff_vp3_v_loop_filter_mmx2;
+ c->h_loop_filter = ff_vp3_h_loop_filter_mmx2;
+ }
+ }
+
+ if (cpuflags & AV_CPU_FLAG_SSE2) {
+ c->idct_put = ff_vp3_idct_put_sse2;
+ c->idct_add = ff_vp3_idct_add_sse2;
+ c->idct_perm = FF_TRANSPOSE_IDCT_PERM;
+ }
+#endif
+}
OpenPOWER on IntegriCloud