summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--libavcodec/alpha/dsputil_alpha.c4
-rw-r--r--libavcodec/arm/dsputil_init_arm.c4
-rw-r--r--libavcodec/arm/dsputil_init_armv6.c4
-rw-r--r--libavcodec/arm/dsputil_init_neon.c6
-rw-r--r--libavcodec/arm/dsputil_iwmmxt.c3
-rw-r--r--libavcodec/arm/h264dsp_init_arm.c8
-rw-r--r--libavcodec/arm/h264pred_init_arm.c11
-rw-r--r--libavcodec/bfin/dsputil_bfin.c5
-rw-r--r--libavcodec/dsputil.c282
-rw-r--r--libavcodec/dsputil.h71
-rw-r--r--libavcodec/dsputil_template.c80
-rw-r--r--libavcodec/h264.c22
-rw-r--r--libavcodec/h264dsp.c120
-rw-r--r--libavcodec/h264dsp.h8
-rw-r--r--libavcodec/h264dsp_template.c5
-rw-r--r--libavcodec/h264idct.c10
-rw-r--r--libavcodec/h264idct_template.c8
-rw-r--r--libavcodec/h264pred.c278
-rw-r--r--libavcodec/h264pred.h6
-rw-r--r--libavcodec/h264pred_template.c16
-rw-r--r--libavcodec/high_bit_depth.h85
-rw-r--r--libavcodec/mlib/dsputil_mlib.c4
-rw-r--r--libavcodec/ppc/dsputil_altivec.c5
-rw-r--r--libavcodec/ppc/dsputil_ppc.c4
-rw-r--r--libavcodec/ppc/h264_altivec.c7
-rw-r--r--libavcodec/ps2/dsputil_mmi.c3
-rw-r--r--libavcodec/rv34.c2
-rw-r--r--libavcodec/sh4/dsputil_align.c8
-rw-r--r--libavcodec/sh4/dsputil_sh4.c2
-rw-r--r--libavcodec/sparc/dsputil_vis.c3
-rw-r--r--libavcodec/vp8.c2
-rw-r--r--libavcodec/x86/dsputil_mmx.c32
-rw-r--r--libavcodec/x86/h264_intrapred_init.c6
-rw-r--r--libavcodec/x86/h264dsp_mmx.c4
34 files changed, 753 insertions, 365 deletions
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 610f92a..32bb0fc 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
@@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
c->clear_blocks = clear_blocks_axp;
+ }
/* amask clears all bits that correspond to present features. */
if (amask(AMASK_MVI) == 0) {
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 92796c3..777a2f9 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;
@@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = ff_add_pixels_clamped_arm;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_arm;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm;
@@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm;
c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
+ }
if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx);
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 362050c..7584aee 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) {
c->idct_put = ff_simple_idct_put_armv6;
@@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
}
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = ff_put_pixels16_armv6;
c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6;
c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6;
@@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
+ }
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
c->get_pixels = ff_get_pixels_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index cd58011..3bc053c 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
if (!avctx->lowres) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLENEON) {
@@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
}
}
+ if (!high_bit_depth) {
c->clear_block = ff_clear_block_neon;
c->clear_blocks = ff_clear_blocks_neon;
@@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon;
+ }
c->add_pixels_clamped = ff_add_pixels_clamped_neon;
c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
if (CONFIG_H264_DECODER) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
@@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon;
c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon;
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon;
+ }
}
if (CONFIG_VP3_DECODER) {
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index e83edb5..86f8fdd 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
+ if (!high_bit_depth) {
c->clear_blocks = clear_blocks_iwmmxt;
c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
@@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
+ }
}
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index 20f5ac2..c2399e5 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride,
const uint8_t nnzc[6*8]);
-static void ff_h264dsp_init_neon(H264DSPContext *c)
+static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth)
{
+ if (bit_depth == 8) {
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
@@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c)
c->h264_idct8_add = ff_h264_idct8_add_neon;
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon;
c->h264_idct8_add4 = ff_h264_idct8_add4_neon;
+ }
}
-void ff_h264dsp_init_arm(H264DSPContext *c)
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth)
{
- if (HAVE_NEON) ff_h264dsp_init_neon(c);
+ if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth);
}
diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c
index 3f1c5c6..cae32d7 100644
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride);
-static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
+static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth)
{
+ const int high_depth = bit_depth > 8;
+
+ if (high_depth)
+ return;
+
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon;
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon;
if (codec_id != CODEC_ID_VP8)
@@ -69,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id)
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon;
}
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth)
{
- if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id);
+ if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); /* NEON init returns early when bit_depth > 8 */
}
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index 65d0308..0db2d8b 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
c->get_pixels = ff_bfin_get_pixels;
c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
+ if (!high_bit_depth)
c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum;
c->pix_norm1 = ff_bfin_pix_norm1;
@@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
c->sse[1] = ff_bfin_sse8;
c->sse[2] = ff_bfin_sse4;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = bfin_put_pixels16;
c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
@@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
+ }
if (avctx->dct_algo == FF_DCT_AUTO)
c->fdct = ff_bfin_fdct;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 078e172..4389289 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -43,6 +43,15 @@
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
+#define BIT_DEPTH 9
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "dsputil_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 8
#include "dsputil_template.c"
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -619,10 +628,10 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
switch(width){
- case 2: put_pixels2_c (dst, src, stride, height); break;
- case 4: put_pixels4_c (dst, src, stride, height); break;
- case 8: put_pixels8_c (dst, src, stride, height); break;
- case 16:put_pixels16_c(dst, src, stride, height); break;
+ case 2: put_pixels2_8_c (dst, src, stride, height); break;
+ case 4: put_pixels4_8_c (dst, src, stride, height); break;
+ case 8: put_pixels8_8_c (dst, src, stride, height); break;
+ case 16:put_pixels16_8_c(dst, src, stride, height); break;
}
}
@@ -716,10 +725,10 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
switch(width){
- case 2: avg_pixels2_c (dst, src, stride, height); break;
- case 4: avg_pixels4_c (dst, src, stride, height); break;
- case 8: avg_pixels8_c (dst, src, stride, height); break;
- case 16:avg_pixels16_c(dst, src, stride, height); break;
+ case 2: avg_pixels2_8_c (dst, src, stride, height); break;
+ case 4: avg_pixels4_8_c (dst, src, stride, height); break;
+ case 8: avg_pixels8_8_c (dst, src, stride, height); break;
+ case 16:avg_pixels16_8_c(dst, src, stride, height); break;
}
}
@@ -953,7 +962,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
- OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -963,7 +972,7 @@ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
- OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -971,7 +980,7 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
- OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -985,7 +994,7 @@ static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
- OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -996,7 +1005,7 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+ OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1004,9 +1013,9 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1017,7 +1026,7 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+ OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1025,9 +1034,9 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1038,7 +1047,7 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+ OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1046,9 +1055,9 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1059,7 +1068,7 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+ OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1067,23 +1076,23 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
@@ -1094,14 +1103,14 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1113,14 +1122,14 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+ OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+ put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1132,7 +1141,7 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
- OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1142,7 +1151,7 @@ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
- OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1150,7 +1159,7 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
- OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1164,7 +1173,7 @@ static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
- OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1175,7 +1184,7 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+ OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1183,9 +1192,9 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1196,7 +1205,7 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+ OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1204,9 +1213,9 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1217,7 +1226,7 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+ OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1225,9 +1234,9 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1238,7 +1247,7 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+ OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1246,23 +1255,23 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
@@ -1273,14 +1282,14 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1292,14 +1301,14 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+ OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+ put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -1327,7 +1336,7 @@ QPEL_MC(0, avg_ , _ , op_avg)
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
-#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
+#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -1349,16 +1358,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
#if CONFIG_RV40_DECODER
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
- put_pixels16_xy2_c(dst, src, stride, 16);
+ put_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
- avg_pixels16_xy2_c(dst, src, stride, 16);
+ avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
- put_pixels8_xy2_c(dst, src, stride, 8);
+ put_pixels8_xy2_8_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
- avg_pixels8_xy2_c(dst, src, stride, 8);
+ avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
@@ -1394,7 +1403,7 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
uint8_t half[64];
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
- put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+ put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1404,7 +1413,7 @@ static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
uint8_t half[64];
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
- put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+ put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
@@ -1418,7 +1427,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
- put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+ put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
uint8_t halfH[88];
@@ -1427,7 +1436,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
- put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+ put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
uint8_t halfH[88];
@@ -2863,8 +2872,24 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->idct_put= ff_jref_idct4_put;
c->idct_add= ff_jref_idct4_add;
}else{
- c->idct_put= ff_h264_lowres_idct_put_c;
- c->idct_add= ff_h264_lowres_idct_add_c;
+ if (avctx->codec_id != CODEC_ID_H264) { /* codecs other than H.264 always get the 8-bit lowres IDCT */
+ c->idct_put= ff_h264_lowres_idct_put_8_c;
+ c->idct_add= ff_h264_lowres_idct_add_8_c;
+ } else {
+ switch (avctx->bits_per_raw_sample) { /* H.264: choose the lowres IDCT variant by sample bit depth */
+ case 9:
+ c->idct_put= ff_h264_lowres_idct_put_9_c;
+ c->idct_add= ff_h264_lowres_idct_add_9_c;
+ break;
+ case 10:
+ c->idct_put= ff_h264_lowres_idct_put_10_c;
+ c->idct_add= ff_h264_lowres_idct_add_10_c;
+ break;
+ default: /* any depth other than 9 or 10 uses the 8-bit variant */
+ c->idct_put= ff_h264_lowres_idct_put_8_c;
+ c->idct_add= ff_h264_lowres_idct_add_8_c;
+ }
+ }
}
c->idct = j_rev_dct4;
c->idct_permutation_type= FF_NO_IDCT_PERM;
@@ -2922,14 +2947,9 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
c->put_pixels_nonclamped = put_pixels_nonclamped_c;
c->add_pixels_clamped = ff_add_pixels_clamped_c;
- c->add_pixels8 = add_pixels8_c;
- c->add_pixels4 = add_pixels4_c;
c->sum_abs_dctelem = sum_abs_dctelem_c;
- c->emulated_edge_mc = ff_emulated_edge_mc;
c->gmc1 = gmc1_c;
c->gmc = ff_gmc_c;
- c->clear_block = clear_block_c;
- c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
@@ -2947,30 +2967,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->pix_abs[1][2] = pix_abs8_y2_c;
c->pix_abs[1][3] = pix_abs8_xy2_c;
-#define dspfunc(PFX, IDX, NUM) \
- c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
- c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
- c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
- c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
-
- dspfunc(put, 0, 16);
- dspfunc(put_no_rnd, 0, 16);
- dspfunc(put, 1, 8);
- dspfunc(put_no_rnd, 1, 8);
- dspfunc(put, 2, 4);
- dspfunc(put, 3, 2);
-
- dspfunc(avg, 0, 16);
- dspfunc(avg_no_rnd, 0, 16);
- dspfunc(avg, 1, 8);
- dspfunc(avg_no_rnd, 1, 8);
- dspfunc(avg, 2, 4);
- dspfunc(avg, 3, 2);
-#undef dspfunc
-
- c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
- c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
-
c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
@@ -3021,23 +3017,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_qpel, 1, 8);
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
- dspfunc(put_h264_qpel, 0, 16);
- dspfunc(put_h264_qpel, 1, 8);
- dspfunc(put_h264_qpel, 2, 4);
- dspfunc(put_h264_qpel, 3, 2);
- dspfunc(avg_h264_qpel, 0, 16);
- dspfunc(avg_h264_qpel, 1, 8);
- dspfunc(avg_h264_qpel, 2, 4);
-
#undef dspfunc
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
-
- c->draw_edges = draw_edges_c;
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
ff_mlp_init(c, avctx);
@@ -3162,6 +3142,92 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
+#undef FUNC
+#undef FUNCC
+#define FUNC(f, depth) f ## _ ## depth
+#define FUNCC(f, depth) f ## _ ## depth ## _c
+
+#define dspfunc1(PFX, IDX, NUM, depth) /* fill c->PFX_pixels_tab[IDX][0..3] with the plain, _x2, _y2 and _xy2 PFX_pixelsNUM functions of the given depth */\
+ c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
+ c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
+ c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
+ c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
+
+#define dspfunc2(PFX, IDX, NUM, depth) /* fill all 16 entries of c->PFX_pixels_tab[IDX]; entry 4*Y + X receives PFXNUM_mcXY_depth_c */\
+ c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
+ c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
+ c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
+ c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
+ c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
+ c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
+ c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
+ c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
+ c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
+ c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
+ c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
+ c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
+ c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
+ c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
+ c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
+ c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
+
+
+#define BIT_DEPTH_FUNCS(depth) /* install the depth-specific C function pointers listed below into c */\
+ c->draw_edges = FUNCC(draw_edges , depth);\
+ c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
+ c->clear_block = FUNCC(clear_block , depth);\
+ c->clear_blocks = FUNCC(clear_blocks , depth);\
+ c->add_pixels8 = FUNCC(add_pixels8 , depth);\
+ c->add_pixels4 = FUNCC(add_pixels4 , depth);\
+ c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
+ c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
+\
+ c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
+ c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
+ c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
+ c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
+ c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
+ c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
+\
+ dspfunc1(put , 0, 16, depth);\
+ dspfunc1(put , 1, 8, depth);\
+ dspfunc1(put , 2, 4, depth);\
+ dspfunc1(put , 3, 2, depth);\
+ dspfunc1(put_no_rnd, 0, 16, depth);\
+ dspfunc1(put_no_rnd, 1, 8, depth);\
+ dspfunc1(avg , 0, 16, depth);\
+ dspfunc1(avg , 1, 8, depth);\
+ dspfunc1(avg , 2, 4, depth);\
+ dspfunc1(avg , 3, 2, depth);\
+ dspfunc1(avg_no_rnd, 0, 16, depth);\
+ dspfunc1(avg_no_rnd, 1, 8, depth);\
+\
+ dspfunc2(put_h264_qpel, 0, 16, depth);\
+ dspfunc2(put_h264_qpel, 1, 8, depth);\
+ dspfunc2(put_h264_qpel, 2, 4, depth);\
+ dspfunc2(put_h264_qpel, 3, 2, depth);\
+ dspfunc2(avg_h264_qpel, 0, 16, depth);\
+ dspfunc2(avg_h264_qpel, 1, 8, depth);\
+ dspfunc2(avg_h264_qpel, 2, 4, depth); /* the macro body already ends with ';', so it is invoked without a trailing semicolon */
+
+ if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) { /* non-H.264 codecs and 8-bit H.264 use the 8-bit function set */
+ BIT_DEPTH_FUNCS(8)
+ } else {
+ switch (avctx->bits_per_raw_sample) {
+ case 9:
+ BIT_DEPTH_FUNCS(9)
+ break;
+ case 10:
+ BIT_DEPTH_FUNCS(10)
+ break;
+ default: /* H.264 with a depth other than 8, 9 or 10: log at debug level and fall back to the 8-bit set */
+ av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
+ BIT_DEPTH_FUNCS(8)
+ break;
+ }
+ }
+
+
if (HAVE_MMX) dsputil_init_mmx (c, avctx);
if (ARCH_ARM) dsputil_init_arm (c, avctx);
if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 9b4aef7..78d2152 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block);
void ff_fdct_mmx2(DCTELEM *block);
void ff_fdct_sse2(DCTELEM *block);
-void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
-void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
-void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
-
-void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
-void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
+#define H264_IDCT(depth) /* declare the ff_h264_*_<depth>_c IDCT and DC-dequant prototypes for one bit depth */ \
+void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\
+void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\
+void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\
+void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\
+void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);
+
+H264_IDCT( 8) /* prototypes are emitted for the 8-, 9- and 10-bit variants */
+H264_IDCT( 9)
+H264_IDCT(10)
+
void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
@@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64];
extern uint32_t ff_squareTbl[512];
extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
-void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride);
+#define PUTAVG_PIXELS(depth) /* declare the 8x8 and 16x16 put/avg pixel prototypes for one bit depth */\
+void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\
+void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);
+
+PUTAVG_PIXELS( 8)
+PUTAVG_PIXELS( 9)
+PUTAVG_PIXELS(10)
+
+#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c /* the old unsuffixed names stay usable as aliases of the 8-bit versions */
+#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c
+#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c
+#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c
/* VP3 DSP functions */
void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
@@ -187,10 +202,17 @@ typedef struct ScanTable{
void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
-void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
- int block_w, int block_h,
+#define EMULATED_EDGE(depth) /* declare ff_emulated_edge_mc_<depth> for one bit depth */ \
+void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
+ int block_w, int block_h,\
int src_x, int src_y, int w, int h);
+EMULATED_EDGE(8)
+EMULATED_EDGE(9)
+EMULATED_EDGE(10)
+
+#define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* the unsuffixed name resolves to the 8-bit version */
+
void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize);
@@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
#define BYTE_VEC32(c) ((c)*0x01010101UL)
+#define BYTE_VEC64(c) ((c)*0x0001000100010001UL) /* c repeated in each of the four 16-bit lanes of a 64-bit value (for c < 0x10000) */
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
{
@@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}
+static inline uint64_t rnd_avg64(uint64_t a, uint64_t b) /* average of a and b computed independently in each 16-bit lane, halves rounded up */
+{
+ return (a | b) - (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); /* bit 0 of every lane is cleared before the shift so nothing spills into the lane below */
+}
+
+static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b) /* average of a and b computed independently in each 16-bit lane, halves rounded down */
+{
+ return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); /* bit 0 of every lane is cleared before the shift so nothing spills into the lane below */
+}
+
static inline int get_penalty_factor(int lambda, int lambda2, int type){
switch(type&0xFF){
default:
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index f69c467..8ca6d3e 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -27,25 +27,55 @@
* DSP utils
*/
-#include "dsputil.h"
+#include "high_bit_depth.h"
-#define BIT_DEPTH 8
+static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copy h rows from src to dst with one AV_RN2P/AV_WN2P transfer per row; the strides are added to the byte pointers */
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN2P(dst , AV_RN2P(src )); /* presumably a 2-pixel load/store; the macros are defined outside this hunk - confirm */
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
-#define pixel uint8_t
-#define pixel2 uint16_t
-#define pixel4 uint32_t
-#define dctcoef int16_t
+static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copy h rows from src to dst with one AV_RN4P/AV_WN4P transfer per row; the strides are added to the byte pointers */
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src )); /* presumably a 4-pixel load/store; the macros are defined outside this hunk - confirm */
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
-#define FUNC(a) a
-#define FUNCC(a) a ## _c
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define AV_RN2P AV_RN16
-#define AV_RN4P AV_RN32
-#define PIXEL_MAX ((1<<BIT_DEPTH)-1)
+static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copy h rows from src to dst with two AV_RN4P/AV_WN4P transfers per row; the strides are added to the byte pointers */
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src ));
+ AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel))); /* second transfer starts 4 pixels in; the byte offset scales with sizeof(pixel) */
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
-#define no_rnd_avg_pixel4 no_rnd_avg32
-#define rnd_avg_pixel4 rnd_avg32
+static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) /* copy h rows from src to dst with four AV_RN4P/AV_WN4P transfers per row; the strides are added to the byte pointers */
+{
+ int i;
+ for(i=0; i<h; i++)
+ {
+ AV_WN4P(dst , AV_RN4P(src ));
+ AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel))); /* later transfers start 4, 8 and 12 pixels in; byte offsets scale with sizeof(pixel) */
+ AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
+ AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
@@ -1317,10 +1347,22 @@ H264_MC(avg_, 16)
#undef op2_avg
#undef op2_put
-#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
-#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
-#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
-#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
+#if BIT_DEPTH == 8 /* alias the 8x8 and 16x16 h264 qpel mc00 names of the current BIT_DEPTH to the matching ff_put/ff_avg pixels functions */
+# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c
+# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c
+# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
+# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
+#elif BIT_DEPTH == 9
+# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c
+# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c
+# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
+# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
+#elif BIT_DEPTH == 10
+# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c
+# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c
+# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
+# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
+#endif /* no aliases are defined for any other BIT_DEPTH value */
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
FUNCC(put_pixels8)(dst, src, stride, 8);
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 0fcb7db..1388dd5 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){
dst->list_counts = src->list_counts;
dst->s.obmc_scratchpad = NULL;
- ff_h264_pred_init(&dst->hpc, src->s.codec_id);
+ ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);
}
/**
@@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){
s->height = s->avctx->height;
s->codec_id= s->avctx->codec->id;
- ff_h264dsp_init(&h->h264dsp);
- ff_h264_pred_init(&h->hpc, s->codec_id);
+ ff_h264dsp_init(&h->h264dsp, 8);
+ ff_h264_pred_init(&h->hpc, s->codec_id, 8);
h->dequant_coeff_pps= -1;
s->unrestricted_mv=1;
@@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){
ff_h264_decode_init_vlc();
h->pixel_shift = 0;
- h->sps.bit_depth_luma = 8;
+ h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
h->thread_context[0] = h;
h->outputed_poc = INT_MIN;
@@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
if(avctx->has_b_frames < 2)
avctx->has_b_frames= !s->low_delay;
+
+ if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { /* SPS luma bit depth differs from the depth the codec context currently carries */
+ if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
+ avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
+ h->pixel_shift = h->sps.bit_depth_luma > 8; /* 0 for 8-bit, 1 for 9- and 10-bit */
+
+ ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); /* re-select the function pointers for the new depth */
+ ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+ dsputil_init(&s->dsp, s->avctx); /* reads the bits_per_raw_sample value updated above */
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); /* decoding is aborted right below, so report at error level rather than debug */
+ return -1;
+ }
+ }
break;
case NAL_PPS:
init_get_bits(&s->gb, ptr, bit_length);
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index 22b2086..64f4856 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -29,57 +29,83 @@
#include "avcodec.h"
#include "h264dsp.h"
+#define BIT_DEPTH 8
#include "h264dsp_template.c"
+#undef BIT_DEPTH
-void ff_h264dsp_init(H264DSPContext *c)
-{
- c->h264_idct_add= ff_h264_idct_add_c;
- c->h264_idct8_add= ff_h264_idct8_add_c;
- c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
- c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
- c->h264_idct_add16 = ff_h264_idct_add16_c;
- c->h264_idct8_add4 = ff_h264_idct8_add4_c;
- c->h264_idct_add8 = ff_h264_idct_add8_c;
- c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
- c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
- c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
+#define BIT_DEPTH 9
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264dsp_template.c"
+#undef BIT_DEPTH
- c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
- c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
- c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
- c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
- c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
- c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
- c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
- c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
- c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
- c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
- c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
- c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
- c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
- c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
- c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
- c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
- c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
- c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
- c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
- c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) /* fill c with the C H.264 DSP functions for bit_depth (9 and 10 handled explicitly, anything else treated as 8), then call the per-architecture init hooks */
+{
+#undef FUNC
+#define FUNC(a, depth) a ## _ ## depth ## _c /* FUNC(name, 9) expands to name_9_c */
- c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
- c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
- c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c;
- c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
- c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
- c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c;
- c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
- c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
- c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c;
- c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
- c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
- c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c;
+#define H264_DSP(depth) /* assign the IDCT, weight/biweight and loop-filter pointers of one bit depth */ \
+ c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
+ c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
+ c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\
+ c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\
+ c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\
+ c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\
+ c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\
+ c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\
+ c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\
+ c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\
+\
+ c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\
+ c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\
+ c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\
+ c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\
+ c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\
+ c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\
+ c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\
+ c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\
+ c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\
+ c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\
+ c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\
+ c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\
+ c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\
+ c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\
+ c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\
+ c->biweight_h264_pixels_tab[5]= FUNC(biweight_h264_pixels4x8, depth);\
+ c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\
+ c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\
+ c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\
+ c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\
+\
+ c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\
+ c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\
+ c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\
+ c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\
+ c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\
+ c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\
+ c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\
+ c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\
+ c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\
+ c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\
+ c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\
+ c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth); /* the continuation makes the following loop_filter_strength assignment the last line of H264_DSP */\
c->h264_loop_filter_strength= NULL;
- if (ARCH_ARM) ff_h264dsp_init_arm(c);
- if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c);
- if (HAVE_MMX) ff_h264dsp_init_x86(c);
+ switch (bit_depth) { /* pick the C implementation set matching the requested depth */
+ case 9:
+ H264_DSP(9);
+ break;
+ case 10:
+ H264_DSP(10);
+ break;
+ default: /* every value other than 9 or 10 gets the 8-bit functions */
+ H264_DSP(8);
+ break;
+ }
+
+ if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); /* architecture hooks run after the C pointers are set and receive the same bit_depth */
+ if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth);
+ if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth);
}
diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
index 7eb50be..87a1dd9 100644
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -75,9 +75,9 @@ typedef struct H264DSPContext{
void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
}H264DSPContext;
-void ff_h264dsp_init(H264DSPContext *c);
-void ff_h264dsp_init_arm(H264DSPContext *c);
-void ff_h264dsp_init_ppc(H264DSPContext *c);
-void ff_h264dsp_init_x86(H264DSPContext *c);
+void ff_h264dsp_init(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth);
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth);
#endif /* AVCODEC_H264DSP_H */
diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c
index 7190f4d..91162ea 100644
--- a/libavcodec/h264dsp_template.c
+++ b/libavcodec/h264dsp_template.c
@@ -25,10 +25,7 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
-#define BIT_DEPTH 8
-#define pixel uint8_t
-#define av_clip_pixel av_clip_uint8
-#define FUNCC(a) a ## _c
+#include "high_bit_depth.h"
#define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index d797775..1634a00 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -25,4 +25,14 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
+#define BIT_DEPTH 8
#include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264idct_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264idct_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c
index ed1f18c..39c9a1c 100644
--- a/libavcodec/h264idct_template.c
+++ b/libavcodec/h264idct_template.c
@@ -25,7 +25,7 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
-#include "dsputil.h"
+#include "high_bit_depth.h"
#ifndef AVCODEC_H264IDCT_INTERNAL_H
#define AVCODEC_H264IDCT_INTERNAL_H
@@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={
};
#endif
-#define pixel uint8_t
-#define dctcoef DCTELEM
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNCC(a) a ## _c
-
static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){
int i;
INIT_CLIP
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 616a7a9..b3701ef 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -26,7 +26,18 @@
*/
#include "h264pred.h"
+
+#define BIT_DEPTH 8
+#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
#include "h264pred_template.c"
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264pred_template.c"
+#undef BIT_DEPTH
static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
const int lt= src[-1-1*stride];
@@ -245,11 +256,11 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
}
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
- pred16x16_plane_compat_c(src, stride, 1, 0);
+ pred16x16_plane_compat_8_c(src, stride, 1, 0);
}
static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
- pred16x16_plane_compat_c(src, stride, 0, 1);
+ pred16x16_plane_compat_8_c(src, stride, 0, 1);
}
static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
@@ -352,130 +363,149 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
/**
* Set the intra prediction function pointers.
*/
-void ff_h264_pred_init(H264PredContext *h, int codec_id){
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
// MpegEncContext * const s = &h->s;
- if(codec_id != CODEC_ID_RV40){
- if(codec_id == CODEC_ID_VP8) {
- h->pred4x4[VERT_PRED ]= pred4x4_vertical_vp8_c;
- h->pred4x4[HOR_PRED ]= pred4x4_horizontal_vp8_c;
- } else {
- h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
- h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
- }
- h->pred4x4[DC_PRED ]= pred4x4_dc_c;
- if(codec_id == CODEC_ID_SVQ3)
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c;
- else
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
- h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
- h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
- h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
- if (codec_id == CODEC_ID_VP8) {
- h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_vp8_c;
- } else
- h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
- h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
- if(codec_id != CODEC_ID_VP8) {
- h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
- h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
- h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
- } else {
- h->pred4x4[TM_VP8_PRED ]= pred4x4_tm_vp8_c;
- h->pred4x4[DC_127_PRED ]= pred4x4_127_dc_c;
- h->pred4x4[DC_129_PRED ]= pred4x4_129_dc_c;
- h->pred4x4[VERT_VP8_PRED ]= pred4x4_vertical_c;
- h->pred4x4[HOR_VP8_PRED ]= pred4x4_horizontal_c;
- }
- }else{
- h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
- h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
- h->pred4x4[DC_PRED ]= pred4x4_dc_c;
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c;
- h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
- h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
- h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
- h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_rv40_c;
- h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_rv40_c;
- h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
- h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
- h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
- h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c;
- h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c;
- h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c;
+#undef FUNC
+#undef FUNCC
+#define FUNC(a, depth) a ## _ ## depth
+#define FUNCC(a, depth) a ## _ ## depth ## _c
+#define FUNCD(a) a ## _c
+
+#define H264_PRED(depth) \
+ if(codec_id != CODEC_ID_RV40){\
+ if(codec_id == CODEC_ID_VP8) {\
+ h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\
+ h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\
+ } else {\
+ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ }\
+ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
+ if(codec_id == CODEC_ID_SVQ3)\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
+ else\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
+ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
+ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
+ if (codec_id == CODEC_ID_VP8) {\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\
+ } else\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\
+ h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\
+ if(codec_id != CODEC_ID_VP8) {\
+ h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
+ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
+ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
+ } else {\
+ h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\
+ h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\
+ h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\
+ h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_VP8_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ }\
+ }else{\
+ h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
+ h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
+ h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
+ h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
+ h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
+ h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
+ h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\
+ h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\
+ h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
+ h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
+ h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
+ h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
+ h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
+ h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
+ }\
+\
+ h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\
+ h->pred8x8l[HOR_PRED ]= FUNCC(pred8x8l_horizontal , depth);\
+ h->pred8x8l[DC_PRED ]= FUNCC(pred8x8l_dc , depth);\
+ h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left , depth);\
+ h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right , depth);\
+ h->pred8x8l[VERT_RIGHT_PRED ]= FUNCC(pred8x8l_vertical_right , depth);\
+ h->pred8x8l[HOR_DOWN_PRED ]= FUNCC(pred8x8l_horizontal_down , depth);\
+ h->pred8x8l[VERT_LEFT_PRED ]= FUNCC(pred8x8l_vertical_left , depth);\
+ h->pred8x8l[HOR_UP_PRED ]= FUNCC(pred8x8l_horizontal_up , depth);\
+ h->pred8x8l[LEFT_DC_PRED ]= FUNCC(pred8x8l_left_dc , depth);\
+ h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\
+ h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\
+\
+ h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\
+ h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\
+ if (codec_id != CODEC_ID_VP8) {\
+ h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\
+ } else\
+ h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
+ if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
+ h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\
+ h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+ h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+ h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+ h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+ }else{\
+ h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
+ h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
+ h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
+ if (codec_id == CODEC_ID_VP8) {\
+ h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\
+ h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\
+ }\
+ }\
+ h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\
+\
+ h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\
+ h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\
+ h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\
+ switch(codec_id){\
+ case CODEC_ID_SVQ3:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\
+ break;\
+ case CODEC_ID_RV40:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\
+ break;\
+ case CODEC_ID_VP8:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\
+ h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\
+ h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\
+ break;\
+ default:\
+ h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane , depth);\
+ break;\
+ }\
+ h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc , depth);\
+ h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc , depth);\
+ h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc , depth);\
+\
+ /* special lossless h/v prediction for h264 */ \
+ h->pred4x4_add [VERT_PRED ]= FUNCC(pred4x4_vertical_add , depth);\
+ h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
+ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
+ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
+ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
+ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
+ h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
+ h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\
+
+ switch (bit_depth) {
+ case 9:
+ H264_PRED(9)
+ break;
+ case 10:
+ H264_PRED(10)
+ break;
+ default:
+ H264_PRED(8)
+ break;
}
- h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
- h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
- h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
- h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
- h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
- h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
- h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
- h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
- h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
- h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
-
- h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c;
- h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c;
- if (codec_id != CODEC_ID_VP8) {
- h->pred8x8[PLANE_PRED8x8]= pred8x8_plane_c;
- } else
- h->pred8x8[PLANE_PRED8x8]= pred8x8_tm_vp8_c;
- if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){
- h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c;
- h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
- h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
- h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t;
- h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt;
- h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00;
- h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0;
- }else{
- h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_rv40_c;
- h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c;
- h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c;
- if (codec_id == CODEC_ID_VP8) {
- h->pred8x8[DC_127_PRED8x8]= pred8x8_127_dc_c;
- h->pred8x8[DC_129_PRED8x8]= pred8x8_129_dc_c;
- }
- }
- h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
-
- h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c;
- h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c;
- h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c;
- switch(codec_id){
- case CODEC_ID_SVQ3:
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_svq3_c;
- break;
- case CODEC_ID_RV40:
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_rv40_c;
- break;
- case CODEC_ID_VP8:
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_tm_vp8_c;
- h->pred16x16[DC_127_PRED8x8]= pred16x16_127_dc_c;
- h->pred16x16[DC_129_PRED8x8]= pred16x16_129_dc_c;
- break;
- default:
- h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c;
- break;
- }
- h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
- h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
- h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
-
- //special lossless h/v prediction for h264
- h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c;
- h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c;
- h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c;
- h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c;
- h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c;
- h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
- h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
- h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
-
- if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id);
- if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id);
+ if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
+ if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
}
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index db3f580..34b1e90 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -101,8 +101,8 @@ typedef struct H264PredContext{
void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
}H264PredContext;
-void ff_h264_pred_init(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_arm(H264PredContext *h, int codec_id);
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id);
+void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth);
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth);
#endif /* AVCODEC_H264PRED_H */
diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c
index 4bd26e2..066e837 100644
--- a/libavcodec/h264pred_template.c
+++ b/libavcodec/h264pred_template.c
@@ -26,21 +26,7 @@
*/
#include "mathops.h"
-#include "dsputil.h"
-
-#define BIT_DEPTH 8
-
-#define pixel uint8_t
-#define pixel4 uint32_t
-#define dctcoef DCTELEM
-
-#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-#define CLIP(a) cm[a]
-#define FUNC(a) a
-#define FUNCC(a) a ## _c
-#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
-#define AV_WN4P AV_WN32
-#define AV_WN4PA AV_WN32A
+#include "high_bit_depth.h"
static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
pixel *src = (pixel*)_src;
diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h
new file mode 100644
index 0000000..6f2b6a7
--- /dev/null
+++ b/libavcodec/high_bit_depth.h
@@ -0,0 +1,85 @@
+#include "dsputil.h"
+
+#ifndef BIT_DEPTH
+#define BIT_DEPTH 8
+#endif
+
+#ifdef AVCODEC_H264_HIGH_DEPTH_H
+# undef pixel
+# undef pixel2
+# undef pixel4
+# undef dctcoef
+# undef INIT_CLIP
+# undef no_rnd_avg_pixel4
+# undef rnd_avg_pixel4
+# undef AV_RN2P
+# undef AV_RN4P
+# undef AV_WN2P
+# undef AV_WN4P
+# undef AV_WN4PA
+# undef CLIP
+# undef FUNC
+# undef FUNCC
+# undef av_clip_pixel
+# undef PIXEL_SPLAT_X4
+#else
+# define AVCODEC_H264_HIGH_DEPTH_H
+# define CLIP_PIXEL(depth)\
+ static inline uint16_t av_clip_pixel_ ## depth (int p)\
+ {\
+ const int pixel_max = (1 << depth)-1;\
+ return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\
+ }
+
+CLIP_PIXEL( 9)
+CLIP_PIXEL(10)
+#endif
+
+#if BIT_DEPTH > 8
+# define pixel uint16_t
+# define pixel2 uint32_t
+# define pixel4 uint64_t
+# define dctcoef int32_t
+
+# define INIT_CLIP
+# define no_rnd_avg_pixel4 no_rnd_avg64
+# define rnd_avg_pixel4 rnd_avg64
+# define AV_RN2P AV_RN32
+# define AV_RN4P AV_RN64
+# define AV_WN2P AV_WN32
+# define AV_WN4P AV_WN64
+# define AV_WN4PA AV_WN64A
+# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL)
+#else
+# define pixel uint8_t
+# define pixel2 uint16_t
+# define pixel4 uint32_t
+# define dctcoef int16_t
+
+# define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+# define no_rnd_avg_pixel4 no_rnd_avg32
+# define rnd_avg_pixel4 rnd_avg32
+# define AV_RN2P AV_RN16
+# define AV_RN4P AV_RN32
+# define AV_WN2P AV_WN16
+# define AV_WN4P AV_WN32
+# define AV_WN4PA AV_WN32A
+# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
+#endif
+
+#if BIT_DEPTH == 8
+# define av_clip_pixel(a) av_clip_uint8(a)
+# define CLIP(a) cm[a]
+# define FUNC(a) a ## _8
+# define FUNCC(a) a ## _8_c
+#elif BIT_DEPTH == 9
+# define av_clip_pixel(a) av_clip_pixel_9(a)
+# define CLIP(a) av_clip_pixel_9(a)
+# define FUNC(a) a ## _9
+# define FUNCC(a) a ## _9_c
+#elif BIT_DEPTH == 10
+# define av_clip_pixel(a) av_clip_pixel_10(a)
+# define CLIP(a) av_clip_pixel_10(a)
+# define FUNC(a) a ## _10
+# define FUNCC(a) a ## _10_c
+#endif
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index 9e49c91..c0f2c03 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data)
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
c->get_pixels = get_pixels_mlib;
c->diff_pixels = diff_pixels_mlib;
c->add_pixels_clamped = add_pixels_clamped_mlib;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_mlib;
c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
@@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib;
c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib;
+ }
c->bswap_buf = bswap_buf_mlib;
}
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 9111d03..adce61b 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec;
c->pix_abs[0][3] = sad16_xy2_altivec;
@@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
+ if (!high_bit_depth)
c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
@@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
+ }
c->hadamard8_diff[0] = hadamard8_diff16_altivec;
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index c52ea61..5f131f3 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h)
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
// Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
+ if (!high_bit_depth) {
switch (check_dcbzl_effect()) {
case 32:
c->clear_blocks = clear_blocks_dcbz32_ppc;
@@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
default:
break;
}
+ }
#if HAVE_ALTIVEC
if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx);
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 02699be..fae0674 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
@@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc
}
+ }
}
-void ff_h264dsp_init_ppc(H264DSPContext *c)
+void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth)
{
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+ if (bit_depth == 8) {
c->h264_idct_add = ff_h264_idct_add_altivec;
c->h264_idct_add8 = ff_h264_idct_add8_altivec;
c->h264_idct_add16 = ff_h264_idct_add16_altivec;
@@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c)
c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
}
+ }
}
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index b6096b3..f4503a9 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ if (!high_bit_depth) {
c->clear_blocks = clear_blocks_mmi;
c->put_pixels_tab[1][0] = put_pixels8_mmi;
@@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_tab[0][0] = put_pixels16_mmi;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
+ }
c->get_pixels = get_pixels_mmi;
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index f3ab979..b5d314c 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0)
return -1;
- ff_h264_pred_init(&r->h, CODEC_ID_RV40);
+ ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8);
r->intra_types_stride = 4*s->mb_stride + 4;
r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist));
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 7a8d60d..db40ece 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
@@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
+ }
#ifdef QPEL
@@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_qpel, 1, 8);
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
+ if (!high_bit_depth) {
dspfunc(put_h264_qpel, 0, 16);
dspfunc(put_h264_qpel, 1, 8);
dspfunc(put_h264_qpel, 2, 4);
dspfunc(avg_h264_qpel, 0, 16);
dspfunc(avg_h264_qpel, 1, 8);
dspfunc(avg_h264_qpel, 2, 4);
+ }
#undef dspfunc
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
+ }
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index 0c724c3..9ea48ad 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
dsputil_init_align(c,avctx);
+ if (!high_bit_depth)
c->clear_blocks = clear_blocks_sh4;
if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){
c->idct_put = idct_put;
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index baf555b..ab9258b 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
{
/* VIS-specific optimizations */
int accel = vis_level ();
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (accel & ACCEL_SPARC_VIS) {
if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){
@@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
}
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = MC_put_o_16_vis;
c->put_pixels_tab[0][1] = MC_put_x_16_vis;
c->put_pixels_tab[0][2] = MC_put_y_16_vis;
@@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis;
+ }
}
}
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index ea0e911..dc7eb21 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx)
avctx->pix_fmt = PIX_FMT_YUV420P;
dsputil_init(&s->dsp, avctx);
- ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
+ ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
ff_vp8dsp_init(&s->vp8dsp);
return 0;
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f98e6ae..a0cb11a 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
+ const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
@@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
+ if (!high_bit_depth) {
c->clear_block = clear_block_mmx;
c->clear_blocks = clear_blocks_mmx;
if ((mm_flags & AV_CPU_FLAG_SSE) &&
@@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->clear_block = clear_block_sse;
c->clear_blocks = clear_blocks_sse;
}
+ }
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
@@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
+ if (!high_bit_depth) {
SET_HPEL_FUNCS(put, 0, 16, mmx);
SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
SET_HPEL_FUNCS(avg, 0, 16, mmx);
@@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
SET_HPEL_FUNCS(avg, 1, 8, mmx);
SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+ }
#if ARCH_X86_32 || !HAVE_YASM
c->gmc= gmc_mmx;
#endif
#if ARCH_X86_32 && HAVE_YASM
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_mmx;
#endif
c->add_bytes= add_bytes_mmx;
c->add_bytes_l2= add_bytes_l2_mmx;
+ if (!high_bit_depth)
c->draw_edges = draw_edges_mmx;
if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
@@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
#if HAVE_YASM
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+ }
c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
@@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->prefetch = prefetch_mmx2;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+ }
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ if (!high_bit_depth) {
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+ }
if (CONFIG_VP3_DECODER && HAVE_YASM) {
c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
@@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+ }
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
@@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
+ }
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
#endif
@@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} else if (mm_flags & AV_CPU_FLAG_3DNOW) {
c->prefetch = prefetch_3dnow;
+ if (!high_bit_depth) {
c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
@@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
}
+ }
if (CONFIG_VP3_DECODER
&& (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
@@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+ if (!high_bit_depth) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+ }
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
@@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
#if HAVE_YASM
+ if (!high_bit_depth) {
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
+ }
c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
@@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
// these functions are slower than mmx on AMD, but faster on Intel
+ if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
H264_QPEL_FUNCS(0, 0, sse2);
+ }
}
if(mm_flags & AV_CPU_FLAG_SSE2){
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, sse2);
H264_QPEL_FUNCS(3, 2, sse2);
H264_QPEL_FUNCS(3, 3, sse2);
+ }
}
#if HAVE_SSSE3
if(mm_flags & AV_CPU_FLAG_SSSE3){
+ if (!high_bit_depth) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
H264_QPEL_FUNCS(3, 1, ssse3);
H264_QPEL_FUNCS(3, 2, ssse3);
H264_QPEL_FUNCS(3, 3, ssse3);
+ }
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
#if HAVE_YASM
+ if (!high_bit_depth) {
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
+ }
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
+ if (!high_bit_depth)
c->emulated_edge_mc = emulated_edge_mc_sse;
c->gmc= gmc_sse;
#endif
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 4142cc1..9eb7525 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s
void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride);
void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride);
-void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
+void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth)
{
int mm_flags = av_get_cpu_flags();
+ const int high_depth = bit_depth > 8;
+
+ if (high_depth)
+ return;
#if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) {
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 9f004a5..b4936a6 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX ( 4, 8)
H264_BIWEIGHT_MMX ( 4, 4)
H264_BIWEIGHT_MMX ( 4, 2)
-void ff_h264dsp_init_x86(H264DSPContext *c)
+void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
{
int mm_flags = av_get_cpu_flags();
+ if (bit_depth == 8) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
}
@@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c)
}
}
}
+ }
#endif
}
OpenPOWER on IntegriCloud