diff options
34 files changed, 753 insertions, 365 deletions
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 610f92a..32bb0fc 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -270,6 +270,9 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels, void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; c->put_pixels_tab[0][1] = put_pixels16_x2_axp; c->put_pixels_tab[0][2] = put_pixels16_y2_axp; @@ -311,6 +314,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp; c->clear_blocks = clear_blocks_axp; + } /* amask clears all bits that correspond to present features. */ if (amask(AMASK_MVI) == 0) { diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index 92796c3..777a2f9 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -75,6 +75,8 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; @@ -95,6 +97,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) c->add_pixels_clamped = ff_add_pixels_clamped_arm; + if (!high_bit_depth) { c->put_pixels_tab[0][0] = ff_put_pixels16_arm; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_arm; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_arm; @@ -112,6 +115,7 @@ void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_arm; c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_arm; c->put_no_rnd_pixels_tab[1][3] = 
ff_put_no_rnd_pixels8_xy2_arm; + } if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx); if (HAVE_ARMV6) ff_dsputil_init_armv6(c, avctx); diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 362050c..7584aee 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -72,6 +72,8 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size); void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + if (!avctx->lowres && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { c->idct_put = ff_simple_idct_put_armv6; @@ -80,6 +82,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; } + if (!high_bit_depth) { c->put_pixels_tab[0][0] = ff_put_pixels16_armv6; c->put_pixels_tab[0][1] = ff_put_pixels16_x2_armv6; c->put_pixels_tab[0][2] = ff_put_pixels16_y2_armv6; @@ -100,6 +103,7 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[0][0] = ff_avg_pixels16_armv6; c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6; + } c->add_pixels_clamped = ff_add_pixels_clamped_armv6; c->get_pixels = ff_get_pixels_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index cd58011..3bc053c 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -173,6 +173,8 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src, void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + if (!avctx->lowres) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLENEON) { @@ -190,6 +192,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) } } + if (!high_bit_depth) { 
c->clear_block = ff_clear_block_neon; c->clear_blocks = ff_clear_blocks_neon; @@ -213,12 +216,14 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon; c->avg_pixels_tab[1][0] = ff_avg_pixels8_neon; + } c->add_pixels_clamped = ff_add_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; if (CONFIG_H264_DECODER) { + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon; c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon; c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon; @@ -294,6 +299,7 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][13] = ff_avg_h264_qpel8_mc13_neon; c->avg_h264_qpel_pixels_tab[1][14] = ff_avg_h264_qpel8_mc23_neon; c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; + } } if (CONFIG_VP3_DECODER) { diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c index e83edb5..86f8fdd 100644 --- a/libavcodec/arm/dsputil_iwmmxt.c +++ b/libavcodec/arm/dsputil_iwmmxt.c @@ -155,6 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h) void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) { int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */ + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -167,6 +168,7 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) c->add_pixels_clamped = add_pixels_clamped_iwmmxt; + if (!high_bit_depth) { c->clear_blocks = clear_blocks_iwmmxt; c->put_pixels_tab[0][0] = put_pixels16_iwmmxt; @@ -204,4 +206,5 @@ void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt; c->avg_no_rnd_pixels_tab[1][2] = 
avg_no_rnd_pixels8_y2_iwmmxt; c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt; + } } diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index 20f5ac2..c2399e5 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ -92,8 +92,9 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -static void ff_h264dsp_init_neon(H264DSPContext *c) +static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) { + if (bit_depth == 8) { c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; @@ -125,9 +126,10 @@ static void ff_h264dsp_init_neon(H264DSPContext *c) c->h264_idct8_add = ff_h264_idct8_add_neon; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; c->h264_idct8_add4 = ff_h264_idct8_add4_neon; + } } -void ff_h264dsp_init_arm(H264DSPContext *c) +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) { - if (HAVE_NEON) ff_h264dsp_init_neon(c); + if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); } diff --git a/libavcodec/arm/h264pred_init_arm.c b/libavcodec/arm/h264pred_init_arm.c index 3f1c5c6..cae32d7 100644 --- a/libavcodec/arm/h264pred_init_arm.c +++ b/libavcodec/arm/h264pred_init_arm.c @@ -42,8 +42,13 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); -static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id) +static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth) { + const int high_depth = bit_depth > 8; + + if (high_depth) + return; + h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon; h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_hor_neon; if (codec_id != CODEC_ID_VP8) @@ -69,7 +74,7 @@ static void 
ff_h264_pred_init_neon(H264PredContext *h, int codec_id) h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; } -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id) +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth) { - if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id); + if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); } diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c index 65d0308..0db2d8b 100644 --- a/libavcodec/bfin/dsputil_bfin.c +++ b/libavcodec/bfin/dsputil_bfin.c @@ -197,11 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + c->get_pixels = ff_bfin_get_pixels; c->diff_pixels = ff_bfin_diff_pixels; c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped; + if (!high_bit_depth) c->clear_blocks = bfin_clear_blocks; c->pix_sum = ff_bfin_pix_sum; c->pix_norm1 = ff_bfin_pix_norm1; @@ -228,6 +231,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) c->sse[1] = ff_bfin_sse8; c->sse[2] = ff_bfin_sse4; + if (!high_bit_depth) { c->put_pixels_tab[0][0] = bfin_put_pixels16; c->put_pixels_tab[0][1] = bfin_put_pixels16_x2; c->put_pixels_tab[0][2] = bfin_put_pixels16_y2; @@ -247,6 +251,7 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx ) c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd; c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd; /* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */ + } if (avctx->dct_algo == FF_DCT_AUTO) c->fdct = ff_bfin_fdct; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 078e172..4389289 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -43,6 +43,15 @@ uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t 
ff_squareTbl[512] = {0, }; +#define BIT_DEPTH 9 +#include "dsputil_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include "dsputil_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 8 #include "dsputil_template.c" // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size @@ -619,10 +628,10 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ switch(width){ - case 2: put_pixels2_c (dst, src, stride, height); break; - case 4: put_pixels4_c (dst, src, stride, height); break; - case 8: put_pixels8_c (dst, src, stride, height); break; - case 16:put_pixels16_c(dst, src, stride, height); break; + case 2: put_pixels2_8_c (dst, src, stride, height); break; + case 4: put_pixels4_8_c (dst, src, stride, height); break; + case 8: put_pixels8_8_c (dst, src, stride, height); break; + case 16:put_pixels16_8_c(dst, src, stride, height); break; } } @@ -716,10 +725,10 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){ switch(width){ - case 2: avg_pixels2_c (dst, src, stride, height); break; - case 4: avg_pixels4_c (dst, src, stride, height); break; - case 8: avg_pixels8_c (dst, src, stride, height); break; - case 16:avg_pixels16_c(dst, src, stride, height); break; + case 2: avg_pixels2_8_c (dst, src, stride, height); break; + case 4: avg_pixels4_8_c (dst, src, stride, height); break; + case 8: avg_pixels8_8_c (dst, src, stride, height); break; + case 16:avg_pixels16_8_c(dst, src, stride, height); break; } } @@ -953,7 +962,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 
8);\ - OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\ }\ \ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -963,7 +972,7 @@ static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\ - OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\ }\ \ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -971,7 +980,7 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ - OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\ }\ \ static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -985,7 +994,7 @@ static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\ - OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -996,7 +1005,7 @@ void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ + OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ 
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1004,9 +1013,9 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1017,7 +1026,7 @@ void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ + OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1025,9 +1034,9 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1038,7 +1047,7 @@ void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t 
*dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ + OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1046,9 +1055,9 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1059,7 +1068,7 @@ void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ + OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1067,23 +1076,23 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[64];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + put ## RND ## 
pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[72];\ uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[72];\ uint8_t halfHV[64];\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ @@ -1094,14 +1103,14 @@ void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\ + put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1113,14 
+1122,14 @@ void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\ put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\ + OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\ }\ static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[16*9];\ uint8_t halfH[72];\ copy_block9(full, src, 16, stride, 9);\ put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\ - put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\ + put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1132,7 +1141,7 @@ static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\ }\ \ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1142,7 +1151,7 @@ static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\ static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\ }\ \ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1150,7 +1159,7 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[256];\ copy_block17(full, src, 
24, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ - OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\ }\ \ static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1164,7 +1173,7 @@ static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t half[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\ - OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1175,7 +1184,7 @@ void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ + OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1183,9 +1192,9 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1196,7 
+1205,7 @@ void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ + OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1204,9 +1213,9 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1217,7 +1226,7 @@ void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ + OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1225,9 +1234,9 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## 
mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1238,7 +1247,7 @@ void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ + OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1246,23 +1255,23 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfHV[256];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[272];\ uint8_t halfHV[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, 
halfH, halfHV, stride, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t halfH[272];\ uint8_t halfHV[256];\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ @@ -1273,14 +1282,14 @@ void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ uint8_t halfH[272];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ }\ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1292,14 +1301,14 @@ void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\ put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\ + OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\ }\ static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\ uint8_t full[24*17];\ 
uint8_t halfH[272];\ copy_block17(full, src, 24, stride, 17);\ put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\ - put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\ + put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\ }\ static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\ @@ -1327,7 +1336,7 @@ QPEL_MC(0, avg_ , _ , op_avg) #define put_qpel16_mc00_c ff_put_pixels16x16_c #define avg_qpel16_mc00_c ff_avg_pixels16x16_c #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c -#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c +#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; @@ -1349,16 +1358,16 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int #if CONFIG_RV40_DECODER static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ - put_pixels16_xy2_c(dst, src, stride, 16); + put_pixels16_xy2_8_c(dst, src, stride, 16); } static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){ - avg_pixels16_xy2_c(dst, src, stride, 16); + avg_pixels16_xy2_8_c(dst, src, stride, 16); } static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ - put_pixels8_xy2_c(dst, src, stride, 8); + put_pixels8_xy2_8_c(dst, src, stride, 8); } static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){ - avg_pixels8_xy2_c(dst, src, stride, 8); + avg_pixels8_xy2_8_c(dst, src, stride, 8); } #endif /* CONFIG_RV40_DECODER */ @@ -1394,7 +1403,7 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ uint8_t half[64]; wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); - put_pixels8_l2(dst, src, half, stride, stride, 8, 8); + put_pixels8_l2_8(dst, 
src, half, stride, stride, 8, 8); } static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ @@ -1404,7 +1413,7 @@ static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ uint8_t half[64]; wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); - put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); + put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8); } static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ @@ -1418,7 +1427,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); - put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); + put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); } static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ uint8_t halfH[88]; @@ -1427,7 +1436,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); - put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); + put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8); } static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ uint8_t halfH[88]; @@ -2863,8 +2872,24 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->idct_put= ff_jref_idct4_put; c->idct_add= ff_jref_idct4_add; }else{ - c->idct_put= ff_h264_lowres_idct_put_c; - c->idct_add= ff_h264_lowres_idct_add_c; + if (avctx->codec_id != CODEC_ID_H264) { + c->idct_put= ff_h264_lowres_idct_put_8_c; + c->idct_add= ff_h264_lowres_idct_add_8_c; + } else { + switch (avctx->bits_per_raw_sample) { + case 9: + c->idct_put= ff_h264_lowres_idct_put_9_c; + c->idct_add= ff_h264_lowres_idct_add_9_c; + break; + case 10: + c->idct_put= 
ff_h264_lowres_idct_put_10_c; + c->idct_add= ff_h264_lowres_idct_add_10_c; + break; + default: + c->idct_put= ff_h264_lowres_idct_put_8_c; + c->idct_add= ff_h264_lowres_idct_add_8_c; + } + } } c->idct = j_rev_dct4; c->idct_permutation_type= FF_NO_IDCT_PERM; @@ -2922,14 +2947,9 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c; c->put_pixels_nonclamped = put_pixels_nonclamped_c; c->add_pixels_clamped = ff_add_pixels_clamped_c; - c->add_pixels8 = add_pixels8_c; - c->add_pixels4 = add_pixels4_c; c->sum_abs_dctelem = sum_abs_dctelem_c; - c->emulated_edge_mc = ff_emulated_edge_mc; c->gmc1 = gmc1_c; c->gmc = ff_gmc_c; - c->clear_block = clear_block_c; - c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; @@ -2947,30 +2967,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->pix_abs[1][2] = pix_abs8_y2_c; c->pix_abs[1][3] = pix_abs8_xy2_c; -#define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \ - c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \ - c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \ - c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c - - dspfunc(put, 0, 16); - dspfunc(put_no_rnd, 0, 16); - dspfunc(put, 1, 8); - dspfunc(put_no_rnd, 1, 8); - dspfunc(put, 2, 4); - dspfunc(put, 3, 2); - - dspfunc(avg, 0, 16); - dspfunc(avg_no_rnd, 0, 16); - dspfunc(avg, 1, 8); - dspfunc(avg_no_rnd, 1, 8); - dspfunc(avg, 2, 4); - dspfunc(avg, 3, 2); -#undef dspfunc - - c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c; - c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c; - c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; @@ -3021,23 +3017,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_qpel, 1, 8); /* 
dspfunc(avg_no_rnd_qpel, 1, 8); */ - dspfunc(put_h264_qpel, 0, 16); - dspfunc(put_h264_qpel, 1, 8); - dspfunc(put_h264_qpel, 2, 4); - dspfunc(put_h264_qpel, 3, 2); - dspfunc(avg_h264_qpel, 0, 16); - dspfunc(avg_h264_qpel, 1, 8); - dspfunc(avg_h264_qpel, 2, 4); - #undef dspfunc - c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c; - c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c; - c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c; - c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; - c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; - c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; - - c->draw_edges = draw_edges_c; #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER ff_mlp_init(c, avctx); @@ -3162,6 +3142,92 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); +#undef FUNC +#undef FUNCC +#define FUNC(f, depth) f ## _ ## depth +#define FUNCC(f, depth) f ## _ ## depth ## _c + +#define dspfunc1(PFX, IDX, NUM, depth)\ + c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\ + c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\ + c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\ + c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth) + +#define dspfunc2(PFX, IDX, NUM, depth)\ + c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\ + c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\ + c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\ + c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\ + c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\ + c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\ + c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\ + c->PFX ## 
_pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\ + c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\ + c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\ + c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\ + c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\ + c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\ + c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\ + c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\ + c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) + + +#define BIT_DEPTH_FUNCS(depth)\ + c->draw_edges = FUNCC(draw_edges , depth);\ + c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ + c->clear_block = FUNCC(clear_block , depth);\ + c->clear_blocks = FUNCC(clear_blocks , depth);\ + c->add_pixels8 = FUNCC(add_pixels8 , depth);\ + c->add_pixels4 = FUNCC(add_pixels4 , depth);\ + c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ + c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ +\ + c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\ + c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\ + c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\ + c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\ + c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\ + c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\ +\ + dspfunc1(put , 0, 16, depth);\ + dspfunc1(put , 1, 8, depth);\ + dspfunc1(put , 2, 4, depth);\ + dspfunc1(put , 3, 2, depth);\ + dspfunc1(put_no_rnd, 0, 16, depth);\ + dspfunc1(put_no_rnd, 1, 8, depth);\ + dspfunc1(avg , 0, 16, depth);\ + dspfunc1(avg , 1, 8, depth);\ + dspfunc1(avg , 2, 4, depth);\ + dspfunc1(avg , 3, 2, depth);\ + dspfunc1(avg_no_rnd, 0, 16, depth);\ + dspfunc1(avg_no_rnd, 1, 8, depth);\ +\ + dspfunc2(put_h264_qpel, 0, 
16, depth);\ + dspfunc2(put_h264_qpel, 1, 8, depth);\ + dspfunc2(put_h264_qpel, 2, 4, depth);\ + dspfunc2(put_h264_qpel, 3, 2, depth);\ + dspfunc2(avg_h264_qpel, 0, 16, depth);\ + dspfunc2(avg_h264_qpel, 1, 8, depth);\ + dspfunc2(avg_h264_qpel, 2, 4, depth); + + if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) { + BIT_DEPTH_FUNCS(8) + } else { + switch (avctx->bits_per_raw_sample) { + case 9: + BIT_DEPTH_FUNCS(9) + break; + case 10: + BIT_DEPTH_FUNCS(10) + break; + default: + av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); + BIT_DEPTH_FUNCS(8) + break; + } + } + + if (HAVE_MMX) dsputil_init_mmx (c, avctx); if (ARCH_ARM) dsputil_init_arm (c, avctx); if (CONFIG_MLIB) dsputil_init_mlib (c, avctx); diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 9b4aef7..78d2152 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -53,19 +53,24 @@ void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx2(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); -void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); -void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); -void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); -void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); -void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]); - -void ff_h264_chroma_dc_dequant_idct_c(DCTELEM 
*block, int qmul); -void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul); +#define H264_IDCT(depth) \ +void ff_h264_idct8_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\ +void ff_h264_idct_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\ +void ff_h264_idct8_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\ +void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int stride);\ +void ff_h264_lowres_idct_add_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\ +void ff_h264_lowres_idct_put_ ## depth ## _c(uint8_t *dst, int stride, DCTELEM *block);\ +void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ +void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ +void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ +void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ +void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ +void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); + +H264_IDCT( 8) +H264_IDCT( 9) +H264_IDCT(10) + void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp); void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); @@ -82,10 +87,20 @@ extern const uint8_t ff_zigzag248_direct[64]; extern uint32_t ff_squareTbl[512]; extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; -void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride); -void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride); 
+#define PUTAVG_PIXELS(depth)\ +void ff_put_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\ +void ff_avg_pixels8x8_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\ +void ff_put_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride);\ +void ff_avg_pixels16x16_ ## depth ## _c(uint8_t *dst, uint8_t *src, int stride); + +PUTAVG_PIXELS( 8) +PUTAVG_PIXELS( 9) +PUTAVG_PIXELS(10) + +#define ff_put_pixels8x8_c ff_put_pixels8x8_8_c +#define ff_avg_pixels8x8_c ff_avg_pixels8x8_8_c +#define ff_put_pixels16x16_c ff_put_pixels16x16_8_c +#define ff_avg_pixels16x16_c ff_avg_pixels16x16_8_c /* VP3 DSP functions */ void ff_vp3_idct_c(DCTELEM *block/* align 16*/); @@ -187,10 +202,17 @@ typedef struct ScanTable{ void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable); -void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, - int block_w, int block_h, +#define EMULATED_EDGE(depth) \ +void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\ + int block_w, int block_h,\ int src_x, int src_y, int w, int h); +EMULATED_EDGE(8) +EMULATED_EDGE(9) +EMULATED_EDGE(10) + +#define ff_emulated_edge_mc ff_emulated_edge_mc_8 + void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize); void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize); void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int linesize); @@ -562,6 +584,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type); #define BYTE_VEC32(c) ((c)*0x01010101UL) +#define BYTE_VEC64(c) ((c)*0x0001000100010001UL) static inline uint32_t rnd_avg32(uint32_t a, uint32_t b) { @@ -573,6 +596,16 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); } +static inline uint64_t rnd_avg64(uint64_t a, uint64_t b) +{ + return (a | b) - 
(((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); +} + +static inline uint64_t no_rnd_avg64(uint64_t a, uint64_t b) +{ + return (a & b) + (((a ^ b) & ~BYTE_VEC64(0x01)) >> 1); +} + static inline int get_penalty_factor(int lambda, int lambda2, int type){ switch(type&0xFF){ default: diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c index f69c467..8ca6d3e 100644 --- a/libavcodec/dsputil_template.c +++ b/libavcodec/dsputil_template.c @@ -27,25 +27,55 @@ * DSP utils */ -#include "dsputil.h" +#include "high_bit_depth.h" -#define BIT_DEPTH 8 +static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + AV_WN2P(dst , AV_RN2P(src )); + dst+=dstStride; + src+=srcStride; + } +} -#define pixel uint8_t -#define pixel2 uint16_t -#define pixel4 uint32_t -#define dctcoef int16_t +static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + AV_WN4P(dst , AV_RN4P(src )); + dst+=dstStride; + src+=srcStride; + } +} -#define FUNC(a) a -#define FUNCC(a) a ## _c -#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; -#define CLIP(a) cm[a] -#define AV_RN2P AV_RN16 -#define AV_RN4P AV_RN32 -#define PIXEL_MAX ((1<<BIT_DEPTH)-1) +static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + AV_WN4P(dst , AV_RN4P(src )); + AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel))); + dst+=dstStride; + src+=srcStride; + } +} -#define no_rnd_avg_pixel4 no_rnd_avg32 -#define rnd_avg_pixel4 rnd_avg32 +static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + AV_WN4P(dst , AV_RN4P(src )); + AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel))); + AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel))); + 
AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel))); + dst+=dstStride; + src+=srcStride; + } +} /* draw the edges of width 'w' of an image of size width, height */ //FIXME check that this is ok for mpeg4 interlaced @@ -1317,10 +1347,22 @@ H264_MC(avg_, 16) #undef op2_avg #undef op2_put -#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c -#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c -#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c -#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c +#if BIT_DEPTH == 8 +# define put_h264_qpel8_mc00_8_c ff_put_pixels8x8_8_c +# define avg_h264_qpel8_mc00_8_c ff_avg_pixels8x8_8_c +# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c +# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c +#elif BIT_DEPTH == 9 +# define put_h264_qpel8_mc00_9_c ff_put_pixels8x8_9_c +# define avg_h264_qpel8_mc00_9_c ff_avg_pixels8x8_9_c +# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c +# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c +#elif BIT_DEPTH == 10 +# define put_h264_qpel8_mc00_10_c ff_put_pixels8x8_10_c +# define avg_h264_qpel8_mc00_10_c ff_avg_pixels8x8_10_c +# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c +# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c +#endif void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) { FUNCC(put_pixels8)(dst, src, stride, 8); diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 0fcb7db..1388dd5 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -783,7 +783,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ dst->list_counts = src->list_counts; dst->s.obmc_scratchpad = NULL; - ff_h264_pred_init(&dst->hpc, src->s.codec_id); + ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); } /** @@ -811,8 +811,8 @@ static av_cold void common_init(H264Context *h){ s->height = s->avctx->height; s->codec_id= s->avctx->codec->id; - ff_h264dsp_init(&h->h264dsp); - ff_h264_pred_init(&h->hpc, s->codec_id); + 
ff_h264dsp_init(&h->h264dsp, 8); + ff_h264_pred_init(&h->hpc, s->codec_id, 8); h->dequant_coeff_pps= -1; s->unrestricted_mv=1; @@ -895,7 +895,7 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx){ ff_h264_decode_init_vlc(); h->pixel_shift = 0; - h->sps.bit_depth_luma = 8; + h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; h->thread_context[0] = h; h->outputed_poc = INT_MIN; @@ -2998,6 +2998,20 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ if(avctx->has_b_frames < 2) avctx->has_b_frames= !s->low_delay; + + if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { + if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { + avctx->bits_per_raw_sample = h->sps.bit_depth_luma; + h->pixel_shift = h->sps.bit_depth_luma > 8; + + ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); + ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); + dsputil_init(&s->dsp, s->avctx); + } else { + av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); + return -1; + } + } break; case NAL_PPS: init_get_bits(&s->gb, ptr, bit_length); diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c index 22b2086..64f4856 100644 --- a/libavcodec/h264dsp.c +++ b/libavcodec/h264dsp.c @@ -29,57 +29,83 @@ #include "avcodec.h" #include "h264dsp.h" +#define BIT_DEPTH 8 #include "h264dsp_template.c" +#undef BIT_DEPTH -void ff_h264dsp_init(H264DSPContext *c) -{ - c->h264_idct_add= ff_h264_idct_add_c; - c->h264_idct8_add= ff_h264_idct8_add_c; - c->h264_idct_dc_add= ff_h264_idct_dc_add_c; - c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; - c->h264_idct_add16 = ff_h264_idct_add16_c; - c->h264_idct8_add4 = ff_h264_idct8_add4_c; - c->h264_idct_add8 = ff_h264_idct_add8_c; - c->h264_idct_add16intra= ff_h264_idct_add16intra_c; - c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c; - c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c; +#define BIT_DEPTH 9 +#include "h264dsp_template.c" +#undef 
BIT_DEPTH + +#define BIT_DEPTH 10 +#include "h264dsp_template.c" +#undef BIT_DEPTH - c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; - c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; - c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; - c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; - c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; - c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; - c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; - c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; - c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; - c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; - c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; - c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; - c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; - c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; - c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; - c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; - c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; - c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; - c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; - c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) +{ +#undef FUNC +#define FUNC(a, depth) a ## _ ## depth ## _c - c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c; - c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c; - c->h264_h_loop_filter_luma_mbaff= h264_h_loop_filter_luma_mbaff_c; - c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c; - c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c; - c->h264_h_loop_filter_luma_mbaff_intra= h264_h_loop_filter_luma_mbaff_intra_c; - c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c; - c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c; - c->h264_h_loop_filter_chroma_mbaff= h264_h_loop_filter_chroma_mbaff_c; - 
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c; - c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c; - c->h264_h_loop_filter_chroma_mbaff_intra= h264_h_loop_filter_chroma_mbaff_intra_c; +#define H264_DSP(depth) \ + c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\ + c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\ + c->h264_idct_dc_add= FUNC(ff_h264_idct_dc_add, depth);\ + c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ + c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ + c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ + c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ + c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ + c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ + c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ +\ + c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ + c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ + c->weight_h264_pixels_tab[2]= FUNC(weight_h264_pixels8x16, depth);\ + c->weight_h264_pixels_tab[3]= FUNC(weight_h264_pixels8x8, depth);\ + c->weight_h264_pixels_tab[4]= FUNC(weight_h264_pixels8x4, depth);\ + c->weight_h264_pixels_tab[5]= FUNC(weight_h264_pixels4x8, depth);\ + c->weight_h264_pixels_tab[6]= FUNC(weight_h264_pixels4x4, depth);\ + c->weight_h264_pixels_tab[7]= FUNC(weight_h264_pixels4x2, depth);\ + c->weight_h264_pixels_tab[8]= FUNC(weight_h264_pixels2x4, depth);\ + c->weight_h264_pixels_tab[9]= FUNC(weight_h264_pixels2x2, depth);\ + c->biweight_h264_pixels_tab[0]= FUNC(biweight_h264_pixels16x16, depth);\ + c->biweight_h264_pixels_tab[1]= FUNC(biweight_h264_pixels16x8, depth);\ + c->biweight_h264_pixels_tab[2]= FUNC(biweight_h264_pixels8x16, depth);\ + c->biweight_h264_pixels_tab[3]= FUNC(biweight_h264_pixels8x8, depth);\ + c->biweight_h264_pixels_tab[4]= FUNC(biweight_h264_pixels8x4, depth);\ + c->biweight_h264_pixels_tab[5]= 
FUNC(biweight_h264_pixels4x8, depth);\ + c->biweight_h264_pixels_tab[6]= FUNC(biweight_h264_pixels4x4, depth);\ + c->biweight_h264_pixels_tab[7]= FUNC(biweight_h264_pixels4x2, depth);\ + c->biweight_h264_pixels_tab[8]= FUNC(biweight_h264_pixels2x4, depth);\ + c->biweight_h264_pixels_tab[9]= FUNC(biweight_h264_pixels2x2, depth);\ +\ + c->h264_v_loop_filter_luma= FUNC(h264_v_loop_filter_luma, depth);\ + c->h264_h_loop_filter_luma= FUNC(h264_h_loop_filter_luma, depth);\ + c->h264_h_loop_filter_luma_mbaff= FUNC(h264_h_loop_filter_luma_mbaff, depth);\ + c->h264_v_loop_filter_luma_intra= FUNC(h264_v_loop_filter_luma_intra, depth);\ + c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ + c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ + c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ + c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ + c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ + c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ + c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ + c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ c->h264_loop_filter_strength= NULL; - if (ARCH_ARM) ff_h264dsp_init_arm(c); - if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c); - if (HAVE_MMX) ff_h264dsp_init_x86(c); + switch (bit_depth) { + case 9: + H264_DSP(9); + break; + case 10: + H264_DSP(10); + break; + default: + H264_DSP(8); + break; + } + + if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); + if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth); + if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth); } diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 7eb50be..87a1dd9 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -75,9 +75,9 @@ typedef struct H264DSPContext{ void (*h264_chroma_dc_dequant_idct)(DCTELEM 
*block, int qmul); }H264DSPContext; -void ff_h264dsp_init(H264DSPContext *c); -void ff_h264dsp_init_arm(H264DSPContext *c); -void ff_h264dsp_init_ppc(H264DSPContext *c); -void ff_h264dsp_init_x86(H264DSPContext *c); +void ff_h264dsp_init(H264DSPContext *c, const int bit_depth); +void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth); +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth); +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth); #endif /* AVCODEC_H264DSP_H */ diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c index 7190f4d..91162ea 100644 --- a/libavcodec/h264dsp_template.c +++ b/libavcodec/h264dsp_template.c @@ -25,10 +25,7 @@ * @author Michael Niedermayer <michaelni@gmx.at> */ -#define BIT_DEPTH 8 -#define pixel uint8_t -#define av_clip_pixel av_clip_uint8 -#define FUNCC(a) a ## _c +#include "high_bit_depth.h" #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c index d797775..1634a00 100644 --- a/libavcodec/h264idct.c +++ b/libavcodec/h264idct.c @@ -25,4 +25,14 @@ * @author Michael Niedermayer <michaelni@gmx.at> */ +#define BIT_DEPTH 8 #include "h264idct_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 9 +#include "h264idct_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include "h264idct_template.c" +#undef BIT_DEPTH diff --git a/libavcodec/h264idct_template.c b/libavcodec/h264idct_template.c index ed1f18c..39c9a1c 100644 --- a/libavcodec/h264idct_template.c +++ b/libavcodec/h264idct_template.c @@ -25,7 +25,7 @@ * @author Michael Niedermayer <michaelni@gmx.at> */ -#include "dsputil.h" +#include "high_bit_depth.h" #ifndef AVCODEC_H264IDCT_INTERNAL_H #define AVCODEC_H264IDCT_INTERNAL_H @@ -42,12 +42,6 @@ static const uint8_t scan8[16 + 2*4]={ }; #endif -#define pixel uint8_t -#define 
dctcoef DCTELEM -#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; -#define CLIP(a) cm[a] -#define FUNCC(a) a ## _c - static av_always_inline void FUNCC(idct_internal)(uint8_t *_dst, DCTELEM *_block, int stride, int block_stride, int shift, int add){ int i; INIT_CLIP diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 616a7a9..b3701ef 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -26,7 +26,18 @@ */ #include "h264pred.h" + +#define BIT_DEPTH 8 +#include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 9 #include "h264pred_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 10 +#include "h264pred_template.c" +#undef BIT_DEPTH static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ const int lt= src[-1-1*stride]; @@ -245,11 +256,11 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ } static void pred16x16_plane_svq3_c(uint8_t *src, int stride){ - pred16x16_plane_compat_c(src, stride, 1, 0); + pred16x16_plane_compat_8_c(src, stride, 1, 0); } static void pred16x16_plane_rv40_c(uint8_t *src, int stride){ - pred16x16_plane_compat_c(src, stride, 0, 1); + pred16x16_plane_compat_8_c(src, stride, 0, 1); } static void pred16x16_tm_vp8_c(uint8_t *src, int stride){ @@ -352,130 +363,149 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ /** * Set the intra prediction function pointers. 
*/ -void ff_h264_pred_init(H264PredContext *h, int codec_id){ +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ // MpegEncContext * const s = &h->s; - if(codec_id != CODEC_ID_RV40){ - if(codec_id == CODEC_ID_VP8) { - h->pred4x4[VERT_PRED ]= pred4x4_vertical_vp8_c; - h->pred4x4[HOR_PRED ]= pred4x4_horizontal_vp8_c; - } else { - h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; - h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; - } - h->pred4x4[DC_PRED ]= pred4x4_dc_c; - if(codec_id == CODEC_ID_SVQ3) - h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_svq3_c; - else - h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c; - h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; - h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; - h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; - if (codec_id == CODEC_ID_VP8) { - h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_vp8_c; - } else - h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c; - h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c; - if(codec_id != CODEC_ID_VP8) { - h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c; - h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; - h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; - } else { - h->pred4x4[TM_VP8_PRED ]= pred4x4_tm_vp8_c; - h->pred4x4[DC_127_PRED ]= pred4x4_127_dc_c; - h->pred4x4[DC_129_PRED ]= pred4x4_129_dc_c; - h->pred4x4[VERT_VP8_PRED ]= pred4x4_vertical_c; - h->pred4x4[HOR_VP8_PRED ]= pred4x4_horizontal_c; - } - }else{ - h->pred4x4[VERT_PRED ]= pred4x4_vertical_c; - h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c; - h->pred4x4[DC_PRED ]= pred4x4_dc_c; - h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_rv40_c; - h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c; - h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c; - h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c; - h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_rv40_c; - h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_rv40_c; - h->pred4x4[LEFT_DC_PRED ]= 
pred4x4_left_dc_c; - h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; - h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; - h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= pred4x4_down_left_rv40_nodown_c; - h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= pred4x4_horizontal_up_rv40_nodown_c; - h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= pred4x4_vertical_left_rv40_nodown_c; +#undef FUNC +#undef FUNCC +#define FUNC(a, depth) a ## _ ## depth +#define FUNCC(a, depth) a ## _ ## depth ## _c +#define FUNCD(a) a ## _c + +#define H264_PRED(depth) \ + if(codec_id != CODEC_ID_RV40){\ + if(codec_id == CODEC_ID_VP8) {\ + h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\ + h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\ + } else {\ + h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ + h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + }\ + h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ + if(codec_id == CODEC_ID_SVQ3)\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\ + else\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\ + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ + h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ + h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ + if (codec_id == CODEC_ID_VP8) {\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\ + } else\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\ + h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\ + if(codec_id != CODEC_ID_VP8) {\ + h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\ + h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ + h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ + } else {\ + h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\ + h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\ + h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\ + h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\ + 
h->pred4x4[HOR_VP8_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + }\ + }else{\ + h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\ + h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\ + h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\ + h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\ + h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\ + h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\ + h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\ + h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\ + h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\ + h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\ + h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\ + h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\ + h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\ + h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\ + }\ +\ + h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\ + h->pred8x8l[HOR_PRED ]= FUNCC(pred8x8l_horizontal , depth);\ + h->pred8x8l[DC_PRED ]= FUNCC(pred8x8l_dc , depth);\ + h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left , depth);\ + h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right , depth);\ + h->pred8x8l[VERT_RIGHT_PRED ]= FUNCC(pred8x8l_vertical_right , depth);\ + h->pred8x8l[HOR_DOWN_PRED ]= FUNCC(pred8x8l_horizontal_down , depth);\ + h->pred8x8l[VERT_LEFT_PRED ]= FUNCC(pred8x8l_vertical_left , depth);\ + h->pred8x8l[HOR_UP_PRED ]= FUNCC(pred8x8l_horizontal_up , depth);\ + h->pred8x8l[LEFT_DC_PRED ]= FUNCC(pred8x8l_left_dc , depth);\ + h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ + h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ +\ + h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ + 
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ + if (codec_id != CODEC_ID_VP8) {\ + h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ + } else\ + h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ + if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ + h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ + h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ + h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ + h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ + h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ + }else{\ + h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ + h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ + h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\ + if (codec_id == CODEC_ID_VP8) {\ + h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\ + h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ + }\ + }\ + h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ +\ + h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ + h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ + h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\ + switch(codec_id){\ + case CODEC_ID_SVQ3:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\ + break;\ + case CODEC_ID_RV40:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\ + break;\ + case CODEC_ID_VP8:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\ + h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\ + h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\ + break;\ + default:\ + h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane , depth);\ + break;\ + }\ + h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc , depth);\ + 
h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc , depth);\ + h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc , depth);\ +\ + /* special lossless h/v prediction for h264 */ \ + h->pred4x4_add [VERT_PRED ]= FUNCC(pred4x4_vertical_add , depth);\ + h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ + h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ + h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ + h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ + h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ + h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\ + h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\ + + switch (bit_depth) { + case 9: + H264_PRED(9) + break; + case 10: + H264_PRED(10) + break; + default: + H264_PRED(8) + break; } - h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c; - h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c; - h->pred8x8l[DC_PRED ]= pred8x8l_dc_c; - h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c; - h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c; - h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c; - h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c; - h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c; - h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c; - h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c; - h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; - h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; - - h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; - h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; - if (codec_id != CODEC_ID_VP8) { - h->pred8x8[PLANE_PRED8x8]= pred8x8_plane_c; - } else - h->pred8x8[PLANE_PRED8x8]= pred8x8_tm_vp8_c; - if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){ - h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; - h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c; - h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c; - 
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= pred8x8_mad_cow_dc_l0t; - h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= pred8x8_mad_cow_dc_0lt; - h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= pred8x8_mad_cow_dc_l00; - h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= pred8x8_mad_cow_dc_0l0; - }else{ - h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_rv40_c; - h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_rv40_c; - h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_rv40_c; - if (codec_id == CODEC_ID_VP8) { - h->pred8x8[DC_127_PRED8x8]= pred8x8_127_dc_c; - h->pred8x8[DC_129_PRED8x8]= pred8x8_129_dc_c; - } - } - h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c; - - h->pred16x16[DC_PRED8x8 ]= pred16x16_dc_c; - h->pred16x16[VERT_PRED8x8 ]= pred16x16_vertical_c; - h->pred16x16[HOR_PRED8x8 ]= pred16x16_horizontal_c; - switch(codec_id){ - case CODEC_ID_SVQ3: - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_svq3_c; - break; - case CODEC_ID_RV40: - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_rv40_c; - break; - case CODEC_ID_VP8: - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_tm_vp8_c; - h->pred16x16[DC_127_PRED8x8]= pred16x16_127_dc_c; - h->pred16x16[DC_129_PRED8x8]= pred16x16_129_dc_c; - break; - default: - h->pred16x16[PLANE_PRED8x8 ]= pred16x16_plane_c; - break; - } - h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c; - h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c; - h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c; - - //special lossless h/v prediction for h264 - h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c; - h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c; - h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c; - h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c; - h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c; - h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c; - h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c; - h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c; - - if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id); - if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id); + 
if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth); + if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth); } diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index db3f580..34b1e90 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,8 +101,8 @@ typedef struct H264PredContext{ void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); }H264PredContext; -void ff_h264_pred_init(H264PredContext *h, int codec_id); -void ff_h264_pred_init_arm(H264PredContext *h, int codec_id); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id); +void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth); +void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth); +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth); #endif /* AVCODEC_H264PRED_H */ diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 4bd26e2..066e837 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -26,21 +26,7 @@ */ #include "mathops.h" -#include "dsputil.h" - -#define BIT_DEPTH 8 - -#define pixel uint8_t -#define pixel4 uint32_t -#define dctcoef DCTELEM - -#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; -#define CLIP(a) cm[a] -#define FUNC(a) a -#define FUNCC(a) a ## _c -#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) -#define AV_WN4P AV_WN32 -#define AV_WN4PA AV_WN32A +#include "high_bit_depth.h" static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){ pixel *src = (pixel*)_src; diff --git a/libavcodec/high_bit_depth.h b/libavcodec/high_bit_depth.h new file mode 100644 index 0000000..6f2b6a7 --- /dev/null +++ b/libavcodec/high_bit_depth.h @@ -0,0 +1,85 @@ +#include "dsputil.h" + +#ifndef BIT_DEPTH +#define BIT_DEPTH 8 +#endif + +#ifdef AVCODEC_H264_HIGH_DEPTH_H +# undef pixel +# undef pixel2 +# undef pixel4 +# undef dctcoef 
+# undef INIT_CLIP +# undef no_rnd_avg_pixel4 +# undef rnd_avg_pixel4 +# undef AV_RN2P +# undef AV_RN4P +# undef AV_WN2P +# undef AV_WN4P +# undef AV_WN4PA +# undef CLIP +# undef FUNC +# undef FUNCC +# undef av_clip_pixel +# undef PIXEL_SPLAT_X4 +#else +# define AVCODEC_H264_HIGH_DEPTH_H +# define CLIP_PIXEL(depth)\ + static inline uint16_t av_clip_pixel_ ## depth (int p)\ + {\ + const int pixel_max = (1 << depth)-1;\ + return (p & ~pixel_max) ? (-p)>>31 & pixel_max : p;\ + } + +CLIP_PIXEL( 9) +CLIP_PIXEL(10) +#endif + +#if BIT_DEPTH > 8 +# define pixel uint16_t +# define pixel2 uint32_t +# define pixel4 uint64_t +# define dctcoef int32_t + +# define INIT_CLIP +# define no_rnd_avg_pixel4 no_rnd_avg64 +# define rnd_avg_pixel4 rnd_avg64 +# define AV_RN2P AV_RN32 +# define AV_RN4P AV_RN64 +# define AV_WN2P AV_WN32 +# define AV_WN4P AV_WN64 +# define AV_WN4PA AV_WN64A +# define PIXEL_SPLAT_X4(x) ((x)*0x0001000100010001ULL) +#else +# define pixel uint8_t +# define pixel2 uint16_t +# define pixel4 uint32_t +# define dctcoef int16_t + +# define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; +# define no_rnd_avg_pixel4 no_rnd_avg32 +# define rnd_avg_pixel4 rnd_avg32 +# define AV_RN2P AV_RN16 +# define AV_RN4P AV_RN32 +# define AV_WN2P AV_WN16 +# define AV_WN4P AV_WN32 +# define AV_WN4PA AV_WN32A +# define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) +#endif + +#if BIT_DEPTH == 8 +# define av_clip_pixel(a) av_clip_uint8(a) +# define CLIP(a) cm[a] +# define FUNC(a) a ## _8 +# define FUNCC(a) a ## _8_c +#elif BIT_DEPTH == 9 +# define av_clip_pixel(a) av_clip_pixel_9(a) +# define CLIP(a) av_clip_pixel_9(a) +# define FUNC(a) a ## _9 +# define FUNCC(a) a ## _9_c +#elif BIT_DEPTH == 10 +# define av_clip_pixel(a) av_clip_pixel_10(a) +# define CLIP(a) av_clip_pixel_10(a) +# define FUNC(a) a ## _10 +# define FUNCC(a) a ## _10_c +#endif diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c index 9e49c91..c0f2c03 100644 --- a/libavcodec/mlib/dsputil_mlib.c +++ 
b/libavcodec/mlib/dsputil_mlib.c @@ -421,10 +421,13 @@ static void ff_fdct_mlib(DCTELEM *data) void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + c->get_pixels = get_pixels_mlib; c->diff_pixels = diff_pixels_mlib; c->add_pixels_clamped = add_pixels_clamped_mlib; + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_mlib; c->put_pixels_tab[0][1] = put_pixels16_x2_mlib; c->put_pixels_tab[0][2] = put_pixels16_y2_mlib; @@ -445,6 +448,7 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mlib; c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mlib; + } c->bswap_buf = bswap_buf_mlib; } diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 9111d03..adce61b 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -1384,6 +1384,8 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][2] = sad16_y2_altivec; c->pix_abs[0][3] = sad16_xy2_altivec; @@ -1397,8 +1399,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; c->get_pixels = get_pixels_altivec; + if (!high_bit_depth) c->clear_block = clear_block_altivec; c->add_bytes= add_bytes_altivec; + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_altivec; /* the two functions do the same thing, so use the same code */ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec; @@ -1409,6 +1413,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; c->put_pixels_tab[0][3] = 
put_pixels16_xy2_altivec; c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; + } c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index c52ea61..5f131f3 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -153,8 +153,11 @@ static void prefetch_ppc(void *mem, int stride, int h) void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + // Common optimizations whether AltiVec is available or not c->prefetch = prefetch_ppc; + if (!high_bit_depth) { switch (check_dcbzl_effect()) { case 32: c->clear_blocks = clear_blocks_dcbz32_ppc; @@ -165,6 +168,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) default: break; } + } #if HAVE_ALTIVEC if(CONFIG_H264_DECODER) dsputil_h264_init_ppc(c, avctx); diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c index 02699be..fae0674 100644 --- a/libavcodec/ppc/h264_altivec.c +++ b/libavcodec/ppc/h264_altivec.c @@ -965,8 +965,10 @@ H264_WEIGHT( 8, 8) H264_WEIGHT( 8, 4) void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec; c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec; @@ -992,11 +994,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { dspfunc(avg_h264_qpel, 0, 16); #undef dspfunc } + } } -void ff_h264dsp_init_ppc(H264DSPContext *c) +void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth) { if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { + if (bit_depth == 8) { c->h264_idct_add = ff_h264_idct_add_altivec; c->h264_idct_add8 = ff_h264_idct_add8_altivec; 
c->h264_idct_add16 = ff_h264_idct_add16_altivec; @@ -1019,4 +1023,5 @@ void ff_h264dsp_init_ppc(H264DSPContext *c) c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; } + } } diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c index b6096b3..f4503a9 100644 --- a/libavcodec/ps2/dsputil_mmi.c +++ b/libavcodec/ps2/dsputil_mmi.c @@ -142,7 +142,9 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + if (!high_bit_depth) { c->clear_blocks = clear_blocks_mmi; c->put_pixels_tab[1][0] = put_pixels8_mmi; @@ -150,6 +152,7 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) c->put_pixels_tab[0][0] = put_pixels16_mmi; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; + } c->get_pixels = get_pixels_mmi; diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c index f3ab979..b5d314c 100644 --- a/libavcodec/rv34.c +++ b/libavcodec/rv34.c @@ -1384,7 +1384,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) if (MPV_common_init(s) < 0) return -1; - ff_h264_pred_init(&r->h, CODEC_ID_RV40); + ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); r->intra_types_stride = 4*s->mb_stride + 4; r->intra_types_hist = av_malloc(r->intra_types_stride * 4 * 2 * sizeof(*r->intra_types_hist)); diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c index 7a8d60d..db40ece 100644 --- a/libavcodec/sh4/dsputil_align.c +++ b/libavcodec/sh4/dsputil_align.c @@ -333,6 +333,9 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) { + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; + + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_rnd_pixels16_o; 
c->put_pixels_tab[0][1] = put_rnd_pixels16_x; c->put_pixels_tab[0][2] = put_rnd_pixels16_y; @@ -368,6 +371,7 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x; c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y; c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy; + } #ifdef QPEL @@ -401,20 +405,24 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx) dspfunc(avg_qpel, 1, 8); /* dspfunc(avg_no_rnd_qpel, 1, 8); */ + if (!high_bit_depth) { dspfunc(put_h264_qpel, 0, 16); dspfunc(put_h264_qpel, 1, 8); dspfunc(put_h264_qpel, 2, 4); dspfunc(avg_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 1, 8); dspfunc(avg_h264_qpel, 2, 4); + } #undef dspfunc + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4; c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4; c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4; c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4; c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4; + } c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4; diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c index 0c724c3..9ea48ad 100644 --- a/libavcodec/sh4/dsputil_sh4.c +++ b/libavcodec/sh4/dsputil_sh4.c @@ -92,8 +92,10 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block) void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx) { const int idct_algo= avctx->idct_algo; + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; dsputil_init_align(c,avctx); + if (!high_bit_depth) c->clear_blocks = clear_blocks_sh4; if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SH4){ c->idct_put = idct_put; diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c index baf555b..ab9258b 100644 --- a/libavcodec/sparc/dsputil_vis.c +++ 
b/libavcodec/sparc/dsputil_vis.c @@ -3953,6 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) { /* VIS-specific optimizations */ int accel = vis_level (); + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (accel & ACCEL_SPARC_VIS) { if(avctx->idct_algo==FF_IDCT_SIMPLEVIS){ @@ -3962,6 +3963,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } + if (!high_bit_depth) { c->put_pixels_tab[0][0] = MC_put_o_16_vis; c->put_pixels_tab[0][1] = MC_put_x_16_vis; c->put_pixels_tab[0][2] = MC_put_y_16_vis; @@ -4001,5 +4003,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis; c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis; c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis; + } } } diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index ea0e911..dc7eb21 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -1698,7 +1698,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) avctx->pix_fmt = PIX_FMT_YUV420P; dsputil_init(&s->dsp, avctx); - ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); + ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8); ff_vp8dsp_init(&s->vp8dsp); return 0; diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index f98e6ae..a0cb11a 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = 
ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; + if (!high_bit_depth) { c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; if ((mm_flags & AV_CPU_FLAG_SSE) && @@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->clear_block = clear_block_sse; c->clear_blocks = clear_blocks_sse; } + } #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ @@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU + if (!high_bit_depth) { SET_HPEL_FUNCS(put, 0, 16, mmx); SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); SET_HPEL_FUNCS(avg, 0, 16, mmx); @@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); SET_HPEL_FUNCS(avg, 1, 8, mmx); SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); + } #if ARCH_X86_32 || !HAVE_YASM c->gmc= gmc_mmx; #endif #if ARCH_X86_32 && HAVE_YASM + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_mmx; #endif c->add_bytes= add_bytes_mmx; c->add_bytes_l2= add_bytes_l2_mmx; + if (!high_bit_depth) c->draw_edges = draw_edges_mmx; if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { @@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; + } c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; @@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX2) { c->prefetch = prefetch_mmx2; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = 
put_pixels16_y2_mmx2; @@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + } if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + if (!high_bit_depth) { c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } if (CONFIG_VP3_DECODER && HAVE_YASM) { c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; @@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); @@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; + } c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; #endif @@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->prefetch = 
prefetch_3dnow; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; } + } if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { @@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); @@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); #if HAVE_YASM + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; + } c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; @@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ // these functions are slower than mmx on AMD, but faster on Intel + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2; H264_QPEL_FUNCS(0, 0, sse2); + } } if(mm_flags & AV_CPU_FLAG_SSE2){ + if (!high_bit_depth) { H264_QPEL_FUNCS(0, 1, sse2); 
H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 3, sse2); @@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, sse2); H264_QPEL_FUNCS(3, 2, sse2); H264_QPEL_FUNCS(3, 3, sse2); + } } #if HAVE_SSSE3 if(mm_flags & AV_CPU_FLAG_SSSE3){ + if (!high_bit_depth) { H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 1, ssse3); H264_QPEL_FUNCS(1, 2, ssse3); @@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, ssse3); H264_QPEL_FUNCS(3, 2, ssse3); H264_QPEL_FUNCS(3, 3, ssse3); + } c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; + } c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; @@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_sse; c->gmc= gmc_sse; #endif diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 4142cc1..9eb7525 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) { int mm_flags = av_get_cpu_flags(); + const int high_depth = 
bit_depth > 8; + + if (high_depth) + return; #if HAVE_YASM if (mm_flags & AV_CPU_FLAG_MMX) { diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index 9f004a5..b4936a6 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX ( 4, 8) H264_BIWEIGHT_MMX ( 4, 4) H264_BIWEIGHT_MMX ( 4, 2) -void ff_h264dsp_init_x86(H264DSPContext *c) +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) { int mm_flags = av_get_cpu_flags(); + if (bit_depth == 8) { if (mm_flags & AV_CPU_FLAG_MMX2) { c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; } @@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c) } } } + } #endif } |