summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2011-07-22 11:56:53 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2011-07-22 12:08:52 +0200
commit: 4095fa903830f8395a26d6ee38c77ad6333a4f5e (patch)
tree: 828ada22309e543a181997b63c6ffca6868731ac /libavcodec
parent: 657eac048eb267d781de83849fe7616d29320832 (diff)
parent: bb32fded3623a20ff8999c2924315841c08c985c (diff)
download: ffmpeg-streaming-4095fa903830f8395a26d6ee38c77ad6333a4f5e.zip
download: ffmpeg-streaming-4095fa903830f8395a26d6ee38c77ad6333a4f5e.tar.gz
Merge remote-tracking branch 'qatar/master'

* qatar/master:
  dnxhddec: optimise dnxhd_decode_dct_block()
  rtp: remove disabled code
  eac3enc: use different numbers of blocks per frame to allow higher bitrates
  dnxhd: add regression test for 10-bit
  dnxhd: 10-bit support
  dsputil: update per-arch init funcs for non-h264 high bit depth
  dsputil: template get_pixels() for different bit depths
  dsputil: create 16/32-bit dctcoef versions of some functions
  jfdctint: add 10-bit version
  mov: add clcp type track as Subtitle stream.
  mpeg4: add Mpeg4 Profiles names.
  mpeg4: decode Level Profile for MPEG4 Part 2.
  ffprobe: display bitstream level.
  imgconvert: remove unused glue and xglue macros

Conflicts:
  libavcodec/dsputil_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/ac3enc.c 179
-rw-r--r-- libavcodec/ac3enc.h 2
-rw-r--r-- libavcodec/ac3enc_fixed.c 2
-rw-r--r-- libavcodec/ac3enc_float.c 2
-rw-r--r-- libavcodec/ac3enc_template.c 24
-rw-r--r-- libavcodec/alpha/dsputil_alpha.c 5
-rw-r--r-- libavcodec/arm/dsputil_init_arm.c 2
-rw-r--r-- libavcodec/arm/dsputil_init_armv6.c 5
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c 2
-rw-r--r-- libavcodec/arm/dsputil_iwmmxt.c 2
-rw-r--r-- libavcodec/avcodec.h 17
-rw-r--r-- libavcodec/bfin/dsputil_bfin.c 10
-rw-r--r-- libavcodec/dct-test.c 2
-rw-r--r-- libavcodec/dnxhddata.c 202
-rw-r--r-- libavcodec/dnxhddata.h 2
-rw-r--r-- libavcodec/dnxhddec.c 159
-rw-r--r-- libavcodec/dnxhdenc.c 240
-rw-r--r-- libavcodec/dnxhdenc.h 4
-rw-r--r-- libavcodec/dsputil.c 72
-rw-r--r-- libavcodec/dsputil.h 11
-rw-r--r-- libavcodec/dsputil_template.c 129
-rw-r--r-- libavcodec/eac3enc.c 30
-rw-r--r-- libavcodec/h264.c 1
-rw-r--r-- libavcodec/imgconvert.c 3
-rw-r--r-- libavcodec/jfdctint.c 413
-rw-r--r-- libavcodec/jfdctint_template.c 405
-rw-r--r-- libavcodec/mlib/dsputil_mlib.c 5
-rw-r--r-- libavcodec/mpeg4videodec.c 39
-rw-r--r-- libavcodec/mpegvideo_enc.c 3
-rw-r--r-- libavcodec/ppc/dsputil_altivec.c 7
-rw-r--r-- libavcodec/ppc/dsputil_ppc.c 7
-rw-r--r-- libavcodec/ppc/h264_altivec.c 2
-rw-r--r-- libavcodec/ps2/dsputil_mmi.c 4
-rw-r--r-- libavcodec/sh4/dsputil_align.c 2
-rw-r--r-- libavcodec/sh4/dsputil_sh4.c 2
-rw-r--r-- libavcodec/sparc/dsputil_vis.c 2
-rw-r--r-- libavcodec/x86/dnxhd_mmx.c 3
-rw-r--r-- libavcodec/x86/dsputil_mmx.c 2
-rw-r--r-- libavcodec/x86/dsputilenc_mmx.c 10
39 files changed, 1279 insertions, 734 deletions
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index f45ed3c..df847b8 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -186,7 +186,7 @@ void ff_ac3_adjust_frame_size(AC3EncodeContext *s)
s->frame_size = s->frame_size_min +
2 * (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate);
s->bits_written += s->frame_size * 8;
- s->samples_written += AC3_FRAME_SIZE;
+ s->samples_written += AC3_BLOCK_SIZE * s->num_blocks;
}
@@ -198,7 +198,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
/* set coupling use flags for each block/channel */
/* TODO: turn coupling on/off and adjust start band based on bit usage */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++)
block->channel_in_cpl[ch] = s->cpl_on;
@@ -208,7 +208,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
enabled for that block */
got_cpl_snr = 0;
num_cpl_blocks = 0;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
block->num_cpl_channels = 0;
for (ch = 1; ch <= s->fbw_channels; ch++)
@@ -244,7 +244,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
s->cpl_on = 0;
/* set bandwidth for each channel */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch])
@@ -269,7 +269,7 @@ void ff_ac3_apply_rematrixing(AC3EncodeContext *s)
if (!s->rematrixing_enabled)
return;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (block->new_rematrixing_strategy)
flags = block->rematrixing_flags;
@@ -318,7 +318,7 @@ static av_cold void exponent_init(AC3EncodeContext *s)
static void extract_exponents(AC3EncodeContext *s)
{
int ch = !s->cpl_on;
- int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1);
+ int chan_size = AC3_MAX_COEFS * s->num_blocks * (s->channels - ch + 1);
AC3Block *block = &s->blocks[0];
s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size);
@@ -331,6 +331,15 @@ static void extract_exponents(AC3EncodeContext *s)
*/
#define EXP_DIFF_THRESHOLD 500
+/**
+ * Table used to select exponent strategy based on exponent reuse block interval.
+ */
+static const uint8_t exp_strategy_reuse_tab[4][6] = {
+ { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+ { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+ { EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
+ { EXP_D45, EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15 }
+};
/**
* Calculate exponent strategies for all channels.
@@ -349,7 +358,7 @@ static void compute_exp_strategy(AC3EncodeContext *s)
reused in the next frame */
exp_strategy[0] = EXP_NEW;
exp += AC3_MAX_COEFS;
- for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
+ for (blk = 1; blk < s->num_blocks; blk++, exp += AC3_MAX_COEFS) {
if (ch == CPL_CH) {
if (!s->blocks[blk-1].cpl_in_use) {
exp_strategy[blk] = EXP_NEW;
@@ -373,23 +382,18 @@ static void compute_exp_strategy(AC3EncodeContext *s)
/* now select the encoding strategy type : if exponents are often
recoded, we use a coarse encoding */
blk = 0;
- while (blk < AC3_MAX_BLOCKS) {
+ while (blk < s->num_blocks) {
blk1 = blk + 1;
- while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE)
+ while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE)
blk1++;
- switch (blk1 - blk) {
- case 1: exp_strategy[blk] = EXP_D45; break;
- case 2:
- case 3: exp_strategy[blk] = EXP_D25; break;
- default: exp_strategy[blk] = EXP_D15; break;
- }
+ exp_strategy[blk] = exp_strategy_reuse_tab[s->num_blks_code][blk1-blk-1];
blk = blk1;
}
}
if (s->lfe_on) {
ch = s->lfe_channel;
s->exp_strategy[ch][0] = EXP_D15;
- for (blk = 1; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 1; blk < s->num_blocks; blk++)
s->exp_strategy[ch][blk] = EXP_REUSE;
}
@@ -487,7 +491,7 @@ static void encode_exponents(AC3EncodeContext *s)
cpl = (ch == CPL_CH);
blk = 0;
- while (blk < AC3_MAX_BLOCKS) {
+ while (blk < s->num_blocks) {
AC3Block *block = &s->blocks[blk];
if (cpl && !block->cpl_in_use) {
exp += AC3_MAX_COEFS;
@@ -500,7 +504,7 @@ static void encode_exponents(AC3EncodeContext *s)
/* count the number of EXP_REUSE blocks after the current block
and set exponent reference block numbers */
s->exp_ref_block[ch][blk] = blk;
- while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
+ while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE) {
s->exp_ref_block[ch][blk1] = blk;
blk1++;
}
@@ -536,7 +540,7 @@ static void group_exponents(AC3EncodeContext *s)
int exp0, exp1;
bit_count = 0;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
int exp_strategy = s->exp_strategy[ch][blk];
@@ -625,30 +629,38 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
if (s->eac3) {
/* bitstream info header */
frame_bits += 35;
- frame_bits += 1 + 1 + 1;
+ frame_bits += 1 + 1;
+ if (s->num_blocks != 0x6)
+ frame_bits++;
+ frame_bits++;
/* audio frame header */
- frame_bits += 2;
+ if (s->num_blocks == 6)
+ frame_bits += 2;
frame_bits += 10;
/* exponent strategy */
if (s->use_frame_exp_strategy)
frame_bits += 5 * s->fbw_channels;
else
- frame_bits += AC3_MAX_BLOCKS * 2 * s->fbw_channels;
+ frame_bits += s->num_blocks * 2 * s->fbw_channels;
if (s->lfe_on)
- frame_bits += AC3_MAX_BLOCKS;
+ frame_bits += s->num_blocks;
/* converter exponent strategy */
- frame_bits += s->fbw_channels * 5;
+ if (s->num_blks_code != 0x3)
+ frame_bits++;
+ else
+ frame_bits += s->fbw_channels * 5;
/* snr offsets */
frame_bits += 10;
/* block start info */
- frame_bits++;
+ if (s->num_blocks != 1)
+ frame_bits++;
} else {
frame_bits += 49;
frame_bits += frame_bits_inc[s->channel_mode];
}
/* audio blocks */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
if (!s->eac3) {
/* block switch flags */
frame_bits += s->fbw_channels;
@@ -750,7 +762,7 @@ static void count_frame_bits(AC3EncodeContext *s)
/* coupling */
if (s->channel_mode > AC3_CHMODE_MONO) {
frame_bits++;
- for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 1; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
frame_bits++;
if (block->new_cpl_strategy)
@@ -762,7 +774,7 @@ static void count_frame_bits(AC3EncodeContext *s)
if (s->use_frame_exp_strategy) {
frame_bits += 5 * s->cpl_on;
} else {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
frame_bits += 2 * s->blocks[blk].cpl_in_use;
}
}
@@ -778,7 +790,7 @@ static void count_frame_bits(AC3EncodeContext *s)
}
/* audio blocks */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
/* coupling strategy */
@@ -865,7 +877,7 @@ static void bit_alloc_masking(AC3EncodeContext *s)
{
int blk, ch;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
/* We only need psd and mask for calculating bap.
@@ -901,9 +913,9 @@ static void reset_block_bap(AC3EncodeContext *s)
ref_bap = s->bap_buffer;
for (ch = 0; ch <= s->channels; ch++) {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk];
- ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+ ref_bap += AC3_MAX_COEFS * s->num_blocks;
}
s->ref_bap_set = 1;
}
@@ -936,7 +948,7 @@ static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
{
int blk;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (ch == CPL_CH && !block->cpl_in_use)
continue;
@@ -980,7 +992,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
snr_offset = (snr_offset - 240) << 2;
reset_block_bap(s);
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
@@ -1194,7 +1206,7 @@ void ff_ac3_quantize_mantissas(AC3EncodeContext *s)
{
int blk, ch, ch0=0, got_cpl;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Mant m = { 0 };
@@ -1557,7 +1569,7 @@ void ff_ac3_output_frame(AC3EncodeContext *s, unsigned char *frame)
s->output_frame_header(s);
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
output_audio_block(s, blk);
output_frame_end(s);
@@ -1585,6 +1597,7 @@ static void dprint_options(AC3EncodeContext *s)
av_dlog(avctx, "channel_layout: %s\n", strbuf);
av_dlog(avctx, "sample_rate: %d\n", s->sample_rate);
av_dlog(avctx, "bit_rate: %d\n", s->bit_rate);
+ av_dlog(avctx, "blocks/frame: %d (code=%d)\n", s->num_blocks, s->num_blks_code);
if (s->cutoff)
av_dlog(avctx, "cutoff: %d\n", s->cutoff);
@@ -1851,7 +1864,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
av_freep(&s->qmant_buffer);
av_freep(&s->cpl_coord_exp_buffer);
av_freep(&s->cpl_coord_mant_buffer);
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
av_freep(&block->mdct_coef);
av_freep(&block->fixed_coef);
@@ -1958,18 +1971,30 @@ static av_cold int validate_options(AC3EncodeContext *s)
/* validate bit rate */
if (s->eac3) {
int max_br, min_br, wpf, min_br_dist, min_br_code;
+ int num_blks_code, num_blocks, frame_samples;
/* calculate min/max bitrate */
- max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16;
- min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16;
+ /* TODO: More testing with 3 and 2 blocks. All E-AC-3 samples I've
+ found use either 6 blocks or 1 block, even though 2 or 3 blocks
+ would work as far as the bit rate is concerned. */
+ for (num_blks_code = 3; num_blks_code >= 0; num_blks_code--) {
+ num_blocks = ((int[]){ 1, 2, 3, 6 })[num_blks_code];
+ frame_samples = AC3_BLOCK_SIZE * num_blocks;
+ max_br = 2048 * s->sample_rate / frame_samples * 16;
+ min_br = ((s->sample_rate + (frame_samples-1)) / frame_samples) * 16;
+ if (avctx->bit_rate <= max_br)
+ break;
+ }
if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) {
av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d "
"for this sample rate\n", min_br, max_br);
return AVERROR(EINVAL);
}
+ s->num_blks_code = num_blks_code;
+ s->num_blocks = num_blocks;
/* calculate words-per-frame for the selected bitrate */
- wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate;
+ wpf = (avctx->bit_rate / 16) * frame_samples / s->sample_rate;
av_assert1(wpf > 0 && wpf <= 2048);
/* find the closest AC-3 bitrate code to the selected bitrate.
@@ -2001,6 +2026,8 @@ static av_cold int validate_options(AC3EncodeContext *s)
}
s->frame_size_code = i << 1;
s->frame_size_min = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
+ s->num_blks_code = 0x3;
+ s->num_blocks = 6;
}
s->bit_rate = avctx->bit_rate;
s->frame_size = s->frame_size_min;
@@ -2065,13 +2092,13 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
/* set number of coefficients for each channel */
for (ch = 1; ch <= s->fbw_channels; ch++) {
s->start_freq[ch] = 0;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73;
}
/* LFE channel always has 7 coefs */
if (s->lfe_on) {
s->start_freq[s->lfe_channel] = 0;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[ch] = 7;
}
@@ -2108,7 +2135,7 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
s->start_freq[CPL_CH] = cpl_start_band * 12 + 37;
s->cpl_end_freq = cpl_end_band * 12 + 37;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq;
}
}
@@ -2119,35 +2146,37 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
AVCodecContext *avctx = s->avctx;
int blk, ch;
int channels = s->channels + 1; /* includes coupling channel */
+ int channel_blocks = channels * s->num_blocks;
+ int total_coefs = AC3_MAX_COEFS * channel_blocks;
if (s->allocate_sample_buffers(s))
goto alloc_fail;
- FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
- FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
- 128 * sizeof(*s->grouped_exp_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels *
- 64 * sizeof(*s->band_psd_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels *
- 64 * sizeof(*s->mask_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, total_coefs *
+ sizeof(*s->bap_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, total_coefs *
+ sizeof(*s->bap1_buffer), alloc_fail);
+ FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, total_coefs *
+ sizeof(*s->mdct_coef_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, total_coefs *
+ sizeof(*s->exp_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, channel_blocks * 128 *
+ sizeof(*s->grouped_exp_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, total_coefs *
+ sizeof(*s->psd_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, channel_blocks * 64 *
+ sizeof(*s->band_psd_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, channel_blocks * 64 *
+ sizeof(*s->mask_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, total_coefs *
+ sizeof(*s->qmant_buffer), alloc_fail);
if (s->cpl_enabled) {
- FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels *
- 16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
- FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels *
- 16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, channel_blocks * 16 *
+ sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
+ FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, channel_blocks * 16 *
+ sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
}
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
alloc_fail);
@@ -2183,23 +2212,23 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
}
/* arrangement: channel, block, coeff */
- block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
- block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
+ block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
+ block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
}
}
if (!s->fixed_point) {
- FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
- AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, total_coefs *
+ sizeof(*s->fixed_coef_buffer), alloc_fail);
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
for (ch = 0; ch < channels; ch++)
- block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
+ block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
}
} else {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
@@ -2226,14 +2255,14 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
- avctx->frame_size = AC3_FRAME_SIZE;
-
ff_ac3_common_init();
ret = validate_options(s);
if (ret)
return ret;
+ avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
+
s->bitstream_mode = avctx->audio_service_type;
if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
s->bitstream_mode = 0x7;
diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h
index f00f1cf..af104b6 100644
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -152,6 +152,8 @@ typedef struct AC3EncodeContext {
int bit_rate; ///< target bit rate, in bits-per-second
int sample_rate; ///< sampling frequency, in Hz
+ int num_blks_code; ///< number of blocks code (numblkscod)
+ int num_blocks; ///< number of blocks per frame
int frame_size_min; ///< minimum frame size in case rounding is necessary
int frame_size; ///< current frame size in bytes
int frame_size_code; ///< frame size code (frmsizecod)
diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c
index 6153697..906b0a5 100644
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -93,7 +93,7 @@ static void scale_coefficients(AC3EncodeContext *s)
{
int blk, ch;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->channels; ch++) {
s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c
index fa19a21..cb75314 100644
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -103,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s)
*/
static void scale_coefficients(AC3EncodeContext *s)
{
- int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
+ int chan_size = AC3_MAX_COEFS * s->num_blocks;
s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
s->mdct_coef_buffer + chan_size,
chan_size * s->channels);
diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c
index 9b9151b..c4e2a12 100644
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -79,13 +79,13 @@ static void deinterleave_input_samples(AC3EncodeContext *s,
int sinc;
/* copy last 256 samples of previous frame to the start of the current frame */
- memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
+ memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_BLOCK_SIZE * s->num_blocks],
AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
/* deinterleave */
sinc = s->channels;
sptr = samples + s->channel_map[ch];
- for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
+ for (i = AC3_BLOCK_SIZE; i < AC3_BLOCK_SIZE * (s->num_blocks + 1); i++) {
s->planar_samples[ch][i] = *sptr;
sptr += sinc;
}
@@ -103,7 +103,7 @@ static void apply_mdct(AC3EncodeContext *s)
int blk, ch;
for (ch = 0; ch < s->channels; ch++) {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
@@ -159,7 +159,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
cpl_start = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
/* calculate coupling channel from fbw channels */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
if (!block->cpl_in_use)
@@ -188,7 +188,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
while (i < s->cpl_end_freq) {
int band_size = s->cpl_band_sizes[bnd];
for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
continue;
@@ -203,7 +203,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
}
/* determine which blocks to send new coupling coordinates for */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
int new_coords = 0;
@@ -261,7 +261,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
coordinates in successive blocks */
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
blk = 0;
- while (blk < AC3_MAX_BLOCKS) {
+ while (blk < s->num_blocks) {
int blk1;
CoefSumType energy_cpl;
AC3Block *block = &s->blocks[blk];
@@ -273,7 +273,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
energy_cpl = energy[blk][CPL_CH][bnd];
blk1 = blk+1;
- while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
+ while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
if (s->blocks[blk1].cpl_in_use)
energy_cpl += energy[blk1][CPL_CH][bnd];
blk1++;
@@ -285,7 +285,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
continue;
energy_ch = energy[blk][ch][bnd];
blk1 = blk+1;
- while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
+ while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
if (s->blocks[blk1].cpl_in_use)
energy_ch += energy[blk1][ch][bnd];
blk1++;
@@ -297,7 +297,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
}
/* calculate exponents/mantissas for coupling coordinates */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || !block->new_cpl_coords)
continue;
@@ -362,7 +362,7 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
if (s->channel_mode != AC3_CHMODE_STEREO)
return;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
block = &s->blocks[blk];
block->new_rematrixing_strategy = !blk;
@@ -440,7 +440,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
scale_coefficients(s);
clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
- AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
+ AC3_MAX_COEFS * s->num_blocks * s->channels);
s->cpl_on = s->cpl_enabled;
ff_ac3_compute_coupling_strategy(s);
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index d0d2a62..d8f999d 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -270,7 +270,7 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@@ -321,7 +321,8 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
- c->get_pixels = get_pixels_mvi;
+ if (!high_bit_depth)
+ c->get_pixels = get_pixels_mvi;
c->diff_pixels = diff_pixels_mvi;
c->sad[0] = pix_abs16x16_mvi_asm;
c->sad[1] = pix_abs8x8_mvi;
diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c
index 5728d70..ccbe1ed 100644
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;
diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c
index 1fc636b..fb0d009 100644
--- a/libavcodec/arm/dsputil_init_armv6.c
+++ b/libavcodec/arm/dsputil_init_armv6.c
@@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
@@ -106,8 +106,9 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
}
+ if (!high_bit_depth)
+ c->get_pixels = ff_get_pixels_armv6;
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
- c->get_pixels = ff_get_pixels_armv6;
c->diff_pixels = ff_diff_pixels_armv6;
c->pix_abs[0][0] = ff_pix_abs16_armv6;
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index c8b6b6e..3b0de32 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -175,7 +175,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
diff --git a/libavcodec/arm/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
index 85be831..2837af1 100644
--- a/libavcodec/arm/dsputil_iwmmxt.c
+++ b/libavcodec/arm/dsputil_iwmmxt.c
@@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 9e5cbd5..c854958 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -2278,6 +2278,23 @@ typedef struct AVCodecContext {
#define FF_PROFILE_VC1_COMPLEX 2
#define FF_PROFILE_VC1_ADVANCED 3
+#define FF_PROFILE_MPEG4_SIMPLE 0
+#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE 1
+#define FF_PROFILE_MPEG4_CORE 2
+#define FF_PROFILE_MPEG4_MAIN 3
+#define FF_PROFILE_MPEG4_N_BIT 4
+#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE 5
+#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION 6
+#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE 7
+#define FF_PROFILE_MPEG4_HYBRID 8
+#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME 9
+#define FF_PROFILE_MPEG4_CORE_SCALABLE 10
+#define FF_PROFILE_MPEG4_ADVANCED_CODING 11
+#define FF_PROFILE_MPEG4_ADVANCED_CORE 12
+#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
+#define FF_PROFILE_MPEG4_SIMPLE_STUDIO 14
+#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE 15
+
/**
* level
* - encoding: Set by user.
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index eade153..bfcc337 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -197,14 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
- c->get_pixels = ff_bfin_get_pixels;
c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
if (!high_bit_depth)
+ c->get_pixels = ff_bfin_get_pixels;
c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum;
c->pix_norm1 = ff_bfin_pix_norm1;
@@ -253,10 +253,10 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
}
- if (avctx->dct_algo == FF_DCT_AUTO)
- c->fdct = ff_bfin_fdct;
-
if (avctx->bits_per_raw_sample <= 8) {
+ if (avctx->dct_algo == FF_DCT_AUTO)
+ c->fdct = ff_bfin_fdct;
+
if (avctx->idct_algo == FF_IDCT_VP3) {
c->idct_permutation_type = FF_NO_IDCT_PERM;
c->idct = ff_bfin_vp3_idct;
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index de6582c..9e1e996 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -88,7 +88,7 @@ static const struct algo fdct_tab[] = {
{ "REF-DBL", ff_ref_fdct, NO_PERM },
{ "FAAN", ff_faandct, FAAN_SCALE },
{ "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
- { "IJG-LLM-INT", ff_jpeg_fdct_islow, NO_PERM },
+ { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
#if HAVE_MMX
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
diff --git a/libavcodec/dnxhddata.c b/libavcodec/dnxhddata.c
index d4aefee..536636d 100644
--- a/libavcodec/dnxhddata.c
+++ b/libavcodec/dnxhddata.c
@@ -22,6 +22,28 @@
#include "avcodec.h"
#include "dnxhddata.h"
+static const uint8_t dnxhd_1235_luma_weight[] = {
+ 0, 32, 32, 32, 33, 35, 38, 39,
+ 32, 33, 32, 33, 36, 36, 39, 42,
+ 32, 32, 33, 36, 35, 37, 41, 43,
+ 31, 33, 34, 36, 36, 40, 42, 48,
+ 32, 34, 36, 37, 39, 42, 46, 51,
+ 36, 37, 37, 39, 41, 46, 51, 55,
+ 37, 39, 41, 41, 47, 50, 55, 56,
+ 41, 42, 41, 44, 50, 53, 60, 60
+};
+
+static const uint8_t dnxhd_1235_chroma_weight[] = {
+ 0, 32, 33, 34, 39, 41, 54, 59,
+ 33, 34, 35, 38, 43, 49, 58, 84,
+ 34, 37, 39, 44, 46, 55, 74, 87,
+ 40, 42, 47, 48, 58, 70, 87, 86,
+ 43, 50, 56, 63, 72, 94, 91, 82,
+ 55, 63, 65, 75, 93, 89, 85, 73,
+ 61, 67, 82, 81, 83, 90, 79, 73,
+ 74, 84, 75, 78, 90, 85, 73, 73
+};
+
static const uint8_t dnxhd_1237_luma_weight[] = {
0, 32, 33, 34, 34, 36, 37, 36,
36, 37, 38, 38, 38, 39, 41, 44,
@@ -132,6 +154,28 @@ static const uint8_t dnxhd_1243_chroma_weight[] = {
46, 45, 46, 47, 47, 48, 47, 47,
};
+static const uint8_t dnxhd_1250_luma_weight[] = {
+ 0, 32, 35, 35, 36, 36, 41, 43,
+ 32, 34, 35, 36, 37, 39, 43, 47,
+ 33, 34, 36, 38, 38, 42, 42, 50,
+ 34, 36, 38, 38, 41, 40, 47, 54,
+ 35, 38, 39, 40, 39, 45, 49, 58,
+ 38, 39, 40, 39, 46, 47, 54, 60,
+ 38, 39, 41, 46, 46, 48, 57, 62,
+ 40, 41, 44, 45, 49, 54, 63, 63
+};
+
+static const uint8_t dnxhd_1250_chroma_weight[] = {
+ 0, 32, 35, 36, 40, 42, 51, 51,
+ 35, 36, 39, 39, 43, 51, 52, 55,
+ 36, 41, 41, 43, 51, 53, 54, 56,
+ 43, 44, 45, 50, 54, 54, 55, 57,
+ 45, 48, 50, 51, 55, 58, 59, 58,
+ 49, 52, 49, 57, 58, 62, 58, 60,
+ 51, 51, 56, 58, 62, 61, 59, 62,
+ 52, 52, 60, 61, 59, 59, 63, 63
+};
+
static const uint8_t dnxhd_1251_luma_weight[] = {
0, 32, 32, 34, 34, 34, 34, 35,
35, 35, 36, 37, 36, 36, 35, 36,
@@ -604,6 +648,146 @@ static const uint8_t dnxhd_1235_1241_run[62] = {
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
};
+static const uint8_t dnxhd_1250_dc_codes[14] = {
+ 10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127
+};
+static const uint8_t dnxhd_1250_dc_bits[14] = {
+ 4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7
+};
+static const uint16_t dnxhd_1250_ac_codes[257] = {
+ 0, 1, 4, 10, 11, 24, 25, 26,
+ 54, 55, 56, 57, 116, 117, 118, 119,
+ 240, 241, 242, 243, 244, 245, 492, 493,
+ 494, 495, 496, 497, 498, 998, 999, 1000,
+ 1001, 1002, 1003, 1004, 1005, 1006, 2014, 2015,
+ 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023,
+ 2024, 2025, 4052, 4053, 4054, 4055, 4056, 4057,
+ 4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065,
+ 4066, 4067, 8136, 8137, 8138, 8139, 8140, 8141,
+ 8142, 8143, 8144, 8145, 8146, 8147, 8148, 8149,
+ 8150, 8151, 8152, 8153, 8154, 8155, 8156, 16314,
+ 16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322,
+ 16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330,
+ 16331, 16332, 16333, 16334, 16335, 16336, 16337, 16338,
+ 32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685,
+ 32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693,
+ 32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701,
+ 32702, 32703, 32704, 32705, 32706, 32707, 32708, 32709,
+ 32710, 32711, 32712, 65426, 65427, 65428, 65429, 65430,
+ 65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438,
+ 65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446,
+ 65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454,
+ 65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462,
+ 65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470,
+ 65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478,
+ 65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486,
+ 65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494,
+ 65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502,
+ 65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510,
+ 65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518,
+ 65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526,
+ 65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534,
+ 65535
+};
+static const uint8_t dnxhd_1250_ac_bits[257] = {
+ 2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+ 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+ 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16
+};
+static const uint8_t dnxhd_1250_ac_level[257] = {
+ 1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4,
+ 12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 22, 6, 23, 24, 25,
+ 26, 27, 28, 29, 7, 8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ 9, 10, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 11,
+ 12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2,
+ 3, 4, 5, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 18, 19, 20, 21,
+ 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+ 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 22, 23, 24,
+ 25, 26, 27, 54, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64
+};
+static const uint8_t dnxhd_1250_ac_run_flag[257] = {
+ 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
+ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
+ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+static const uint8_t dnxhd_1250_ac_index_flag[257] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+static const uint16_t dnxhd_1250_run_codes[62] = {
+ 0, 4, 5, 12, 26, 27, 28, 58,
+ 118, 119, 120, 242, 486, 487, 976, 977,
+ 978, 979, 980, 981, 982, 983, 984, 985,
+ 986, 987, 988, 989, 990, 991, 992, 993,
+ 994, 995, 996, 997, 998, 999, 1000, 1001,
+ 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
+ 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
+ 1018, 1019, 1020, 1021, 1022, 1023
+};
+static const uint8_t dnxhd_1250_run_bits[62] = {
+ 1, 3, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
+};
+static const uint8_t dnxhd_1250_run[62] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62
+};
+
static const uint8_t dnxhd_1251_dc_codes[12] = {
0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
};
@@ -878,6 +1062,13 @@ static const uint8_t dnxhd_1252_ac_index_flag[257] = {
};
const CIDEntry ff_dnxhd_cid_table[] = {
+ { 1235, 1920, 1080, 0, 917504, 917504, 6, 10,
+ dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight,
+ dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
+ dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
+ dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
+ dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
+ { 175, 185, 365, 440 } },
{ 1237, 1920, 1080, 0, 606208, 606208, 4, 8,
dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
@@ -913,6 +1104,13 @@ const CIDEntry ff_dnxhd_cid_table[] = {
dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
{ 185, 220 } },
+ { 1250, 1280, 720, 0, 458752, 458752, 6, 10,
+ dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
+ dnxhd_1250_dc_codes, dnxhd_1250_dc_bits,
+ dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level,
+ dnxhd_1250_ac_run_flag, dnxhd_1250_ac_index_flag,
+ dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
+ { 90, 180, 220 } },
{ 1251, 1280, 720, 0, 458752, 458752, 4, 8,
dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
dnxhd_1251_dc_codes, dnxhd_1251_dc_bits,
@@ -945,7 +1143,7 @@ int ff_dnxhd_get_cid_table(int cid)
return -1;
}
-int ff_dnxhd_find_cid(AVCodecContext *avctx)
+int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
{
int i, j;
int mbs = avctx->bit_rate/1000000;
@@ -955,7 +1153,7 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx)
const CIDEntry *cid = &ff_dnxhd_cid_table[i];
if (cid->width == avctx->width && cid->height == avctx->height &&
cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) &&
- cid->bit_depth == 8) { // until 10 bit is supported
+ cid->bit_depth == bit_depth) {
for (j = 0; j < sizeof(cid->bit_rates); j++) {
if (cid->bit_rates[j] == mbs)
return cid->cid;
diff --git a/libavcodec/dnxhddata.h b/libavcodec/dnxhddata.h
index 32c77db..4d03a60 100644
--- a/libavcodec/dnxhddata.h
+++ b/libavcodec/dnxhddata.h
@@ -46,6 +46,6 @@ typedef struct {
extern const CIDEntry ff_dnxhd_cid_table[];
int ff_dnxhd_get_cid_table(int cid);
-int ff_dnxhd_find_cid(AVCodecContext *avctx);
+int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth);
#endif /* AVCODEC_DNXHDDATA_H */
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 43c4679..a7ad620 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -1,6 +1,9 @@
/*
* VC3/DNxHD decoder.
* Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ * Copyright (c) 2011 MirriAd Ltd
+ *
+ * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
*
* This file is part of FFmpeg.
*
@@ -28,7 +31,7 @@
#include "dnxhddata.h"
#include "dsputil.h"
-typedef struct {
+typedef struct DNXHDContext {
AVCodecContext *avctx;
AVFrame picture;
GetBitContext gb;
@@ -43,17 +46,22 @@ typedef struct {
DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
ScanTable scantable;
const CIDEntry *cid_table;
+ int bit_depth; // 8, 10 or 0 if not initialized at all.
+ void (*decode_dct_block)(struct DNXHDContext *ctx, DCTELEM *block,
+ int n, int qscale);
} DNXHDContext;
#define DNXHD_VLC_BITS 9
#define DNXHD_DC_VLC_BITS 7
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
+
static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
{
DNXHDContext *ctx = avctx->priv_data;
ctx->avctx = avctx;
- dsputil_init(&ctx->dsp, avctx);
avctx->coded_frame = &ctx->picture;
avcodec_get_frame_defaults(&ctx->picture);
ctx->picture.type = AV_PICTURE_TYPE_I;
@@ -79,7 +87,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257,
ctx->cid_table->ac_bits, 1, 1,
ctx->cid_table->ac_codes, 2, 2, 0);
- init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->cid_table->bit_depth+4,
+ init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->bit_depth + 4,
ctx->cid_table->dc_bits, 1, 1,
ctx->cid_table->dc_codes, 1, 1, 0);
init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62,
@@ -117,8 +125,21 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
av_dlog(ctx->avctx, "width %d, heigth %d\n", ctx->width, ctx->height);
if (buf[0x21] & 0x40) {
- av_log(ctx->avctx, AV_LOG_ERROR, "10 bit per component\n");
- return -1;
+ ctx->avctx->pix_fmt = PIX_FMT_YUV422P10;
+ ctx->avctx->bits_per_raw_sample = 10;
+ if (ctx->bit_depth != 10) {
+ dsputil_init(&ctx->dsp, ctx->avctx);
+ ctx->bit_depth = 10;
+ ctx->decode_dct_block = dnxhd_decode_dct_block_10;
+ }
+ } else {
+ ctx->avctx->pix_fmt = PIX_FMT_YUV422P;
+ ctx->avctx->bits_per_raw_sample = 8;
+ if (ctx->bit_depth != 8) {
+ dsputil_init(&ctx->dsp, ctx->avctx);
+ ctx->bit_depth = 8;
+ ctx->decode_dct_block = dnxhd_decode_dct_block_8;
+ }
}
cid = AV_RB32(buf + 0x28);
@@ -158,79 +179,103 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
return 0;
}
-static int dnxhd_decode_dc(DNXHDContext *ctx)
+static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
+ DCTELEM *block, int n,
+ int qscale,
+ int index_bits,
+ int level_bias,
+ int level_shift)
{
- int len;
-
- len = get_vlc2(&ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
- return len ? get_xbits(&ctx->gb, len) : 0;
-}
-
-static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int qscale)
-{
- int i, j, index, index2;
+ int i, j, index1, index2, len;
int level, component, sign;
- const uint8_t *weigth_matrix;
+ const uint8_t *weight_matrix;
+ OPEN_READER(bs, &ctx->gb);
if (n&2) {
component = 1 + (n&1);
- weigth_matrix = ctx->cid_table->chroma_weight;
+ weight_matrix = ctx->cid_table->chroma_weight;
} else {
component = 0;
- weigth_matrix = ctx->cid_table->luma_weight;
+ weight_matrix = ctx->cid_table->luma_weight;
}
- ctx->last_dc[component] += dnxhd_decode_dc(ctx);
+ UPDATE_CACHE(bs, &ctx->gb);
+ GET_VLC(len, bs, &ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
+ if (len) {
+ level = GET_CACHE(bs, &ctx->gb);
+ LAST_SKIP_BITS(bs, &ctx->gb, len);
+ sign = ~level >> 31;
+ level = (NEG_USR32(sign ^ level, len) ^ sign) - sign;
+ ctx->last_dc[component] += level;
+ }
block[0] = ctx->last_dc[component];
//av_log(ctx->avctx, AV_LOG_DEBUG, "dc %d\n", block[0]);
+
for (i = 1; ; i++) {
- index = get_vlc2(&ctx->gb, ctx->ac_vlc.table, DNXHD_VLC_BITS, 2);
- //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index);
- level = ctx->cid_table->ac_level[index];
+ UPDATE_CACHE(bs, &ctx->gb);
+ GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
+ DNXHD_VLC_BITS, 2);
+ //av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index1);
+ level = ctx->cid_table->ac_level[index1];
if (!level) { /* EOB */
//av_log(ctx->avctx, AV_LOG_DEBUG, "EOB\n");
- return;
+ break;
}
- sign = get_sbits(&ctx->gb, 1);
- if (ctx->cid_table->ac_index_flag[index]) {
- level += get_bits(&ctx->gb, ctx->cid_table->index_bits)<<6;
+ sign = SHOW_SBITS(bs, &ctx->gb, 1);
+ SKIP_BITS(bs, &ctx->gb, 1);
+
+ if (ctx->cid_table->ac_index_flag[index1]) {
+ level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 6;
+ SKIP_BITS(bs, &ctx->gb, index_bits);
}
- if (ctx->cid_table->ac_run_flag[index]) {
- index2 = get_vlc2(&ctx->gb, ctx->run_vlc.table, DNXHD_VLC_BITS, 2);
+ if (ctx->cid_table->ac_run_flag[index1]) {
+ UPDATE_CACHE(bs, &ctx->gb);
+ GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table,
+ DNXHD_VLC_BITS, 2);
i += ctx->cid_table->run[index2];
}
if (i > 63) {
av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i);
- return;
+ break;
}
j = ctx->scantable.permutated[i];
//av_log(ctx->avctx, AV_LOG_DEBUG, "j %d\n", j);
- //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weigth %d\n", level, weigth_matrix[i]);
- level = (2*level+1) * qscale * weigth_matrix[i];
- if (ctx->cid_table->bit_depth == 10) {
- if (weigth_matrix[i] != 8)
- level += 8;
- level >>= 4;
- } else {
- if (weigth_matrix[i] != 32)
- level += 32;
- level >>= 6;
- }
+ //av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weight %d\n", level, weight_matrix[i]);
+ level = (2*level+1) * qscale * weight_matrix[i];
+ if (weight_matrix[i] != level_bias)
+ level += level_bias;
+ level >>= level_shift;
+
//av_log(NULL, AV_LOG_DEBUG, "i %d, j %d, end level %d\n", i, j, level);
block[j] = (level^sign) - sign;
}
+
+ CLOSE_READER(bs, &ctx->gb);
+}
+
+static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block,
+ int n, int qscale)
+{
+ dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6);
+}
+
+static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block,
+ int n, int qscale)
+{
+ dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4);
}
static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
{
+ int shift1 = ctx->bit_depth == 10;
int dct_linesize_luma = ctx->picture.linesize[0];
int dct_linesize_chroma = ctx->picture.linesize[1];
uint8_t *dest_y, *dest_u, *dest_v;
- int dct_offset;
+ int dct_y_offset, dct_x_offset;
int qscale, i;
qscale = get_bits(&ctx->gb, 11);
@@ -239,7 +284,7 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
for (i = 0; i < 8; i++) {
ctx->dsp.clear_block(ctx->blocks[i]);
- dnxhd_decode_dct_block(ctx, ctx->blocks[i], i, qscale);
+ ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
}
if (ctx->picture.interlaced_frame) {
@@ -247,9 +292,9 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
dct_linesize_chroma <<= 1;
}
- dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << 4);
- dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << 3);
- dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << 3);
+ dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << (4 + shift1));
+ dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
+ dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
if (ctx->cur_field) {
dest_y += ctx->picture.linesize[0];
@@ -257,18 +302,19 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
dest_v += ctx->picture.linesize[2];
}
- dct_offset = dct_linesize_luma << 3;
- ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
- ctx->dsp.idct_put(dest_y + 8, dct_linesize_luma, ctx->blocks[1]);
- ctx->dsp.idct_put(dest_y + dct_offset, dct_linesize_luma, ctx->blocks[4]);
- ctx->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize_luma, ctx->blocks[5]);
+ dct_y_offset = dct_linesize_luma << 3;
+ dct_x_offset = 8 << shift1;
+ ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
+ ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
+ ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]);
+ ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
- dct_offset = dct_linesize_chroma << 3;
- ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
- ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
- ctx->dsp.idct_put(dest_u + dct_offset, dct_linesize_chroma, ctx->blocks[6]);
- ctx->dsp.idct_put(dest_v + dct_offset, dct_linesize_chroma, ctx->blocks[7]);
+ dct_y_offset = dct_linesize_chroma << 3;
+ ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
+ ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
+ ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
+ ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
}
return 0;
@@ -280,7 +326,7 @@ static int dnxhd_decode_macroblocks(DNXHDContext *ctx, const uint8_t *buf, int b
for (y = 0; y < ctx->mb_height; y++) {
ctx->last_dc[0] =
ctx->last_dc[1] =
- ctx->last_dc[2] = 1<<(ctx->cid_table->bit_depth+2); // for levels +2^(bitdepth-1)
+ ctx->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
init_get_bits(&ctx->gb, buf + ctx->mb_scan_index[y], (buf_size - ctx->mb_scan_index[y]) << 3);
for (x = 0; x < ctx->mb_width; x++) {
//START_TIMER;
@@ -313,7 +359,6 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
first_field = 1;
}
- avctx->pix_fmt = PIX_FMT_YUV422P;
if (av_image_check_size(ctx->width, ctx->height, 0, avctx))
return -1;
avcodec_set_dimensions(avctx, ctx->width, ctx->height);
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 57ae8e2..c29144a 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -1,8 +1,10 @@
/*
* VC3/DNxHD encoder
* Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ * Copyright (c) 2011 MirriAd Ltd
*
* VC-3 encoder funded by the British Broadcasting Corporation
+ * 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
*
* This file is part of FFmpeg.
*
@@ -32,6 +34,7 @@
#include "dnxhdenc.h"
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples.
static const AVOption options[]={
{"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
@@ -41,7 +44,7 @@ static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL
#define LAMBDA_FRAC_BITS 10
-static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
for (i = 0; i < 4; i++) {
@@ -58,6 +61,43 @@ static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels,
memcpy(block + 24, block - 32, sizeof(*block) * 8);
}
+static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+ int i;
+
+ block += 32;
+
+ for (i = 0; i < 4; i++) {
+ memcpy(block + i * 8, pixels + i * line_size, 8 * sizeof(*block));
+ memcpy(block - (i+1) * 8, pixels + i * line_size, 8 * sizeof(*block));
+ }
+}
+
+static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block,
+ int n, int qscale, int *overflow)
+{
+ const uint8_t *scantable= ctx->intra_scantable.scantable;
+ const int *qmat = ctx->q_intra_matrix[qscale];
+ int last_non_zero = 0;
+
+ ctx->dsp.fdct(block);
+
+ // Divide by 4 with rounding, to compensate scaling of DCT coefficients
+ block[0] = (block[0] + 2) >> 2;
+
+ for (int i = 1; i < 64; ++i) {
+ int j = scantable[i];
+ int sign = block[j] >> 31;
+ int level = (block[j] ^ sign) - sign;
+ level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT;
+ block[j] = (level ^ sign) - sign;
+ if (level)
+ last_non_zero = i;
+ }
+
+ return last_non_zero;
+}
+
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
{
int i, j, level, run;
@@ -118,31 +158,55 @@ static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
// init first elem to 1 to avoid div by 0 in convert_matrix
uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
int qscale, i;
+ const uint8_t *luma_weight_table = ctx->cid_table->luma_weight;
+ const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
- for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
- weight_matrix[j] = ctx->cid_table->luma_weight[i];
- }
- ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
- ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
- for (i = 1; i < 64; i++) {
- int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
- weight_matrix[j] = ctx->cid_table->chroma_weight[i];
- }
- ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
- ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
- for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
- for (i = 0; i < 64; i++) {
- ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
- ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
- ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+ if (ctx->cid_table->bit_depth == 8) {
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ weight_matrix[j] = ctx->cid_table->luma_weight[i];
+ }
+ ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
+ ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+ weight_matrix[j] = ctx->cid_table->chroma_weight[i];
+ }
+ ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
+ ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
+
+ for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+ for (i = 0; i < 64; i++) {
+ ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
+ ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
+ ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
+ }
+ }
+ } else {
+ // 10-bit
+ for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
+ for (i = 1; i < 64; i++) {
+ int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
+
+ // The quantization formula from the VC-3 standard is:
+ // quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i]))
+ // Where p is 32 for 8-bit samples and 8 for 10-bit ones.
+ // The s factor compensates scaling of DCT coefficients done by the DCT routines,
+ // and therefore is not present in the standard. It's 8 for 8-bit samples and 4 for 10-bit ones.
+ // We want values of ctx->qmatrix_l and ctx->qmatrix_c to be:
+ // ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i])
+ // For 10-bit samples, p / s == 2
+ ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]);
+ ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]);
+ }
}
}
+
return 0;
fail:
return -1;
@@ -165,10 +229,22 @@ static int dnxhd_init_rc(DNXHDEncContext *ctx)
static int dnxhd_encode_init(AVCodecContext *avctx)
{
DNXHDEncContext *ctx = avctx->priv_data;
- int i, index;
+ int i, index, bit_depth;
+
+ switch (avctx->pix_fmt) {
+ case PIX_FMT_YUV422P:
+ bit_depth = 8;
+ break;
+ case PIX_FMT_YUV422P10:
+ bit_depth = 10;
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n");
+ return -1;
+ }
- ctx->cid = ff_dnxhd_find_cid(avctx);
- if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
+ ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
+ if (!ctx->cid) {
av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
return -1;
}
@@ -181,15 +257,25 @@ static int dnxhd_encode_init(AVCodecContext *avctx)
ctx->m.mb_intra = 1;
ctx->m.h263_aic = 1;
- ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
+ avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m);
+ if (!ctx->m.dct_quantize)
+ ctx->m.dct_quantize = dct_quantize_c;
+
+ if (ctx->cid_table->bit_depth == 10) {
+ ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
+ ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
+ ctx->block_width_l2 = 4;
+ } else {
+ ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
+ ctx->block_width_l2 = 3;
+ }
+
#if HAVE_MMX
ff_dnxhd_init_mmx(ctx);
#endif
- if (!ctx->m.dct_quantize)
- ctx->m.dct_quantize = dct_quantize_c;
ctx->m.mb_height = (avctx->height + 15) / 16;
ctx->m.mb_width = (avctx->width + 15) / 16;
@@ -255,7 +341,7 @@ static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
AV_WB16(buf + 0x1a, avctx->width); // SPL
AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL
- buf[0x21] = 0x38; // FIXME 8 bit per comp
+ buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38;
buf[0x22] = 0x88 + (ctx->interlaced<<2);
AV_WB32(buf + 0x28, ctx->cid); // CID
buf[0x2c] = ctx->interlaced ? 0 : 0x80;
@@ -321,15 +407,27 @@ static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *b
if (level) {
if (level < 0) {
level = (1-2*level) * qscale * weight_matrix[i];
- if (weight_matrix[i] != 32)
- level += 32;
- level >>= 6;
+ if (ctx->cid_table->bit_depth == 10) {
+ if (weight_matrix[i] != 8)
+ level += 8;
+ level >>= 4;
+ } else {
+ if (weight_matrix[i] != 32)
+ level += 32;
+ level >>= 6;
+ }
level = -level;
} else {
level = (2*level+1) * qscale * weight_matrix[i];
- if (weight_matrix[i] != 32)
- level += 32;
- level >>= 6;
+ if (ctx->cid_table->bit_depth == 10) {
+ if (weight_matrix[i] != 8)
+ level += 8;
+ level >>= 4;
+ } else {
+ if (weight_matrix[i] != 32)
+ level += 32;
+ level >>= 6;
+ }
}
block[j] = level;
}
@@ -364,22 +462,24 @@ static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *bl
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
- const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
- const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
- const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
+ const int bs = ctx->block_width_l2;
+ const int bw = 1 << bs;
+ const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs+1);
+ const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
+ const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
DSPContext *dsp = &ctx->m.dsp;
- dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
if (ctx->interlaced) {
- ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
- ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
} else {
dsp->clear_block(ctx->blocks[4]);
dsp->clear_block(ctx->blocks[5]);
@@ -387,10 +487,10 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in
dsp->clear_block(ctx->blocks[7]);
}
} else {
- dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
- dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
- dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
+ dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
+ dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
}
}
@@ -417,7 +517,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
- ctx->m.last_dc[2] = 1024;
+ ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
@@ -440,6 +540,8 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
diff = block[0] - ctx->m.last_dc[n];
if (diff < 0) nbits = av_log2_16bit(-2*diff);
else nbits = av_log2_16bit( 2*diff);
+
+ assert(nbits < ctx->cid_table->bit_depth + 4);
dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
ctx->m.last_dc[n] = block[0];
@@ -465,7 +567,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
- ctx->m.last_dc[2] = 1024;
+ ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
int qscale = ctx->mb_qscale[mb];
@@ -515,13 +617,39 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int
DNXHDEncContext *ctx = avctx->priv_data;
int mb_y = jobnr, mb_x;
ctx = ctx->thread[threadnr];
- for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
- unsigned mb = mb_y * ctx->m.mb_width + mb_x;
- uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
- int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
- int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
- ctx->mb_cmp[mb].value = varc;
- ctx->mb_cmp[mb].mb = mb;
+ if (ctx->cid_table->bit_depth == 8) {
+ uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize);
+ for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
+ unsigned mb = mb_y * ctx->m.mb_width + mb_x;
+ int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
+ int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
+ ctx->mb_cmp[mb].value = varc;
+ ctx->mb_cmp[mb].mb = mb;
+ }
+ } else { // 10-bit
+ int const linesize = ctx->m.linesize >> 1;
+ for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
+ uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4);
+ unsigned mb = mb_y * ctx->m.mb_width + mb_x;
+ int sum = 0;
+ int sqsum = 0;
+ int mean, sqmean;
+ // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
+ for (int i = 0; i < 16; ++i) {
+ for (int j = 0; j < 16; ++j) {
+ // Turn 16-bit pixels into 10-bit ones.
+ int const sample = (unsigned)pix[j] >> 6;
+ sum += sample;
+ sqsum += sample * sample;
+ // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
+ }
+ pix += linesize;
+ }
+ mean = sum >> 8; // 16*16 == 2^8
+ sqmean = sqsum >> 8;
+ ctx->mb_cmp[mb].value = sqmean - mean * mean;
+ ctx->mb_cmp[mb].mb = mb;
+ }
}
return 0;
}
@@ -871,7 +999,7 @@ AVCodec ff_dnxhd_encoder = {
dnxhd_encode_picture,
dnxhd_encode_end,
.capabilities = CODEC_CAP_SLICE_THREADS,
- .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
+ .pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_YUV422P10, PIX_FMT_NONE},
.long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
.priv_class = &class,
};
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
index a968ae0..80b6f85 100644
--- a/libavcodec/dnxhdenc.h
+++ b/libavcodec/dnxhdenc.h
@@ -52,8 +52,12 @@ typedef struct DNXHDEncContext {
struct DNXHDEncContext *thread[MAX_THREADS];
+ // Because our samples are either 8 or 16 bits for 8-bit and 10-bit
+ // encoding respectively, these refer either to bytes or to two-byte words.
unsigned dct_y_offset;
unsigned dct_uv_offset;
+ unsigned block_width_l2;
+
int interlaced;
int cur_field;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 5c5f9db..6f8d8a0 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -306,25 +306,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
return s;
}
-static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
-{
- int i;
-
- /* read the pixels */
- for(i=0;i<8;i++) {
- block[0] = pixels[0];
- block[1] = pixels[1];
- block[2] = pixels[2];
- block[3] = pixels[3];
- block[4] = pixels[4];
- block[5] = pixels[5];
- block[6] = pixels[6];
- block[7] = pixels[7];
- pixels += line_size;
- block += 8;
- }
-}
-
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride){
int i;
@@ -2836,17 +2817,22 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
ff_check_alignment();
#if CONFIG_ENCODERS
- if(avctx->dct_algo==FF_DCT_FASTINT) {
- c->fdct = fdct_ifast;
- c->fdct248 = fdct_ifast248;
- }
- else if(avctx->dct_algo==FF_DCT_FAAN) {
- c->fdct = ff_faandct;
- c->fdct248 = ff_faandct248;
- }
- else {
- c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
- c->fdct248 = ff_fdct248_islow;
+ if (avctx->bits_per_raw_sample == 10) {
+ c->fdct = ff_jpeg_fdct_islow_10;
+ c->fdct248 = ff_fdct248_islow_10;
+ } else {
+ if(avctx->dct_algo==FF_DCT_FASTINT) {
+ c->fdct = fdct_ifast;
+ c->fdct248 = fdct_ifast248;
+ }
+ else if(avctx->dct_algo==FF_DCT_FAAN) {
+ c->fdct = ff_faandct;
+ c->fdct248 = ff_faandct248;
+ }
+ else {
+ c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
+ c->fdct248 = ff_fdct248_islow_8;
+ }
}
#endif //CONFIG_ENCODERS
@@ -2910,7 +2896,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
}
}
- c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c;
c->put_pixels_clamped = ff_put_pixels_clamped_c;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
@@ -3138,13 +3123,14 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
-#define BIT_DEPTH_FUNCS(depth)\
+#define BIT_DEPTH_FUNCS(depth, dct)\
+ c->get_pixels = FUNCC(get_pixels ## dct , depth);\
c->draw_edges = FUNCC(draw_edges , depth);\
c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
- c->clear_block = FUNCC(clear_block , depth);\
- c->clear_blocks = FUNCC(clear_blocks , depth);\
- c->add_pixels8 = FUNCC(add_pixels8 , depth);\
- c->add_pixels4 = FUNCC(add_pixels4 , depth);\
+ c->clear_block = FUNCC(clear_block ## dct , depth);\
+ c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
+ c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
+ c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
\
@@ -3178,15 +3164,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
switch (avctx->bits_per_raw_sample) {
case 9:
- BIT_DEPTH_FUNCS(9);
+ if (c->dct_bits == 32) {
+ BIT_DEPTH_FUNCS(9, _32);
+ } else {
+ BIT_DEPTH_FUNCS(9, _16);
+ }
break;
case 10:
- BIT_DEPTH_FUNCS(10);
+ if (c->dct_bits == 32) {
+ BIT_DEPTH_FUNCS(10, _32);
+ } else {
+ BIT_DEPTH_FUNCS(10, _16);
+ }
break;
default:
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
case 8:
- BIT_DEPTH_FUNCS(8);
+ BIT_DEPTH_FUNCS(8, _16);
break;
}
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index cf0fa72..928a516 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -40,8 +40,10 @@ typedef short DCTELEM;
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
-void ff_jpeg_fdct_islow (DCTELEM *data);
-void ff_fdct248_islow (DCTELEM *data);
+void ff_jpeg_fdct_islow_8(DCTELEM *data);
+void ff_jpeg_fdct_islow_10(DCTELEM *data);
+void ff_fdct248_islow_8(DCTELEM *data);
+void ff_fdct248_islow_10(DCTELEM *data);
void j_rev_dct (DCTELEM *data);
void j_rev_dct4 (DCTELEM *data);
@@ -217,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
* DSPContext.
*/
typedef struct DSPContext {
+ /**
+ * Size of DCT coefficients.
+ */
+ int dct_bits;
+
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 17f05e6..85d4fec 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -192,43 +192,89 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
}
}
-static void FUNCC(add_pixels8)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size)
-{
- int i;
- pixel *restrict pixels = (pixel *restrict)p_pixels;
- dctcoef *block = (dctcoef*)p_block;
- line_size >>= sizeof(pixel)-1;
-
- for(i=0;i<8;i++) {
- pixels[0] += block[0];
- pixels[1] += block[1];
- pixels[2] += block[2];
- pixels[3] += block[3];
- pixels[4] += block[4];
- pixels[5] += block[5];
- pixels[6] += block[6];
- pixels[7] += block[7];
- pixels += line_size;
- block += 8;
- }
+#define DCTELEM_FUNCS(dctcoef, suffix) \
+static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \
+ const uint8_t *_pixels, \
+ int line_size) \
+{ \
+ const pixel *pixels = (const pixel *) _pixels; \
+ dctcoef *restrict block = (dctcoef *) _block; \
+ int i; \
+ \
+ /* read the pixels */ \
+ for(i=0;i<8;i++) { \
+ block[0] = pixels[0]; \
+ block[1] = pixels[1]; \
+ block[2] = pixels[2]; \
+ block[3] = pixels[3]; \
+ block[4] = pixels[4]; \
+ block[5] = pixels[5]; \
+ block[6] = pixels[6]; \
+ block[7] = pixels[7]; \
+ pixels += line_size / sizeof(pixel); \
+ block += 8; \
+ } \
+} \
+ \
+static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
+ DCTELEM *_block, \
+ int line_size) \
+{ \
+ int i; \
+ pixel *restrict pixels = (pixel *restrict)_pixels; \
+ dctcoef *block = (dctcoef*)_block; \
+ line_size /= sizeof(pixel); \
+ \
+ for(i=0;i<8;i++) { \
+ pixels[0] += block[0]; \
+ pixels[1] += block[1]; \
+ pixels[2] += block[2]; \
+ pixels[3] += block[3]; \
+ pixels[4] += block[4]; \
+ pixels[5] += block[5]; \
+ pixels[6] += block[6]; \
+ pixels[7] += block[7]; \
+ pixels += line_size; \
+ block += 8; \
+ } \
+} \
+ \
+static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
+ DCTELEM *_block, \
+ int line_size) \
+{ \
+ int i; \
+ pixel *restrict pixels = (pixel *restrict)_pixels; \
+ dctcoef *block = (dctcoef*)_block; \
+ line_size /= sizeof(pixel); \
+ \
+ for(i=0;i<4;i++) { \
+ pixels[0] += block[0]; \
+ pixels[1] += block[1]; \
+ pixels[2] += block[2]; \
+ pixels[3] += block[3]; \
+ pixels += line_size; \
+ block += 4; \
+ } \
+} \
+ \
+static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
+{ \
+ memset(block, 0, sizeof(dctcoef)*64); \
+} \
+ \
+/** \
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64) \
+ */ \
+static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
+{ \
+ memset(blocks, 0, sizeof(dctcoef)*6*64); \
}
-static void FUNCC(add_pixels4)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size)
-{
- int i;
- pixel *restrict pixels = (pixel *restrict)p_pixels;
- dctcoef *block = (dctcoef*)p_block;
- line_size >>= sizeof(pixel)-1;
-
- for(i=0;i<4;i++) {
- pixels[0] += block[0];
- pixels[1] += block[1];
- pixels[2] += block[2];
- pixels[3] += block[3];
- pixels += line_size;
- block += 4;
- }
-}
+DCTELEM_FUNCS(DCTELEM, _16)
+#if BIT_DEPTH > 8
+DCTELEM_FUNCS(dctcoef, _32)
+#endif
#define PIXOP2(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -1232,16 +1278,3 @@ void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
FUNCC(avg_pixels16)(dst, src, stride, 16);
}
-static void FUNCC(clear_block)(DCTELEM *block)
-{
- memset(block, 0, sizeof(dctcoef)*64);
-}
-
-/**
- * memset(blocks, 0, sizeof(DCTELEM)*6*64)
- */
-static void FUNCC(clear_blocks)(DCTELEM *blocks)
-{
- memset(blocks, 0, sizeof(dctcoef)*6*64);
-}
-
diff --git a/libavcodec/eac3enc.c b/libavcodec/eac3enc.c
index 09fa80f..038aa2a 100644
--- a/libavcodec/eac3enc.c
+++ b/libavcodec/eac3enc.c
@@ -63,6 +63,11 @@ void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s)
{
int ch;
+ if (s->num_blocks < 6) {
+ s->use_frame_exp_strategy = 0;
+ return;
+ }
+
s->use_frame_exp_strategy = 1;
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
int expstr = eac3_frame_expstr_index_tab[s->exp_strategy[ch][0]-1]
@@ -89,7 +94,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
/* set first cpl coords */
for (ch = 1; ch <= s->fbw_channels; ch++)
first_cpl_coords[ch] = 1;
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch]) {
@@ -104,7 +109,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
}
/* set first cpl leak */
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (block->cpl_in_use) {
block->new_cpl_leak = 2;
@@ -130,7 +135,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
} else {
put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
- put_bits(&s->pb, 2, 0x3); /* number of blocks = 6 */
+ put_bits(&s->pb, 2, s->num_blks_code); /* number of blocks */
}
put_bits(&s->pb, 3, s->channel_mode); /* audio coding mode */
put_bits(&s->pb, 1, s->lfe_on); /* LFE channel indicator */
@@ -141,11 +146,15 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
/* TODO: mixing metadata */
put_bits(&s->pb, 1, 0); /* no info metadata */
/* TODO: info metadata */
+ if (s->num_blocks != 6)
+ put_bits(&s->pb, 1, !(s->avctx->frame_number % 6)); /* converter sync flag */
put_bits(&s->pb, 1, 0); /* no additional bit stream info */
/* frame header */
+ if (s->num_blocks == 6) {
put_bits(&s->pb, 1, !s->use_frame_exp_strategy);/* exponent strategy syntax */
put_bits(&s->pb, 1, 0); /* aht enabled = no */
+ }
put_bits(&s->pb, 2, 0); /* snr offset strategy = 1 */
put_bits(&s->pb, 1, 0); /* transient pre-noise processing enabled = no */
put_bits(&s->pb, 1, 0); /* block switch syntax enabled = no */
@@ -158,7 +167,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
/* coupling strategy use flags */
if (s->channel_mode > AC3_CHMODE_MONO) {
put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
- for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
+ for (blk = 1; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
put_bits(&s->pb, 1, block->new_cpl_strategy);
if (block->new_cpl_strategy)
@@ -170,26 +179,31 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
} else {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
}
if (s->lfe_on) {
- for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
+ for (blk = 0; blk < s->num_blocks; blk++)
put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
}
- /* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
+ /* E-AC-3 to AC-3 converter exponent strategy (not optional when num blocks == 6) */
+ if (s->num_blocks != 6) {
+ put_bits(&s->pb, 1, 0);
+ } else {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->use_frame_exp_strategy)
put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
else
put_bits(&s->pb, 5, 0);
}
+ }
/* snr offsets */
put_bits(&s->pb, 6, s->coarse_snr_offset);
put_bits(&s->pb, 4, s->fine_snr_offset[1]);
/* block start info */
- put_bits(&s->pb, 1, 0);
+ if (s->num_blocks > 1)
+ put_bits(&s->pb, 1, 0);
}
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 1fde745..83d81c9 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -3707,6 +3707,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
+ s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
dsputil_init(&s->dsp, s->avctx);
} else {
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 9aa584f..6593e88 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -42,9 +42,6 @@
#include "x86/dsputil_mmx.h"
#endif
-#define xglue(x, y) x ## y
-#define glue(x, y) xglue(x, y)
-
#define FF_COLOR_RGB 0 /**< RGB color space */
#define FF_COLOR_GRAY 1 /**< gray color space */
#define FF_COLOR_YUV 2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */
diff --git a/libavcodec/jfdctint.c b/libavcodec/jfdctint.c
index 072c744..0482bc5 100644
--- a/libavcodec/jfdctint.c
+++ b/libavcodec/jfdctint.c
@@ -1,402 +1,25 @@
-/*
- * jfdctint.c
- *
- * This file is part of the Independent JPEG Group's software.
- *
- * The authors make NO WARRANTY or representation, either express or implied,
- * with respect to this software, its quality, accuracy, merchantability, or
- * fitness for a particular purpose. This software is provided "AS IS", and
- * you, its user, assume the entire risk as to its quality and accuracy.
- *
- * This software is copyright (C) 1991-1996, Thomas G. Lane.
- * All Rights Reserved except as specified below.
- *
- * Permission is hereby granted to use, copy, modify, and distribute this
- * software (or portions thereof) for any purpose, without fee, subject to
- * these conditions:
- * (1) If any part of the source code for this software is distributed, then
- * this README file must be included, with this copyright and no-warranty
- * notice unaltered; and any additions, deletions, or changes to the original
- * files must be clearly indicated in accompanying documentation.
- * (2) If only executable code is distributed, then the accompanying
- * documentation must state that "this software is based in part on the work
- * of the Independent JPEG Group".
- * (3) Permission for use of this software is granted only if the user accepts
- * full responsibility for any undesirable consequences; the authors accept
- * NO LIABILITY for damages of any kind.
- *
- * These conditions apply to any software derived from or based on the IJG
- * code, not just to the unmodified library. If you use our work, you ought
- * to acknowledge us.
- *
- * Permission is NOT granted for the use of any IJG author's name or company
- * name in advertising or publicity relating to this software or products
- * derived from it. This software may be referred to only as "the Independent
- * JPEG Group's software".
- *
- * We specifically permit and encourage the use of this software as the basis
- * of commercial products, provided that all warranty or liability claims are
- * assumed by the product vendor.
- *
- * This file contains a slow-but-accurate integer implementation of the
- * forward DCT (Discrete Cosine Transform).
- *
- * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
- * on each column. Direct algorithms are also available, but they are
- * much more complex and seem not to be any faster when reduced to code.
- *
- * This implementation is based on an algorithm described in
- * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
- * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
- * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
- * The primary algorithm described there uses 11 multiplies and 29 adds.
- * We use their alternate method with 12 multiplies and 32 adds.
- * The advantage of this method is that no data path contains more than one
- * multiplication; this allows a very simple and accurate implementation in
- * scaled fixed-point arithmetic, with a minimal number of shifts.
- */
-
/**
- * @file
- * Independent JPEG Group's slow & accurate dct.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "libavutil/common.h"
-#include "dsputil.h"
-
-#define DCTSIZE 8
-#define BITS_IN_JSAMPLE 8
-#define GLOBAL(x) x
-#define RIGHT_SHIFT(x, n) ((x) >> (n))
-#define MULTIPLY16C16(var,const) ((var)*(const))
-
-#if 1 //def USE_ACCURATE_ROUNDING
-#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
-#else
-#define DESCALE(x,n) RIGHT_SHIFT(x, n)
-#endif
-
-
-/*
- * This module is specialized to the case DCTSIZE = 8.
- */
-
-#if DCTSIZE != 8
- Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
-#endif
-
-
-/*
- * The poop on this scaling stuff is as follows:
- *
- * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
- * larger than the true DCT outputs. The final outputs are therefore
- * a factor of N larger than desired; since N=8 this can be cured by
- * a simple right shift at the end of the algorithm. The advantage of
- * this arrangement is that we save two multiplications per 1-D DCT,
- * because the y0 and y4 outputs need not be divided by sqrt(N).
- * In the IJG code, this factor of 8 is removed by the quantization step
- * (in jcdctmgr.c), NOT in this module.
+ * This file is part of Libav.
*
- * We have to do addition and subtraction of the integer inputs, which
- * is no problem, and multiplication by fractional constants, which is
- * a problem to do in integer arithmetic. We multiply all the constants
- * by CONST_SCALE and convert them to integer constants (thus retaining
- * CONST_BITS bits of precision in the constants). After doing a
- * multiplication we have to divide the product by CONST_SCALE, with proper
- * rounding, to produce the correct output. This division can be done
- * cheaply as a right shift of CONST_BITS bits. We postpone shifting
- * as long as possible so that partial sums can be added together with
- * full fractional precision.
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
*
- * The outputs of the first pass are scaled up by PASS1_BITS bits so that
- * they are represented to better-than-integral precision. These outputs
- * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
- * with the recommended scaling. (For 12-bit sample data, the intermediate
- * array is int32_t anyway.)
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
*
- * To avoid overflow of the 32-bit intermediate results in pass 2, we must
- * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
- * shows that the values given below are the most effective.
- */
-
-#if BITS_IN_JSAMPLE == 8
-#define CONST_BITS 13
-#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
-#else
-#define CONST_BITS 13
-#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
-#endif
-
-/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
- * causing a lot of useless floating-point operations at run time.
- * To get around this we use the following pre-calculated constants.
- * If you change CONST_BITS you may want to add appropriate values.
- * (With a reasonable C compiler, you can just rely on the FIX() macro...)
- */
-
-#if CONST_BITS == 13
-#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
-#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
-#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
-#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
-#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
-#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
-#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
-#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
-#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
-#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
-#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
-#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
-#else
-#define FIX_0_298631336 FIX(0.298631336)
-#define FIX_0_390180644 FIX(0.390180644)
-#define FIX_0_541196100 FIX(0.541196100)
-#define FIX_0_765366865 FIX(0.765366865)
-#define FIX_0_899976223 FIX(0.899976223)
-#define FIX_1_175875602 FIX(1.175875602)
-#define FIX_1_501321110 FIX(1.501321110)
-#define FIX_1_847759065 FIX(1.847759065)
-#define FIX_1_961570560 FIX(1.961570560)
-#define FIX_2_053119869 FIX(2.053119869)
-#define FIX_2_562915447 FIX(2.562915447)
-#define FIX_3_072711026 FIX(3.072711026)
-#endif
-
-
-/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
- * For 8-bit samples with the recommended scaling, all the variable
- * and constant values involved are no more than 16 bits wide, so a
- * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
- * For 12-bit samples, a full 32-bit multiplication will be needed.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
-#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
-#else
-#define MULTIPLY(var,const) ((var) * (const))
-#endif
-
-
-static av_always_inline void row_fdct(DCTELEM * data){
- int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- int tmp10, tmp11, tmp12, tmp13;
- int z1, z2, z3, z4, z5;
- DCTELEM *dataptr;
- int ctr;
-
- /* Pass 1: process rows. */
- /* Note results are scaled up by sqrt(8) compared to a true DCT; */
- /* furthermore, we scale the results by 2**PASS1_BITS. */
-
- dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[0] + dataptr[7];
- tmp7 = dataptr[0] - dataptr[7];
- tmp1 = dataptr[1] + dataptr[6];
- tmp6 = dataptr[1] - dataptr[6];
- tmp2 = dataptr[2] + dataptr[5];
- tmp5 = dataptr[2] - dataptr[5];
- tmp3 = dataptr[3] + dataptr[4];
- tmp4 = dataptr[3] - dataptr[4];
-
- /* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
- */
-
- tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
- dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
- CONST_BITS-PASS1_BITS);
- dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
- CONST_BITS-PASS1_BITS);
-
- /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents cos(K*pi/16).
- * i0..i3 in the paper are tmp4..tmp7 here.
- */
-
- z1 = tmp4 + tmp7;
- z2 = tmp5 + tmp6;
- z3 = tmp4 + tmp6;
- z4 = tmp5 + tmp7;
- z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
- tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
- tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
- tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
- tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
- z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
- z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
- z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
- z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
- z3 += z5;
- z4 += z5;
-
- dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
- dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
- dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
- dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-
- dataptr += DCTSIZE; /* advance pointer to next row */
- }
-}
-
-/*
- * Perform the forward DCT on one block of samples.
- */
-
-GLOBAL(void)
-ff_jpeg_fdct_islow (DCTELEM * data)
-{
- int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- int tmp10, tmp11, tmp12, tmp13;
- int z1, z2, z3, z4, z5;
- DCTELEM *dataptr;
- int ctr;
-
- row_fdct(data);
-
- /* Pass 2: process columns.
- * We remove the PASS1_BITS scaling, but leave the results scaled up
- * by an overall factor of 8.
- */
-
- dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
- tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
- tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
- tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
- tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
- tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
- tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
- tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-
- /* Even part per LL&M figure 1 --- note that published figure is faulty;
- * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
- */
-
- tmp10 = tmp0 + tmp3;
- tmp13 = tmp0 - tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
-
- dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
- dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
- CONST_BITS+PASS1_BITS);
-
- /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
- * cK represents cos(K*pi/16).
- * i0..i3 in the paper are tmp4..tmp7 here.
- */
-
- z1 = tmp4 + tmp7;
- z2 = tmp5 + tmp6;
- z3 = tmp4 + tmp6;
- z4 = tmp5 + tmp7;
- z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-
- tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
- tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
- tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
- tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
- z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
- z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
- z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
- z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-
- z3 += z5;
- z4 += z5;
-
- dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
- CONST_BITS+PASS1_BITS);
-
- dataptr++; /* advance pointer to next column */
- }
-}
-
-/*
- * The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
- * on the rows and then, instead of doing even and odd, part on the colums
- * you do even part two times.
- */
-GLOBAL(void)
-ff_fdct248_islow (DCTELEM * data)
-{
- int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- int tmp10, tmp11, tmp12, tmp13;
- int z1;
- DCTELEM *dataptr;
- int ctr;
-
- row_fdct(data);
-
- /* Pass 2: process columns.
- * We remove the PASS1_BITS scaling, but leave the results scaled up
- * by an overall factor of 8.
- */
-
- dataptr = data;
- for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
- tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
- tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
- tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
- tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
- tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
- tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
- tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
- tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-
- tmp10 = tmp0 + tmp3;
- tmp11 = tmp1 + tmp2;
- tmp12 = tmp1 - tmp2;
- tmp13 = tmp0 - tmp3;
-
- dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
- dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
- CONST_BITS+PASS1_BITS);
-
- tmp10 = tmp4 + tmp7;
- tmp11 = tmp5 + tmp6;
- tmp12 = tmp5 - tmp6;
- tmp13 = tmp4 - tmp7;
-
- dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
- dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-
- z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
- dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
- CONST_BITS+PASS1_BITS);
- dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
- CONST_BITS+PASS1_BITS);
+#define BIT_DEPTH 8
+#include "jfdctint_template.c"
+#undef BIT_DEPTH
- dataptr++; /* advance pointer to next column */
- }
-}
+#define BIT_DEPTH 10
+#include "jfdctint_template.c"
+#undef BIT_DEPTH
diff --git a/libavcodec/jfdctint_template.c b/libavcodec/jfdctint_template.c
new file mode 100644
index 0000000..e60e72a
--- /dev/null
+++ b/libavcodec/jfdctint_template.c
@@ -0,0 +1,405 @@
+/*
+ * jfdctint.c
+ *
+ * This file is part of the Independent JPEG Group's software.
+ *
+ * The authors make NO WARRANTY or representation, either express or implied,
+ * with respect to this software, its quality, accuracy, merchantability, or
+ * fitness for a particular purpose. This software is provided "AS IS", and
+ * you, its user, assume the entire risk as to its quality and accuracy.
+ *
+ * This software is copyright (C) 1991-1996, Thomas G. Lane.
+ * All Rights Reserved except as specified below.
+ *
+ * Permission is hereby granted to use, copy, modify, and distribute this
+ * software (or portions thereof) for any purpose, without fee, subject to
+ * these conditions:
+ * (1) If any part of the source code for this software is distributed, then
+ * this README file must be included, with this copyright and no-warranty
+ * notice unaltered; and any additions, deletions, or changes to the original
+ * files must be clearly indicated in accompanying documentation.
+ * (2) If only executable code is distributed, then the accompanying
+ * documentation must state that "this software is based in part on the work
+ * of the Independent JPEG Group".
+ * (3) Permission for use of this software is granted only if the user accepts
+ * full responsibility for any undesirable consequences; the authors accept
+ * NO LIABILITY for damages of any kind.
+ *
+ * These conditions apply to any software derived from or based on the IJG
+ * code, not just to the unmodified library. If you use our work, you ought
+ * to acknowledge us.
+ *
+ * Permission is NOT granted for the use of any IJG author's name or company
+ * name in advertising or publicity relating to this software or products
+ * derived from it. This software may be referred to only as "the Independent
+ * JPEG Group's software".
+ *
+ * We specifically permit and encourage the use of this software as the basis
+ * of commercial products, provided that all warranty or liability claims are
+ * assumed by the product vendor.
+ *
+ * This file contains a slow-but-accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column. Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+/**
+ * @file
+ * Independent JPEG Group's slow & accurate dct.
+ */
+
+#include "libavutil/common.h"
+#include "dsputil.h"
+
+#include "bit_depth_template.c"
+
+#define DCTSIZE 8
+#define BITS_IN_JSAMPLE BIT_DEPTH
+#define GLOBAL(x) x
+#define RIGHT_SHIFT(x, n) ((x) >> (n))
+#define MULTIPLY16C16(var,const) ((var)*(const))
+
+#if 1 //def USE_ACCURATE_ROUNDING
+#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
+#else
+#define DESCALE(x,n) RIGHT_SHIFT(x, n)
+#endif
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+#error "Sorry, this code only copes with 8x8 DCTs."
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs. The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm. The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic. We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants). After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output. This division can be done
+ * cheaply as a right shift of CONST_BITS bits. We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision. These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling. (For 12-bit sample data, the intermediate
+ * array is int32_t anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#undef CONST_BITS
+#undef PASS1_BITS
+#undef OUT_SHIFT
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS 13
+#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
+#define OUT_SHIFT PASS1_BITS
+#else
+#define CONST_BITS 13
+#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
+#define OUT_SHIFT (PASS1_BITS + 1)
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
+#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
+#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
+#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
+#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
+#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
+#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
+#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
+#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
+#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
+#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
+#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336 FIX(0.298631336)
+#define FIX_0_390180644 FIX(0.390180644)
+#define FIX_0_541196100 FIX(0.541196100)
+#define FIX_0_765366865 FIX(0.765366865)
+#define FIX_0_899976223 FIX(0.899976223)
+#define FIX_1_175875602 FIX(1.175875602)
+#define FIX_1_501321110 FIX(1.501321110)
+#define FIX_1_847759065 FIX(1.847759065)
+#define FIX_1_961570560 FIX(1.961570560)
+#define FIX_2_053119869 FIX(2.053119869)
+#define FIX_2_562915447 FIX(2.562915447)
+#define FIX_3_072711026 FIX(3.072711026)
+#endif
+
+
+/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
+#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
+#else
+#define MULTIPLY(var,const) ((var) * (const))
+#endif
+
+
+static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
+{
+ int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int tmp10, tmp11, tmp12, tmp13;
+ int z1, z2, z3, z4, z5;
+ DCTELEM *dataptr;
+ int ctr;
+
+ /* Pass 1: process rows. */
+ /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+ /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ tmp0 = dataptr[0] + dataptr[7];
+ tmp7 = dataptr[0] - dataptr[7];
+ tmp1 = dataptr[1] + dataptr[6];
+ tmp6 = dataptr[1] - dataptr[6];
+ tmp2 = dataptr[2] + dataptr[5];
+ tmp5 = dataptr[2] - dataptr[5];
+ tmp3 = dataptr[3] + dataptr[4];
+ tmp4 = dataptr[3] - dataptr[4];
+
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ */
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
+ dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ CONST_BITS-PASS1_BITS);
+ dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ CONST_BITS-PASS1_BITS);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * cK represents cos(K*pi/16).
+ * i0..i3 in the paper are tmp4..tmp7 here.
+ */
+
+ z1 = tmp4 + tmp7;
+ z2 = tmp5 + tmp6;
+ z3 = tmp4 + tmp6;
+ z4 = tmp5 + tmp7;
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+ z3 += z5;
+ z4 += z5;
+
+ dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
+ dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
+ dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
+ dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
+
+ dataptr += DCTSIZE; /* advance pointer to next row */
+ }
+}
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
+{
+ int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int tmp10, tmp11, tmp12, tmp13;
+ int z1, z2, z3, z4, z5;
+ DCTELEM *dataptr;
+ int ctr;
+
+ FUNC(row_fdct)(data);
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
+ tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
+ tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
+ tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
+ tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
+ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
+ tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
+
+ /* Even part per LL&M figure 1 --- note that published figure is faulty;
+ * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+ */
+
+ tmp10 = tmp0 + tmp3;
+ tmp13 = tmp0 - tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+
+ dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+ dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ CONST_BITS + OUT_SHIFT);
+ dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ CONST_BITS + OUT_SHIFT);
+
+ /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+ * cK represents cos(K*pi/16).
+ * i0..i3 in the paper are tmp4..tmp7 here.
+ */
+
+ z1 = tmp4 + tmp7;
+ z2 = tmp5 + tmp6;
+ z3 = tmp4 + tmp6;
+ z4 = tmp5 + tmp7;
+ z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+ tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+ tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+ tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+ tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+ z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
+ z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+ z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+ z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
+
+ z3 += z5;
+ z4 += z5;
+
+ dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
+ dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
+ dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
+ dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
+
+/*
+ * The secret of DCT2-4-8 is really simple -- you do the usual 1-D DCT
+ * on the rows and then, instead of doing the even and odd parts on the
+ * columns, you do the even part two times.
+ */
+GLOBAL(void)
+FUNC(ff_fdct248_islow)(DCTELEM *data)
+{
+ int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ int tmp10, tmp11, tmp12, tmp13;
+ int z1;
+ DCTELEM *dataptr;
+ int ctr;
+
+ FUNC(row_fdct)(data);
+
+ /* Pass 2: process columns.
+ * We remove the PASS1_BITS scaling, but leave the results scaled up
+ * by an overall factor of 8.
+ */
+
+ dataptr = data;
+ for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
+ tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
+ tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
+ tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
+ tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
+ tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
+ tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
+ tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
+ tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
+
+ tmp10 = tmp0 + tmp3;
+ tmp11 = tmp1 + tmp2;
+ tmp12 = tmp1 - tmp2;
+ tmp13 = tmp0 - tmp3;
+
+ dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+ dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ CONST_BITS+OUT_SHIFT);
+ dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ CONST_BITS+OUT_SHIFT);
+
+ tmp10 = tmp4 + tmp7;
+ tmp11 = tmp5 + tmp6;
+ tmp12 = tmp5 - tmp6;
+ tmp13 = tmp4 - tmp7;
+
+ dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
+ dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
+
+ z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+ dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+ CONST_BITS + OUT_SHIFT);
+ dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
+ CONST_BITS + OUT_SHIFT);
+
+ dataptr++; /* advance pointer to next column */
+ }
+}
diff --git a/libavcodec/mlib/dsputil_mlib.c b/libavcodec/mlib/dsputil_mlib.c
index 1a18a8a..b5594a9 100644
--- a/libavcodec/mlib/dsputil_mlib.c
+++ b/libavcodec/mlib/dsputil_mlib.c
@@ -421,13 +421,14 @@ static void ff_fdct_mlib(DCTELEM *data)
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
- c->get_pixels = get_pixels_mlib;
c->diff_pixels = diff_pixels_mlib;
c->add_pixels_clamped = add_pixels_clamped_mlib;
if (!high_bit_depth) {
+ c->get_pixels = get_pixels_mlib;
+
c->put_pixels_tab[0][0] = put_pixels16_mlib;
c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;
diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c
index b2a0187..b983a44 100644
--- a/libavcodec/mpeg4videodec.c
+++ b/libavcodec/mpeg4videodec.c
@@ -1527,6 +1527,22 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
return 0;
}
+static int mpeg4_decode_profile_level(MpegEncContext * s, GetBitContext *gb){
+ int profile_and_level_indication;
+
+ profile_and_level_indication = get_bits(gb, 8);
+
+ s->avctx->profile = (profile_and_level_indication & 0xf0) >> 4;
+ s->avctx->level = (profile_and_level_indication & 0x0f);
+
+ // Simple Profile (profile 0) signals level 0 with the code 8; report it as level 0
+ if (s->avctx->profile == 0 && s->avctx->level == 8) {
+ s->avctx->level = 0;
+ }
+
+ return 0;
+}
+
static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
int width, height, vo_ver_id;
@@ -2181,6 +2197,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
else if(startcode == GOP_STARTCODE){
mpeg4_decode_gop_header(s, gb);
}
+ else if(startcode == VOS_STARTCODE){
+ mpeg4_decode_profile_level(s, gb);
+ }
else if(startcode == VOP_STARTCODE){
break;
}
@@ -2241,6 +2260,25 @@ static av_cold int decode_init(AVCodecContext *avctx)
return 0;
}
+static const AVProfile mpeg4_video_profiles[] = {
+ { FF_PROFILE_MPEG4_SIMPLE, "Simple Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_SCALABLE, "Simple Scalable Profile" },
+ { FF_PROFILE_MPEG4_CORE, "Core Profile" },
+ { FF_PROFILE_MPEG4_MAIN, "Main Profile" },
+ { FF_PROFILE_MPEG4_N_BIT, "N-bit Profile" },
+ { FF_PROFILE_MPEG4_SCALABLE_TEXTURE, "Scalable Texture Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION, "Simple Face Animation Profile" },
+ { FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE, "Basic Animated Texture Profile" },
+ { FF_PROFILE_MPEG4_HYBRID, "Hybrid Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_REAL_TIME, "Advanced Real Time Simple Profile" },
+ { FF_PROFILE_MPEG4_CORE_SCALABLE, "Core Scalable Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_CODING, "Advanced Coding Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_CORE, "Advanced Core Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
+ { FF_PROFILE_MPEG4_SIMPLE_STUDIO, "Simple Studio Profile" },
+ { FF_PROFILE_MPEG4_ADVANCED_SIMPLE, "Advanced Simple Profile" },
+};
+
AVCodec ff_mpeg4_decoder = {
"mpeg4",
AVMEDIA_TYPE_VIDEO,
@@ -2255,6 +2293,7 @@ AVCodec ff_mpeg4_decoder = {
.max_lowres= 3,
.long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
.pix_fmts= ff_hwaccel_pixfmt_list_420,
+ .profiles = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles),
.update_thread_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_thread_context)
};
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index af32442..6b2c7c7 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -69,7 +69,8 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
for(qscale=qmin; qscale<=qmax; qscale++){
int i;
- if (dsp->fdct == ff_jpeg_fdct_islow
+ if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
+ dsp->fdct == ff_jpeg_fdct_islow_10
#ifdef FAAN_POSTSCALE
|| dsp->fdct == ff_faandct
#endif
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index bda8124..7f36fa9 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -1373,7 +1373,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec;
@@ -1387,11 +1387,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->sse[0]= sse16_altivec;
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
- c->get_pixels = get_pixels_altivec;
- if (!high_bit_depth)
- c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
if (!high_bit_depth) {
+ c->get_pixels = get_pixels_altivec;
+ c->clear_block = clear_block_altivec;
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 8229fb5..6e85241 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -145,7 +145,7 @@ static void prefetch_ppc(void *mem, int stride, int h)
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
// Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
@@ -172,8 +172,9 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->gmc1 = gmc1_altivec;
#if CONFIG_ENCODERS
- if (avctx->dct_algo == FF_DCT_AUTO ||
- avctx->dct_algo == FF_DCT_ALTIVEC) {
+ if (avctx->bits_per_raw_sample <= 8 &&
+ (avctx->dct_algo == FF_DCT_AUTO ||
+ avctx->dct_algo == FF_DCT_ALTIVEC)) {
c->fdct = fdct_altivec;
}
#endif //CONFIG_ENCODERS
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 9ba6bba..223971b 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -967,7 +967,7 @@ H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
if (!high_bit_depth) {
diff --git a/libavcodec/ps2/dsputil_mmi.c b/libavcodec/ps2/dsputil_mmi.c
index 707d1c9..d04a425 100644
--- a/libavcodec/ps2/dsputil_mmi.c
+++ b/libavcodec/ps2/dsputil_mmi.c
@@ -142,7 +142,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->clear_blocks = clear_blocks_mmi;
@@ -152,9 +152,9 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_tab[0][0] = put_pixels16_mmi;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
- }
c->get_pixels = get_pixels_mmi;
+ }
if (avctx->bits_per_raw_sample <= 8 &&
(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2)) {
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 8be9318..e918936 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -333,7 +333,7 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index 8850267..905e8b1 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -92,7 +92,7 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
dsputil_init_align(c,avctx);
if (!high_bit_depth)
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index 2806182..bb80cd9 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
{
/* VIS-specific optimizations */
int accel = vis_level ();
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (accel & ACCEL_SPARC_VIS) {
if (avctx->bits_per_raw_sample <= 8 &&
diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c
index 1256bee..1f2b035 100644
--- a/libavcodec/x86/dnxhd_mmx.c
+++ b/libavcodec/x86/dnxhd_mmx.c
@@ -53,6 +53,7 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l
void ff_dnxhd_init_mmx(DNXHDEncContext *ctx)
{
if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) {
- ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
+ if (ctx->cid_table->bit_depth == 8)
+ ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
}
}
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 53ffaca..6d96ab3 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2341,7 +2341,7 @@ void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
- const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
+ const int high_bit_depth = avctx->bits_per_raw_sample > 8;
const int bit_depth = avctx->bits_per_raw_sample;
if (avctx->dsp_mask) {
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index bd31205..f13c121 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -1098,10 +1098,12 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
+ int bit_depth = avctx->bits_per_raw_sample;
if (mm_flags & AV_CPU_FLAG_MMX) {
const int dct_algo = avctx->dct_algo;
- if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
+ if (avctx->bits_per_raw_sample <= 8 &&
+ (dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
if(mm_flags & AV_CPU_FLAG_SSE2){
c->fdct = ff_fdct_sse2;
}else if(mm_flags & AV_CPU_FLAG_MMX2){
@@ -1111,7 +1113,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
- c->get_pixels = get_pixels_mmx;
+ if (bit_depth <= 8)
+ c->get_pixels = get_pixels_mmx;
c->diff_pixels = diff_pixels_mmx;
c->pix_sum = pix_sum16_mmx;
@@ -1158,7 +1161,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
if(mm_flags & AV_CPU_FLAG_SSE2){
- c->get_pixels = get_pixels_sse2;
+ if (bit_depth <= 8)
+ c->get_pixels = get_pixels_sse2;
c->sum_abs_dctelem= sum_abs_dctelem_sse2;
#if HAVE_YASM && HAVE_ALIGNED_STACK
c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;
OpenPOWER on IntegriCloud