diff options
Diffstat (limited to 'libavcodec/aaccoder.c')
-rw-r--r-- | libavcodec/aaccoder.c | 377 |
1 files changed, 309 insertions, 68 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index ee89148..5bdba46 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -2,20 +2,20 @@ * AAC coefficients encoder * Copyright (C) 2008-2009 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -40,6 +40,27 @@ #include "aacenc.h" #include "aactab.h" +/** Frequency in Hz for lower limit of noise substitution **/ +#define NOISE_LOW_LIMIT 4500 + +/* Energy spread threshold value below which no PNS is used, this corresponds to + * typically around 17Khz, after which PNS usage decays ending at 19Khz */ +#define NOISE_SPREAD_THRESHOLD 0.5f + +/* This constant gets divided by lambda to return ~1.65 which when multiplied + * by the band->threshold and compared to band->energy is the boundary between + * excessive PNS and little PNS usage. */ +#define NOISE_LAMBDA_NUMERATOR 252.1f + +/** Frequency in Hz for lower limit of intensity stereo **/ +#define INT_STEREO_LOW_LIMIT 6100 + +/** Total number of usable codebooks **/ +#define CB_TOT 12 + +/** Total number of codebooks, including special ones **/ +#define CB_TOT_ALL 15 + /** bits needed to code codebook run value for long windows */ static const uint8_t run_value_bits_long[64] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -53,10 +74,14 @@ static const uint8_t run_value_bits_short[16] = { 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9 }; -static const uint8_t *run_value_bits[2] = { +static const uint8_t * const run_value_bits[2] = { run_value_bits_long, run_value_bits_short }; +/** Map to convert values from BandCodingPath index to a codebook index **/ +static const uint8_t aac_cb_out_map[CB_TOT_ALL] = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15}; +/** Inverse map to convert from codebooks to BandCodingPath indices **/ +static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14}; /** * Quantize one coefficient. @@ -108,7 +133,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int BT_ZERO, int BT_UNSIGNED, - int BT_PAIR, int BT_ESC) + int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO) { const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512; const float Q = ff_aac_pow2sf_tab [q_idx]; @@ -119,11 +144,9 @@ static av_always_inline float quantize_and_encode_band_cost_template( float cost = 0; const int dim = BT_PAIR ? 2 : 4; int resbits = 0; - const int range = aac_cb_range[cb]; - const int maxval = aac_cb_maxval[cb]; int off; - if (BT_ZERO) { + if (BT_ZERO || BT_NOISE || BT_STEREO) { for (i = 0; i < size; i++) cost += in[i]*in[i]; if (bits) @@ -134,11 +157,11 @@ static av_always_inline float quantize_and_encode_band_cost_template( abs_pow34_v(s->scoefs, in, size); scaled = s->scoefs; } - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, maxval); + quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]); if (BT_UNSIGNED) { off = 0; } else { - off = maxval; + off = aac_cb_maxval[cb]; } for (i = 0; i < size; i += dim) { const float *vec; @@ -147,7 +170,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( int curbits; float rd = 0.0f; for (j = 0; j < dim; j++) { - curidx *= range; + curidx *= aac_cb_range[cb]; curidx += quants[j] + off; } curbits = ff_aac_spectral_bits[cb-1][curidx]; @@ -195,7 +218,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( int len = av_log2(coef); put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2); - put_bits(pb, len, coef & ((1 << len) - 1)); + put_sbits(pb, len, coef); } } } @@ -207,25 +230,36 @@ static av_always_inline float quantize_and_encode_band_cost_template( return cost; } -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC) \ -static float quantize_and_encode_band_cost_ ## NAME( \ - struct AACEncContext *s, \ - PutBitContext *pb, const float *in, \ - const float *scaled, int size, int scale_idx, \ - int cb, const float lambda, const float uplim, \ - int *bits) { \ - return quantize_and_encode_band_cost_template( \ - s, pb, in, scaled, size, scale_idx, \ - BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); \ +static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb, + const float *in, const float *scaled, + int size, int scale_idx, int cb, + const float lambda, const float uplim, + int *bits) { + av_assert0(0); + return 0.0f; } -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1) +#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \ +static float quantize_and_encode_band_cost_ ## NAME( \ + struct AACEncContext *s, \ + PutBitContext *pb, const float *in, \ + const float *scaled, int size, int scale_idx, \ + int cb, const float lambda, const float uplim, \ + int *bits) { \ + return quantize_and_encode_band_cost_template( \ + s, pb, in, scaled, size, scale_idx, \ + BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ + BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \ +} + +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1) static float (*const quantize_and_encode_band_cost_arr[])( struct AACEncContext *s, @@ -245,6 +279,10 @@ static float (*const quantize_and_encode_band_cost_arr[])( quantize_and_encode_band_cost_UPAIR, quantize_and_encode_band_cost_UPAIR, quantize_and_encode_band_cost_ESC, + quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */ + quantize_and_encode_band_cost_NOISE, + quantize_and_encode_band_cost_STEREO, + quantize_and_encode_band_cost_STEREO, }; #define quantize_and_encode_band_cost( \ @@ -312,7 +350,7 @@ typedef struct BandCodingPath { static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT_ALL]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -325,7 +363,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[0][cb].cost = 0.0f; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -333,7 +371,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce for (swb = 0; swb < max_sfb; swb++) { size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[swb+1][cb].prev_idx = cb; path[swb+1][cb].cost = path[swb][cb].cost; path[swb+1][cb].run = path[swb][cb].run + 1; @@ -343,14 +381,21 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce int mincb = next_mincb; next_minrd = INFINITY; next_mincb = 0; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { float cost_stay_here, cost_get_here; float rd = 0.0f; + if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] || + cb < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) { + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].cost = INFINITY; + path[swb+1][cb].run = path[swb][cb].run + 1; + continue; + } for (w = 0; w < group_len; w++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; rd += quantize_band_cost(s, sce->coeffs + start + w*128, s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, + sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], lambda / band->threshold, INFINITY, NULL); } cost_stay_here = path[swb][cb].cost + rd; @@ -379,11 +424,12 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT_ALL; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; while (ppos > 0) { + av_assert1(idx >= 0); cb = idx; stackrun[stack_len] = path[ppos][cb].run; stackcb [stack_len] = cb; @@ -394,12 +440,13 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -413,7 +460,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT_ALL]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -426,7 +473,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[0][cb].cost = run_bits+4; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -450,7 +497,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, } next_minbits = path[swb+1][0].cost; next_mincb = 0; - for (cb = 1; cb < 12; cb++) { + for (cb = 1; cb < CB_TOT_ALL; cb++) { path[swb+1][cb].cost = 61450; path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; @@ -459,6 +506,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, float minbits = next_minbits; int mincb = next_mincb; int startcb = sce->band_type[win*16+swb]; + startcb = aac_cb_in_map[startcb]; next_minbits = INFINITY; next_mincb = 0; for (cb = 0; cb < startcb; cb++) { @@ -466,13 +514,20 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; } - for (cb = startcb; cb < 12; cb++) { + for (cb = startcb; cb < CB_TOT_ALL; cb++) { float cost_stay_here, cost_get_here; float bits = 0.0f; + if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) { + path[swb+1][cb].cost = 61450; + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].run = 0; + continue; + } for (w = 0; w < group_len; w++) { bits += quantize_band_cost(s, sce->coeffs + start + w*128, s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, + sce->sf_idx[(win+w)*16+swb], + aac_cb_out_map[cb], 0, INFINITY, NULL); } cost_stay_here = path[swb][cb].cost + bits; @@ -501,12 +556,12 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT_ALL; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; while (ppos > 0) { - assert(idx >= 0); + av_assert1(idx >= 0); cb = idx; stackrun[stack_len] = path[ppos][cb].run; stackcb [stack_len] = cb; @@ -517,12 +572,13 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -551,6 +607,43 @@ typedef struct TrellisPath { #define TRELLIS_STAGES 121 #define TRELLIS_STATES (SCALE_MAX_DIFF+1) +static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce) +{ + int w, g, start = 0; + int minscaler_n = sce->sf_idx[0], minscaler_i = sce->sf_idx[0]; + int bands = 0; + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + start = 0; + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = av_clip(ceilf(log2f(sce->is_ener[w*16+g])*2), -155, 100); + minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]); + bands++; + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = av_clip(4+log2f(sce->pns_ener[w*16+g])*2, -100, 155); + minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]); + bands++; + } + start += sce->ics.swb_sizes[g]; + } + } + + if (!bands) + return; + + /* Clip the scalefactor indices */ + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_i, minscaler_i + SCALE_MAX_DIFF); + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF); + } + } + } +} + static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda) @@ -711,7 +804,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, { int start = 0, i, w, w2, g; int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128]; + float dists[128] = { 0 }, uplims[128] = { 0 }; float maxvals[128]; int fflag, minscaler; int its = 0; @@ -726,10 +819,11 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int nz = 0; - float uplim = 0.0f; + float uplim = 0.0f, energy = 0.0f; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; - uplim += band->threshold; + uplim += band->threshold; + energy += band->energy; if (band->energy <= band->threshold || band->threshold == 0.0f) { sce->zeroes[(w+w2)*16+g] = 1; continue; @@ -776,7 +870,6 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, do { int prev = -1; tbits = 0; - fflag = 0; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { @@ -829,6 +922,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, fflag = 0; minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int prevsc = sce->sf_idx[w*16+g]; @@ -875,7 +969,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, } else { for (w = 0; w < 8; w++) { const float *coeffs = sce->coeffs + w*128; - start = 0; + curband = start = 0; for (i = 0; i < 128; i++) { if (i - start >= sce->ics.swb_sizes[curband]) { start += sce->ics.swb_sizes[curband]; @@ -953,7 +1047,6 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, } sce->zeroes[w*16+g] = 0; scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218); - step = 16; for (;;) { float dist = 0.0f; int quant_max; @@ -1051,6 +1144,141 @@ static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; } +static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce, + const float lambda) +{ + int start = 0, w, w2, g; + const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; + const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/120.f); + const float thr_mult = NOISE_LAMBDA_NUMERATOR/lambda; + + /* Coders !twoloop don't reset the band_types */ + for (w = 0; w < 128; w++) + if (sce->band_type[w] == NOISE_BT) + sce->band_type[w] = 0; + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + start = 0; + for (g = 0; g < sce->ics.num_swb; g++) { + if (start*freq_mult > NOISE_LOW_LIMIT*(lambda/170.0f)) { + float energy = 0.0f, threshold = 0.0f, spread = 0.0f; + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + FFPsyBand *band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + energy += band->energy; + threshold += band->threshold; + spread += band->spread; + } + if (spread > spread_threshold*sce->ics.group_len[w] && + ((sce->zeroes[w*16+g] && energy >= threshold) || + energy < threshold*thr_mult*sce->ics.group_len[w])) { + sce->band_type[w*16+g] = NOISE_BT; + sce->pns_ener[w*16+g] = energy / sce->ics.group_len[w]; + sce->zeroes[w*16+g] = 0; + } + } + start += sce->ics.swb_sizes[g]; + } + } +} + +static void search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe, + const float lambda) +{ + float IS[128]; + float *L34 = s->scoefs + 128*0, *R34 = s->scoefs + 128*1; + float *I34 = s->scoefs + 128*2; + SingleChannelElement *sce0 = &cpe->ch[0]; + SingleChannelElement *sce1 = &cpe->ch[1]; + int start = 0, count = 0, i, w, w2, g; + const float freq_mult = avctx->sample_rate/(1024.0f/sce0->ics.num_windows)/2.0f; + + for (w = 0; w < 128; w++) + if (sce1->band_type[w] >= INTENSITY_BT2) + sce1->band_type[w] = 0; + + if (!cpe->common_window) + return; + for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + start = 0; + for (g = 0; g < sce0->ics.num_swb; g++) { + if (start*freq_mult > INT_STEREO_LOW_LIMIT*(lambda/170.0f) && + cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] && + cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) { + int phase = 0; + float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; + float dist1 = 0.0f, dist2 = 0.0f; + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + float coef0 = sce0->pcoeffs[start+(w+w2)*128+i]; + float coef1 = sce1->pcoeffs[start+(w+w2)*128+i]; + phase += coef0*coef1 >= 0.0f ? 1 : -1; + ener0 += coef0*coef0; + ener1 += coef1*coef1; + ener01 += (coef0 + coef1)*(coef0 + coef1); + } + } + if (!phase) { /* Too much phase difference between channels */ + start += sce0->ics.swb_sizes[g]; + continue; + } + phase = av_clip(phase, -1, 1); + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; + int is_band_type, is_sf_idx = FFMAX(1, sce0->sf_idx[(w+w2)*16+g]-4); + float e01_34 = phase*pow(sqrt(ener1/ener0), 3.0/4.0); + float maxval, dist_spec_err = 0.0f; + float minthr = FFMIN(band0->threshold, band1->threshold); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) + IS[i] = (sce0->pcoeffs[start+(w+w2)*128+i] + phase*sce1->pcoeffs[start+(w+w2)*128+i]) * sqrt(ener0/ener01); + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(I34, IS, sce0->ics.swb_sizes[g]); + maxval = find_max_val(1, sce0->ics.swb_sizes[g], I34); + is_band_type = find_min_book(maxval, is_sf_idx); + dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128, + L34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[(w+w2)*16+g], + sce0->band_type[(w+w2)*16+g], + lambda / band0->threshold, INFINITY, NULL); + dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128, + R34, + sce1->ics.swb_sizes[g], + sce1->sf_idx[(w+w2)*16+g], + sce1->band_type[(w+w2)*16+g], + lambda / band1->threshold, INFINITY, NULL); + dist2 += quantize_band_cost(s, IS, + I34, + sce0->ics.swb_sizes[g], + is_sf_idx, + is_band_type, + lambda / minthr, INFINITY, NULL); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]); + dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34); + } + dist_spec_err *= lambda / minthr; + dist2 += dist_spec_err; + } + if (dist2 <= dist1) { + cpe->is_mask[w*16+g] = 1; + cpe->ms_mask[w*16+g] = 0; + cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01); + cpe->ch[1].is_ener[w*16+g] = ener0/ener1; + if (phase) + cpe->ch[1].band_type[w*16+g] = INTENSITY_BT; + else + cpe->ch[1].band_type[w*16+g] = INTENSITY_BT2; + count++; + } + } + start += sce0->ics.swb_sizes[g]; + } + } + cpe->is_mode = !!count; +} + static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lambda) { @@ -1062,8 +1290,9 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, if (!cpe->common_window) return; for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { - if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { + if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g] && !cpe->is_mask[w*16+g]) { float dist1 = 0.0f, dist2 = 0.0f; for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; @@ -1071,22 +1300,22 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, float minthr = FFMIN(band0->threshold, band1->threshold); float maxthr = FFMAX(band0->threshold, band1->threshold); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { - M[i] = (sce0->coeffs[start+w2*128+i] - + sce1->coeffs[start+w2*128+i]) * 0.5; + M[i] = (sce0->pcoeffs[start+(w+w2)*128+i] + + sce1->pcoeffs[start+(w+w2)*128+i]) * 0.5; S[i] = M[i] - - sce1->coeffs[start+w2*128+i]; + - sce1->pcoeffs[start+(w+w2)*128+i]; } - abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); - dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128, + dist1 += quantize_band_cost(s, sce0->coeffs + start + (w+w2)*128, L34, sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], lambda / band0->threshold, INFINITY, NULL); - dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, + dist1 += quantize_band_cost(s, sce1->coeffs + start + (w+w2)*128, R34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], @@ -1112,29 +1341,41 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, } } -AACCoefficientsEncoder ff_aac_coders[] = { - { +AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { + [AAC_CODER_FAAC] = { search_for_quantizers_faac, encode_window_bands_info, quantize_and_encode_band, + set_special_band_scalefactors, + search_for_pns, search_for_ms, + search_for_is, }, - { + [AAC_CODER_ANMR] = { search_for_quantizers_anmr, encode_window_bands_info, quantize_and_encode_band, + set_special_band_scalefactors, + search_for_pns, search_for_ms, + search_for_is, }, - { + [AAC_CODER_TWOLOOP] = { search_for_quantizers_twoloop, codebook_trellis_rate, quantize_and_encode_band, + set_special_band_scalefactors, + search_for_pns, search_for_ms, + search_for_is, }, - { + [AAC_CODER_FAST] = { search_for_quantizers_fast, encode_window_bands_info, quantize_and_encode_band, + set_special_band_scalefactors, + search_for_pns, search_for_ms, + search_for_is, }, }; |