diff options
Diffstat (limited to 'libavcodec/aaccoder.c')
-rw-r--r-- | libavcodec/aaccoder.c | 1138 |
1 files changed, 481 insertions, 657 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index a654844..baa8248 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -2,20 +2,20 @@ * AAC coefficients encoder * Copyright (C) 2008-2009 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -33,269 +33,34 @@ #include "libavutil/libm.h" // brought forward to work around cygwin header breakage #include <float.h> + #include "libavutil/mathematics.h" +#include "mathops.h" #include "avcodec.h" #include "put_bits.h" #include "aac.h" #include "aacenc.h" #include "aactab.h" +#include "aacenctab.h" +#include "aacenc_utils.h" +#include "aacenc_quantization.h" -/** bits needed to code codebook run value for long windows */ -static const uint8_t run_value_bits_long[64] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15 -}; - -/** bits needed to code codebook run value for short windows */ -static const uint8_t run_value_bits_short[16] = { - 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9 -}; - -static const uint8_t * const run_value_bits[2] = { - run_value_bits_long, run_value_bits_short -}; - - -/** - * Quantize one coefficient. - * @return absolute value of the quantized coefficient - * @see 3GPP TS26.403 5.6.2 "Scalefactor determination" - */ -static av_always_inline int quant(float coef, const float Q) -{ - float a = coef * Q; - return sqrtf(a * sqrtf(a)) + 0.4054; -} - -static void quantize_bands(int *out, const float *in, const float *scaled, - int size, float Q34, int is_signed, int maxval) -{ - int i; - double qc; - for (i = 0; i < size; i++) { - qc = scaled[i] * Q34; - out[i] = (int)FFMIN(qc + 0.4054, (double)maxval); - if (is_signed && in[i] < 0.0f) { - out[i] = -out[i]; - } - } -} - -static void abs_pow34_v(float *out, const float *in, const int size) -{ -#ifndef USE_REALLY_FULL_SEARCH - int i; - for (i = 0; i < size; i++) { - float a = fabsf(in[i]); - out[i] = sqrtf(a * sqrtf(a)); - } -#endif /* USE_REALLY_FULL_SEARCH */ -} - -static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17}; -static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16}; - -/** - * Calculate rate distortion cost for quantizing with given codebook - * - * @return quantization distortion - */ -static av_always_inline float quantize_and_encode_band_cost_template( - struct AACEncContext *s, - PutBitContext *pb, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits, int BT_ZERO, int BT_UNSIGNED, - int BT_PAIR, int BT_ESC) -{ - const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512; - const float Q = ff_aac_pow2sf_tab [q_idx]; - const float Q34 = ff_aac_pow34sf_tab[q_idx]; - const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; - const float CLIPPED_ESCAPE = 165140.0f*IQ; - int i, j; - float cost = 0; - const int dim = BT_PAIR ? 2 : 4; - int resbits = 0; - const int range = aac_cb_range[cb]; - const int maxval = aac_cb_maxval[cb]; - int off; - - if (BT_ZERO) { - for (i = 0; i < size; i++) - cost += in[i]*in[i]; - if (bits) - *bits = 0; - return cost * lambda; - } - if (!scaled) { - abs_pow34_v(s->scoefs, in, size); - scaled = s->scoefs; - } - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, maxval); - if (BT_UNSIGNED) { - off = 0; - } else { - off = maxval; - } - for (i = 0; i < size; i += dim) { - const float *vec; - int *quants = s->qcoefs + i; - int curidx = 0; - int curbits; - float rd = 0.0f; - for (j = 0; j < dim; j++) { - curidx *= range; - curidx += quants[j] + off; - } - curbits = ff_aac_spectral_bits[cb-1][curidx]; - vec = &ff_aac_codebook_vectors[cb-1][curidx*dim]; - if (BT_UNSIGNED) { - for (j = 0; j < dim; j++) { - float t = fabsf(in[i+j]); - float di; - if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow - if (t >= CLIPPED_ESCAPE) { - di = t - CLIPPED_ESCAPE; - curbits += 21; - } else { - int c = av_clip_uintp2(quant(t, Q), 13); - di = t - c*cbrtf(c)*IQ; - curbits += av_log2(c)*2 - 4 + 1; - } - } else { - di = t - vec[j]*IQ; - } - if (vec[j] != 0.0f) - curbits++; - rd += di*di; - } - } else { - for (j = 0; j < dim; j++) { - float di = in[i+j] - vec[j]*IQ; - rd += di*di; - } - } - cost += rd * lambda + curbits; - resbits += curbits; - if (cost >= uplim) - return uplim; - if (pb) { - put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]); - if (BT_UNSIGNED) - for (j = 0; j < dim; j++) - if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f) - put_bits(pb, 1, in[i+j] < 0.0f); - if (BT_ESC) { - for (j = 0; j < 2; j++) { - if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) { - int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13); - int len = av_log2(coef); - - put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2); - put_bits(pb, len, coef & ((1 << len) - 1)); - } - } - } - } - } - - if (bits) - *bits = resbits; - return cost; -} +#include "aacenc_is.h" +#include "aacenc_tns.h" +#include "aacenc_ltp.h" +#include "aacenc_pred.h" -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC) \ -static float quantize_and_encode_band_cost_ ## NAME( \ - struct AACEncContext *s, \ - PutBitContext *pb, const float *in, \ - const float *scaled, int size, int scale_idx, \ - int cb, const float lambda, const float uplim, \ - int *bits) { \ - return quantize_and_encode_band_cost_template( \ - s, pb, in, scaled, size, scale_idx, \ - BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); \ -} +#include "libavcodec/aaccoder_twoloop.h" -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1) - -static float (*const quantize_and_encode_band_cost_arr[])( - struct AACEncContext *s, - PutBitContext *pb, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits) = { - quantize_and_encode_band_cost_ZERO, - quantize_and_encode_band_cost_SQUAD, - quantize_and_encode_band_cost_SQUAD, - quantize_and_encode_band_cost_UQUAD, - quantize_and_encode_band_cost_UQUAD, - quantize_and_encode_band_cost_SPAIR, - quantize_and_encode_band_cost_SPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_UPAIR, - quantize_and_encode_band_cost_ESC, -}; +/* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread + * beyond which no PNS is used (since the SFBs contain tone rather than noise) */ +#define NOISE_SPREAD_THRESHOLD 0.9f -#define quantize_and_encode_band_cost( \ - s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) \ - quantize_and_encode_band_cost_arr[cb]( \ - s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) - -static float quantize_band_cost(struct AACEncContext *s, const float *in, - const float *scaled, int size, int scale_idx, - int cb, const float lambda, const float uplim, - int *bits) -{ - return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx, - cb, lambda, uplim, bits); -} +/* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to + * replace low energy non zero bands */ +#define NOISE_LAMBDA_REPLACE 1.948f -static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, - const float *in, int size, int scale_idx, - int cb, const float lambda) -{ - quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda, - INFINITY, NULL); -} - -static float find_max_val(int group_len, int swb_size, const float *scaled) { - float maxval = 0.0f; - int w2, i; - for (w2 = 0; w2 < group_len; w2++) { - for (i = 0; i < swb_size; i++) { - maxval = FFMAX(maxval, scaled[w2*128+i]); - } - } - return maxval; -} - -static int find_min_book(float maxval, int sf) { - float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512]; - float Q34 = sqrtf(Q * sqrtf(Q)); - int qmaxval, cb; - qmaxval = maxval * Q34 + 0.4054f; - if (qmaxval == 0) cb = 0; - else if (qmaxval == 1) cb = 1; - else if (qmaxval == 2) cb = 3; - else if (qmaxval <= 4) cb = 5; - else if (qmaxval <= 7) cb = 7; - else if (qmaxval <= 12) cb = 9; - else cb = 11; - return cb; -} +#include "libavcodec/aaccoder_trellis.h" /** * structure used in optimal codebook search @@ -312,7 +77,7 @@ typedef struct BandCodingPath { static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT_ALL]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -323,9 +88,9 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce float next_minrd = INFINITY; int next_mincb = 0; - abs_pow34_v(s->scoefs, sce->coeffs, 1024); + s->abs_pow34(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[0][cb].cost = 0.0f; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -333,7 +98,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce for (swb = 0; swb < max_sfb; swb++) { size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { path[swb+1][cb].prev_idx = cb; path[swb+1][cb].cost = path[swb][cb].cost; path[swb+1][cb].run = path[swb][cb].run + 1; @@ -343,15 +108,22 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce int mincb = next_mincb; next_minrd = INFINITY; next_mincb = 0; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT_ALL; cb++) { float cost_stay_here, cost_get_here; float rd = 0.0f; + if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] || + cb < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) { + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].cost = INFINITY; + path[swb+1][cb].run = path[swb][cb].run + 1; + continue; + } for (w = 0; w < group_len; w++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; - rd += quantize_band_cost(s, sce->coeffs + start + w*128, - s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, - lambda / band->threshold, INFINITY, NULL); + rd += quantize_band_cost(s, &sce->coeffs[start + w*128], + &s->scoefs[start + w*128], size, + sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], + lambda / band->threshold, INFINITY, NULL, NULL, 0); } cost_stay_here = path[swb][cb].cost + rd; cost_get_here = minrd + rd + run_bits + 4; @@ -379,11 +151,12 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT_ALL; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; while (ppos > 0) { + av_assert1(idx >= 0); cb = idx; stackrun[stack_len] = path[ppos][cb].run; stackcb [stack_len] = cb; @@ -394,12 +167,13 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -410,147 +184,54 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce } } -static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, - int win, int group_len, const float lambda) + +typedef struct TrellisPath { + float cost; + int prev; +} TrellisPath; + +#define TRELLIS_STAGES 121 +#define TRELLIS_STATES (SCALE_MAX_DIFF+1) + +static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce) { - BandCodingPath path[120][12]; - int w, swb, cb, start, size; - int i, j; - const int max_sfb = sce->ics.max_sfb; - const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; - const int run_esc = (1 << run_bits) - 1; - int idx, ppos, count; - int stackrun[120], stackcb[120], stack_len; - float next_minbits = INFINITY; - int next_mincb = 0; + int w, g; + int prevscaler_n = -255, prevscaler_i = 0; + int bands = 0; - abs_pow34_v(s->scoefs, sce->coeffs, 1024); - start = win*128; - for (cb = 0; cb < 12; cb++) { - path[0][cb].cost = run_bits+4; - path[0][cb].prev_idx = -1; - path[0][cb].run = 0; - } - for (swb = 0; swb < max_sfb; swb++) { - size = sce->ics.swb_sizes[swb]; - if (sce->zeroes[win*16 + swb]) { - float cost_stay_here = path[swb][0].cost; - float cost_get_here = next_minbits + run_bits + 4; - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run] - != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1]) - cost_stay_here += run_bits; - if (cost_get_here < cost_stay_here) { - path[swb+1][0].prev_idx = next_mincb; - path[swb+1][0].cost = cost_get_here; - path[swb+1][0].run = 1; - } else { - path[swb+1][0].prev_idx = 0; - path[swb+1][0].cost = cost_stay_here; - path[swb+1][0].run = path[swb][0].run + 1; - } - next_minbits = path[swb+1][0].cost; - next_mincb = 0; - for (cb = 1; cb < 12; cb++) { - path[swb+1][cb].cost = 61450; - path[swb+1][cb].prev_idx = -1; - path[swb+1][cb].run = 0; - } - } else { - float minbits = next_minbits; - int mincb = next_mincb; - int startcb = sce->band_type[win*16+swb]; - next_minbits = INFINITY; - next_mincb = 0; - for (cb = 0; cb < startcb; cb++) { - path[swb+1][cb].cost = 61450; - path[swb+1][cb].prev_idx = -1; - path[swb+1][cb].run = 0; - } - for (cb = startcb; cb < 12; cb++) { - float cost_stay_here, cost_get_here; - float bits = 0.0f; - for (w = 0; w < group_len; w++) { - bits += quantize_band_cost(s, sce->coeffs + start + w*128, - s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, - 0, INFINITY, NULL); - } - cost_stay_here = path[swb][cb].cost + bits; - cost_get_here = minbits + bits + run_bits + 4; - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run] - != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1]) - cost_stay_here += run_bits; - if (cost_get_here < cost_stay_here) { - path[swb+1][cb].prev_idx = mincb; - path[swb+1][cb].cost = cost_get_here; - path[swb+1][cb].run = 1; - } else { - path[swb+1][cb].prev_idx = cb; - path[swb+1][cb].cost = cost_stay_here; - path[swb+1][cb].run = path[swb][cb].run + 1; - } - if (path[swb+1][cb].cost < next_minbits) { - next_minbits = path[swb+1][cb].cost; - next_mincb = cb; - } + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->zeroes[w*16+g]) + continue; + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100); + bands++; + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155); + if (prevscaler_n == -255) + prevscaler_n = sce->sf_idx[w*16+g]; + bands++; } } - start += sce->ics.swb_sizes[swb]; } - //convert resulting path from backward-linked list - stack_len = 0; - idx = 0; - for (cb = 1; cb < 12; cb++) - if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) - idx = cb; - ppos = max_sfb; - while (ppos > 0) { - assert(idx >= 0); - cb = idx; - stackrun[stack_len] = path[ppos][cb].run; - stackcb [stack_len] = cb; - idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx; - ppos -= path[ppos][cb].run; - stack_len++; - } - //perform actual band info encoding - start = 0; - for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); - count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); - //XXX: memset when band_type is also uint8_t - for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; - start++; - } - while (count >= run_esc) { - put_bits(&s->pb, run_bits, run_esc); - count -= run_esc; + if (!bands) + return; + + /* Clip the scalefactor indices */ + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + if (sce->zeroes[w*16+g]) + continue; + if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) { + sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF); + } else if (sce->band_type[w*16+g] == NOISE_BT) { + sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF); + } } - put_bits(&s->pb, run_bits, count); } } -/** Return the minimum scalefactor where the quantized coef does not clip. */ -static av_always_inline uint8_t coef2minsf(float coef) { - return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512); -} - -/** Return the maximum scalefactor where the quantized coef is not zero. */ -static av_always_inline uint8_t coef2maxsf(float coef) { - return av_clip_uint8(log2f(coef)*4 + 6 + SCALE_ONE_POS - SCALE_DIV_512); -} - -typedef struct TrellisPath { - float cost; - int prev; -} TrellisPath; - -#define TRELLIS_STAGES 121 -#define TRELLIS_STATES (SCALE_MAX_DIFF+1) - static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda) @@ -582,9 +263,9 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, } //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped - q0 = coef2minsf(q0f); + q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1); //maximum scalefactor index is when maximum coefficient after quantizing is still not zero - q1 = coef2maxsf(q1f); + q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS); if (q1 - q0 > 60) { int q0low = q0; int q1high = q1; @@ -600,6 +281,12 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, q1 = q1high; } } + // q0 == q1 isn't really a legal situation + if (q0 == q1) { + // the following is indirect but guarantees q1 != q0 && q1 near q0 + q1 = av_clip(q0+1, 1, SCALE_MAX_POS); + q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1); + } for (i = 0; i < TRELLIS_STATES; i++) { paths[0][i].cost = 0.0f; @@ -612,11 +299,11 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, } } idx = 1; - abs_pow34_v(s->scoefs, sce->coeffs, 1024); + s->abs_pow34(s->scoefs, sce->coeffs, 1024); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = sce->coeffs + start; + const float *coefs = &sce->coeffs[start]; float qmin, qmax; int nz = 0; @@ -648,6 +335,10 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, maxscale = coef2maxsf(qmax); minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1); maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES); + if (minscale == maxscale) { + maxscale = av_clip(minscale+1, 1, TRELLIS_STATES); + minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1); + } maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start); for (q = minscale; q < maxscale; q++) { float dist = 0; @@ -655,7 +346,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], - q + q0, cb, lambda / band->threshold, INFINITY, NULL); + q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0); } minrd = FFMIN(minrd, dist); @@ -691,27 +382,23 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, } while (idx) { sce->sf_idx[bandaddr[idx]] = minq + q0; - minq = paths[idx][minq].prev; + minq = FFMAX(paths[idx][minq].prev, 0); idx--; } //set the same quantizers inside window groups for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) - for (g = 0; g < sce->ics.num_swb; g++) + for (g = 0; g < sce->ics.num_swb; g++) for (w2 = 1; w2 < sce->ics.group_len[w]; w2++) sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; } -/** - * two-loop quantizers search taken from ISO 13818-7 Appendix C - */ -static void search_for_quantizers_twoloop(AVCodecContext *avctx, - AACEncContext *s, - SingleChannelElement *sce, - const float lambda) +static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, + SingleChannelElement *sce, + const float lambda) { int start = 0, i, w, w2, g; int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128]; + float dists[128] = { 0 }, uplims[128] = { 0 }; float maxvals[128]; int fflag, minscaler; int its = 0; @@ -721,15 +408,17 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, // for values above this the decoder might end up in an endless loop // due to always having more bits than what can be encoded. destbits = FFMIN(destbits, 5800); - //XXX: some heuristic to determine initial quantizers will reduce search time + //some heuristic to determine initial quantizers will reduce search time //determine zero bands and upper limits for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { + start = 0; + for (g = 0; g < sce->ics.num_swb; g++) { int nz = 0; - float uplim = 0.0f; + float uplim = 0.0f, energy = 0.0f; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; uplim += band->threshold; + energy += band->energy; if (band->energy <= band->threshold || band->threshold == 0.0f) { sce->zeroes[(w+w2)*16+g] = 1; continue; @@ -737,14 +426,16 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, nz = 1; } uplims[w*16+g] = uplim *512; + sce->band_type[w*16+g] = 0; sce->zeroes[w*16+g] = !nz; if (nz) minthr = FFMIN(minthr, uplim); allz |= nz; + start += sce->ics.swb_sizes[g]; } } for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { + for (g = 0; g < sce->ics.num_swb; g++) { if (sce->zeroes[w*16+g]) { sce->sf_idx[w*16+g] = SCALE_ONE_POS; continue; @@ -755,11 +446,12 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, if (!allz) return; - abs_pow34_v(s->scoefs, sce->coeffs, 1024); + s->abs_pow34(s->scoefs, sce->coeffs, 1024); + ff_quantize_band_cost_cache_init(s); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; - for (g = 0; g < sce->ics.num_swb; g++) { + for (g = 0; g < sce->ics.num_swb; g++) { const float *scaled = s->scoefs + start; maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled); start += sce->ics.swb_sizes[g]; @@ -776,10 +468,9 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, do { int prev = -1; tbits = 0; - fflag = 0; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; - for (g = 0; g < sce->ics.num_swb; g++) { + for (g = 0; g < sce->ics.num_swb; g++) { const float *coefs = sce->coeffs + start; const float *scaled = s->scoefs + start; int bits = 0; @@ -794,14 +485,13 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { int b; - dist += quantize_band_cost(s, coefs + w2*128, - scaled + w2*128, - sce->ics.swb_sizes[g], - sce->sf_idx[w*16+g], - cb, - 1.0f, - INFINITY, - &b); + dist += quantize_band_cost_cached(s, w + w2, g, + coefs + w2*128, + scaled + w2*128, + sce->ics.swb_sizes[g], + sce->sf_idx[w*16+g], + cb, 1.0f, INFINITY, + &b, NULL, 0); bits += b; } dists[w*16+g] = dist - bits; @@ -829,6 +519,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, fflag = 0; minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int prevsc = sce->sf_idx[w*16+g]; @@ -849,292 +540,425 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, } while (fflag && its < 10); } -static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, - SingleChannelElement *sce, - const float lambda) +static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) { - int start = 0, i, w, w2, g; - float uplim[128], maxq[128]; - int minq, maxsf; - float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda; - int last = 0, lastband = 0, curband = 0; - float avg_energy = 0.0; - if (sce->ics.num_windows == 1) { - start = 0; - for (i = 0; i < 1024; i++) { - if (i - start >= sce->ics.swb_sizes[curband]) { - start += sce->ics.swb_sizes[curband]; - curband++; - } - if (sce->coeffs[i]) { - avg_energy += sce->coeffs[i] * sce->coeffs[i]; - last = i; - lastband = curband; - } - } + FFPsyBand *band; + int w, g, w2, i; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; + float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128]; + float *NOR34 = &s->scoefs[3*128]; + uint8_t nextband[128]; + const float lambda = s->lambda; + const float freq_mult = avctx->sample_rate*0.5f/wlen; + const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int prev = -1000, prev_sf = -1; + int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) + ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; } else { - for (w = 0; w < 8; w++) { - const float *coeffs = sce->coeffs + w*128; - start = 0; - for (i = 0; i < 128; i++) { - if (i - start >= sce->ics.swb_sizes[curband]) { - start += sce->ics.swb_sizes[curband]; - curband++; - } - if (coeffs[i]) { - avg_energy += coeffs[i] * coeffs[i]; - last = FFMAX(last, i); - lastband = FFMAX(lastband, curband); - } - } - } - } - last++; - avg_energy /= last; - if (avg_energy == 0.0f) { - for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++) - sce->sf_idx[i] = SCALE_ONE_POS; - return; + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); + ff_init_nextband_map(sce, nextband); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - start = w*128; + int wstart = w*128; for (g = 0; g < sce->ics.num_swb; g++) { - float *coefs = sce->coeffs + start; - const int size = sce->ics.swb_sizes[g]; - int start2 = start, end2 = start + size, peakpos = start; - float maxval = -1, thr = 0.0f, t; - maxq[w*16+g] = 0.0f; - if (g > lastband) { - maxq[w*16+g] = 0.0f; - start += size; - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) - memset(coefs + w2*128, 0, sizeof(coefs[0])*size); + int noise_sfi; + float dist1 = 0.0f, dist2 = 0.0f, noise_amp; + float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh; + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; + const int start = wstart+sce->ics.swb_offset[g]; + const float freq = (start-wstart)*freq_mult; + const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); + if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) { + if (!sce->zeroes[w*16+g]) + prev_sf = sce->sf_idx[w*16+g]; continue; } for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - for (i = 0; i < size; i++) { - float t = coefs[w2*128+i]*coefs[w2*128+i]; - maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i])); - thr += t; - if (sce->ics.num_windows == 1 && maxval < t) { - maxval = t; - peakpos = start+i; - } + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + sfb_energy += band->energy; + spread = FFMIN(spread, band->spread); + threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; + } else { + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); } } - if (sce->ics.num_windows == 1) { - start2 = FFMAX(peakpos - 2, start2); - end2 = FFMIN(peakpos + 3, end2); - } else { - start2 -= start; - end2 -= start; - } - start += size; - thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband); - t = 1.0 - (1.0 * start2 / last); - uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075); - } - } - memset(sce->sf_idx, 0, sizeof(sce->sf_idx)); - abs_pow34_v(s->scoefs, sce->coeffs, 1024); - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - start = w*128; - for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = sce->coeffs + start; - const float *scaled = s->scoefs + start; - const int size = sce->ics.swb_sizes[g]; - int scf, prev_scf, step; - int min_scf = -1, max_scf = 256; - float curdiff; - if (maxq[w*16+g] < 21.544) { - sce->zeroes[w*16+g] = 1; - start += size; + + /* Ramps down at ~8000Hz and loosens the dist threshold */ + dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias; + + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + * + * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important) + */ + if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) || + ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold || + (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) || + min_energy < pns_transient_energy_r * max_energy ) { + sce->pns_ener[w*16+g] = sfb_energy; + if (!sce->zeroes[w*16+g]) + prev_sf = sce->sf_idx[w*16+g]; continue; } - sce->zeroes[w*16+g] = 0; - scf = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218); - step = 16; - for (;;) { - float dist = 0.0f; - int quant_max; - - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - int b; - dist += quantize_band_cost(s, coefs + w2*128, - scaled + w2*128, - sce->ics.swb_sizes[g], - scf, - ESC_BT, - lambda, - INFINITY, - &b); - dist -= b; - } - dist *= 1.0f / 512.0f / lambda; - quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]); - if (quant_max >= 8191) { // too much, return to the previous quantizer - sce->sf_idx[w*16+g] = prev_scf; - break; + + pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread); + noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */ + noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ + if (prev != -1000) { + int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO; + if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) { + if (!sce->zeroes[w*16+g]) + prev_sf = sce->sf_idx[w*16+g]; + continue; } - prev_scf = scf; - curdiff = fabsf(dist - uplim[w*16+g]); - if (curdiff <= 1.0f) - step = 0; - else - step = log2f(curdiff); - if (dist > uplim[w*16+g]) - step = -step; - scf += step; - scf = av_clip_uint8(scf); - step = scf - prev_scf; - if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) { - sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf); - break; + } + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + float band_energy, scale, pns_senergy; + const int start_c = (w+w2)*128+sce->ics.swb_offset[g]; + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + for (i = 0; i < sce->ics.swb_sizes[g]; i++) { + s->random_state = lcg_random(s->random_state); + PNS[i] = s->random_state; } - if (step > 0) - min_scf = prev_scf; - else - max_scf = prev_scf; + band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + scale = noise_amp/sqrtf(band_energy); + s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]); + pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]); + pns_energy += pns_senergy; + s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]); + s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]); + dist1 += quantize_band_cost(s, &sce->coeffs[start_c], + NOR34, + sce->ics.swb_sizes[g], + sce->sf_idx[(w+w2)*16+g], + sce->band_alt[(w+w2)*16+g], + lambda/band->threshold, INFINITY, NULL, NULL, 0); + /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */ + dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold; + } + if (g && sce->band_type[w*16+g-1] == NOISE_BT) { + dist2 += 5; + } else { + dist2 += 9; + } + energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */ + sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy; + if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) { + sce->band_type[w*16+g] = NOISE_BT; + sce->zeroes[w*16+g] = 0; + prev = noise_sfi; + } else { + if (!sce->zeroes[w*16+g]) + prev_sf = sce->sf_idx[w*16+g]; } - start += size; } } - minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX; - for (i = 1; i < 128; i++) { - if (!sce->sf_idx[i]) - sce->sf_idx[i] = sce->sf_idx[i-1]; - else - minq = FFMIN(minq, sce->sf_idx[i]); - } - if (minq == INT_MAX) - minq = 0; - minq = FFMIN(minq, SCALE_MAX_POS); - maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS); - for (i = 126; i >= 0; i--) { - if (!sce->sf_idx[i]) - sce->sf_idx[i] = sce->sf_idx[i+1]; - sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf); - } } -static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, - SingleChannelElement *sce, - const float lambda) +static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) { - int i, w, w2, g; - int minq = 255; + FFPsyBand *band; + int w, g, w2; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; + const float lambda = s->lambda; + const float freq_mult = avctx->sample_rate*0.5f/wlen; + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) + ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } - memset(sce->sf_idx, 0, sizeof(sce->sf_idx)); + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; + const int start = sce->ics.swb_offset[g]; + const float freq = start*freq_mult; + const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); + if (freq < NOISE_LOW_LIMIT || start >= cutoff) { + sce->can_pns[w*16+g] = 0; + continue; + } for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; - if (band->energy <= band->threshold) { - sce->sf_idx[(w+w2)*16+g] = 218; - sce->zeroes[(w+w2)*16+g] = 1; + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + sfb_energy += band->energy; + spread = FFMIN(spread, band->spread); + threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; } else { - sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218); - sce->zeroes[(w+w2)*16+g] = 0; + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); } - minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]); + } + + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + */ + sce->pns_ener[w*16+g] = sfb_energy; + if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) { + sce->can_pns[w*16+g] = 0; + } else { + sce->can_pns[w*16+g] = 1; } } } - for (i = 0; i < 128; i++) { - sce->sf_idx[i] = 140; - //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1); - } - //set the same quantizers inside window groups - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) - for (g = 0; g < sce->ics.num_swb; g++) - for (w2 = 1; w2 < sce->ics.group_len[w]; w2++) - sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; } -static void search_for_ms(AACEncContext *s, ChannelElement *cpe, - const float lambda) +static void search_for_ms(AACEncContext *s, ChannelElement *cpe) { - int start = 0, i, w, w2, g; - float M[128], S[128]; - float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; + int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side; + uint8_t nextband0[128], nextband1[128]; + float *M = s->scoefs + 128*0, *S = s->scoefs + 128*1; + float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3; + float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5; + const float lambda = s->lambda; + const float mslambda = FFMIN(1.0f, lambda / 120.f); SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; if (!cpe->common_window) return; + + /** Scout out next nonzero bands */ + ff_init_nextband_map(sce0, nextband0); + ff_init_nextband_map(sce1, nextband1); + + prev_mid = sce0->sf_idx[0]; + prev_side = sce1->sf_idx[0]; for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { - for (g = 0; g < sce0->ics.num_swb; g++) { - if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { - float dist1 = 0.0f, dist2 = 0.0f; + start = 0; + for (g = 0; g < sce0->ics.num_swb; g++) { + float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; + if (!cpe->is_mask[w*16+g]) + cpe->ms_mask[w*16+g] = 0; + if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) { + float Mmax = 0.0f, Smax = 0.0f; + + /* Must compute mid/side SF and book for the whole window group */ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; - float minthr = FFMIN(band0->threshold, band1->threshold); - float maxthr = FFMAX(band0->threshold, band1->threshold); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { - M[i] = (sce0->coeffs[start+w2*128+i] - + sce1->coeffs[start+w2*128+i]) * 0.5; + M[i] = (sce0->coeffs[start+(w+w2)*128+i] + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; S[i] = M[i] - - sce1->coeffs[start+w2*128+i]; + - sce1->coeffs[start+(w+w2)*128+i]; + } + s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]); + s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) { + Mmax = FFMAX(Mmax, M34[i]); + Smax = FFMAX(Smax, S34[i]); + } + } + + for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { + float dist1 = 0.0f, dist2 = 0.0f; + int B0 = 0, B1 = 0; + int minidx; + int mididx, sididx; + int midcb, sidcb; + + minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); + mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512); + sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512); + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT + && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g) + || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) { + /* scalefactor range violation, bad stuff, will decrease quality unacceptably */ + continue; + } + + midcb = find_min_book(Mmax, mididx); + sidcb = find_min_book(Smax, sididx); + + /* No CB can be zero */ + midcb = FFMAX(1,midcb); + sidcb = FFMAX(1,sidcb); + + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; + float minthr = FFMIN(band0->threshold, band1->threshold); + int b1,b2,b3,b4; + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + M[i] = (sce0->coeffs[start+(w+w2)*128+i] + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; + S[i] = M[i] + - sce1->coeffs[start+(w+w2)*128+i]; + } + + s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]); + s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]); + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], + L34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[w*16+g], + sce0->band_type[w*16+g], + lambda / band0->threshold, INFINITY, &b1, NULL, 0); + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], + R34, + sce1->ics.swb_sizes[g], + sce1->sf_idx[w*16+g], + sce1->band_type[w*16+g], + lambda / band1->threshold, INFINITY, &b2, NULL, 0); + dist2 += quantize_band_cost(s, M, + M34, + sce0->ics.swb_sizes[g], + mididx, + midcb, + lambda / minthr, INFINITY, &b3, NULL, 0); + dist2 += quantize_band_cost(s, S, + S34, + sce1->ics.swb_sizes[g], + sididx, + sidcb, + mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); + B0 += b1+b2; + B1 += b3+b4; + dist1 -= b1+b2; + dist2 -= b3+b4; + } + cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; + if (cpe->ms_mask[w*16+g]) { + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) { + sce0->sf_idx[w*16+g] = mididx; + sce1->sf_idx[w*16+g] = sididx; + sce0->band_type[w*16+g] = midcb; + sce1->band_type[w*16+g] = sidcb; + } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) { + /* ms_mask unneeded, and it confuses some decoders */ + cpe->ms_mask[w*16+g] = 0; + } + break; + } else if (B1 > B0) { + /* More boost won't fix this */ + break; } - abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); - abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); - dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128, - L34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL); - dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128, - R34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL); - dist2 += quantize_band_cost(s, M, - M34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / maxthr, INFINITY, NULL); - dist2 += quantize_band_cost(s, S, - S34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / minthr, INFINITY, NULL); } - cpe->ms_mask[w*16+g] = dist2 < dist1; } + if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT) + prev_mid = sce0->sf_idx[w*16+g]; + if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT) + prev_side = sce1->sf_idx[w*16+g]; start += sce0->ics.swb_sizes[g]; } } } -const AACCoefficientsEncoder ff_aac_coders[] = { - { - search_for_quantizers_faac, - encode_window_bands_info, - quantize_and_encode_band, - search_for_ms, - }, - { +const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { + [AAC_CODER_ANMR] = { search_for_quantizers_anmr, encode_window_bands_info, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, + set_special_band_scalefactors, + search_for_pns, + mark_pns, + ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, - { + [AAC_CODER_TWOLOOP] = { search_for_quantizers_twoloop, codebook_trellis_rate, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, + set_special_band_scalefactors, + search_for_pns, + mark_pns, + ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, - { + [AAC_CODER_FAST] = { search_for_quantizers_fast, - encode_window_bands_info, + codebook_trellis_rate, quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_encode_ltp_info, + ff_aac_encode_main_pred, + ff_aac_adjust_common_pred, + ff_aac_adjust_common_ltp, + ff_aac_apply_main_pred, + ff_aac_apply_tns, + ff_aac_update_ltp, + ff_aac_ltp_insert_new_frame, + set_special_band_scalefactors, + search_for_pns, + mark_pns, + ff_aac_search_for_tns, + ff_aac_search_for_ltp, search_for_ms, + ff_aac_search_for_is, + ff_aac_search_for_pred, }, }; |