aaccoder: add frequency scaling and quantization correction for PNS

This commit once again improves the PNS implementation by scaling the thresholds with frequency. The thresholds get looser as the frequency increases since higher frequencies are basically noise to human ears. Also, this introduces quantization error correction for PNS. Should the error be too much, no PNS will be used. The energy_ratio is used to regulate the actual encoded PNS energy: if the generated PNS energy is higher than the energy from the psy system, energy_ratio is used to correct it so that hopefully once requantized and transmitted the value in the decoder will be closer to what the encoder has. Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
author: Rostislav Pehlivanov <atomnuker@gmail.com> 2015-09-07 12:39:04 +0100
committer: Rostislav Pehlivanov <atomnuker@gmail.com> 2015-09-07 12:44:09 +0100
commit: b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c (patch)
tree: 71f3803f41c53a5c7fee317d39101ff0f5a07832 /libavcodec/aaccoder.c
parent: 0c511eb272e007b5998fa0b4741618274b78b4df (diff)
download: ffmpeg-streaming-b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c.zip
ffmpeg-streaming-b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c.tar.gz
1 files changed, 17 insertions, 11 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index 8a241d2..cf4b11b 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -876,36 +876,39 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
         for (g = 0;  g < sce->ics.num_swb; g++) {
             int noise_sfi, try_pns = 0;
             float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
-            float energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+            float pns_energy = 0.0f, energy_ratio, dist_thresh;
+            float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
+            float freq_boost = FFMAX(0.88f*start*freq_mult/NOISE_LOW_LIMIT, 1.0f);
             if (start*freq_mult < NOISE_LOW_LIMIT) {
                 start += sce->ics.swb_sizes[g];
                 continue;
+            } else {
+                dist_thresh = FFMIN(0.008f*(NOISE_LOW_LIMIT/start*freq_mult), 1.11f);
             }
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
-                energy    += band->energy;
-                spread    += band->spread;
-                threshold += band->threshold;
+                sfb_energy += band->energy;
+                spread     += band->spread;
+                threshold  += band->threshold;
             }
-            sce->pns_ener[w*16+g] = energy;
 
             if (sce->zeroes[w*16+g]) {
                 try_pns = 1;
-            } else if (energy < threshold) {
+            } else if (sfb_energy < threshold*freq_boost) {
                 try_pns = 1;
             } else if (spread > spread_threshold) {
                 try_pns = 0;
-            } else if (energy < threshold*thr_mult) {
+            } else if (sfb_energy < threshold*thr_mult*freq_boost) {
                 try_pns = 1;
             }
 
-            if (!try_pns || !energy) {
+            if (!try_pns || !sfb_energy) {
                 start += sce->ics.swb_sizes[g];
                 continue;
             }
 
-            noise_sfi = av_clip(roundf(log2f(energy)*2), -100, 155);  /* Quantize */
-            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
+            noise_sfi = av_clip(roundf(log2f(sfb_energy)*2), -100, 155); /* Quantize */
+            noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
                 float band_energy, scale;
                 band = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
@@ -914,6 +917,7 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
                 band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                 scale = noise_amp/sqrtf(band_energy);
                 s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
+                pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
                 abs_pow34_v(NOR34, &sce->coeffs[start+(w+w2)*128], sce->ics.swb_sizes[g]);
                 abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
                 dist1 += quantize_band_cost(s, &sce->coeffs[start + (w+w2)*128],
@@ -929,7 +933,9 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne
                                             NOISE_BT,
                                             lambda/band->threshold, INFINITY, NULL, 0);
             }
-            if (dist2 < dist1) {
+            energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */
+            sce->pns_ener[w*16+g] = energy_ratio*sfb_energy;
+            if (energy_ratio > 0.80f && energy_ratio < 1.20f && dist1/dist2 > dist_thresh) {
                 sce->band_type[w*16+g] = NOISE_BT;
                 sce->zeroes[w*16+g] = 0;
                 if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */
author	Rostislav Pehlivanov <atomnuker@gmail.com>	2015-09-07 12:39:04 +0100
committer	Rostislav Pehlivanov <atomnuker@gmail.com>	2015-09-07 12:44:09 +0100
commit	b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c (patch)
tree	71f3803f41c53a5c7fee317d39101ff0f5a07832 /libavcodec/aaccoder.c
parent	0c511eb272e007b5998fa0b4741618274b78b4df (diff)
download	ffmpeg-streaming-b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c.zip ffmpeg-streaming-b6cc8ec7ecc3bea2a84fe65fa278e22fbb26a48c.tar.gz