From 1809894b2afdfb84dc97ac8b589c00bdb04b0ab7 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Sun, 27 Dec 2015 20:45:46 +0100 Subject: avfilter/af_silenceremove: add peak detector Signed-off-by: Paul B Mahol --- libavfilter/af_silenceremove.c | 106 ++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 33 deletions(-) (limited to 'libavfilter/af_silenceremove.c') diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c index 3389d64..c547901 100644 --- a/libavfilter/af_silenceremove.c +++ b/libavfilter/af_silenceremove.c @@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext { double *window_current; double *window_end; int window_size; - double rms_sum; + double sum; int leave_silence; int restart; int64_t next_pts; + + int detection; + void (*update)(struct SilenceRemoveContext *s, double sample); + double(*compute)(struct SilenceRemoveContext *s, double sample); } SilenceRemoveContext; #define OFFSET(x) offsetof(SilenceRemoveContext, x) @@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = { { "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS }, { "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS }, { "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS }, + { "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" }, + { "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" }, + { "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" }, { NULL } }; AVFILTER_DEFINE_CLASS(silenceremove); +static double compute_peak(SilenceRemoveContext *s, double sample) +{ + double new_sum; + + new_sum = s->sum; + new_sum -= *s->window_current; + new_sum += fabs(sample); + + return new_sum / s->window_size; +} + +static void update_peak(SilenceRemoveContext *s, double sample) +{ + s->sum -= *s->window_current; + *s->window_current = fabs(sample); + s->sum += *s->window_current; + + s->window_current++; + if (s->window_current >= s->window_end) + s->window_current = s->window; +} + +static double compute_rms(SilenceRemoveContext *s, double sample) +{ + double new_sum; + + new_sum = s->sum; + new_sum -= *s->window_current; + new_sum += sample * sample; + + return sqrt(new_sum / s->window_size); +} + +static void update_rms(SilenceRemoveContext *s, double sample) +{ + s->sum -= *s->window_current; + *s->window_current = sample * sample; + s->sum += *s->window_current; + + s->window_current++; + if (s->window_current >= s->window_end) + s->window_current = s->window; +} + static av_cold int init(AVFilterContext *ctx) { SilenceRemoveContext *s = ctx->priv; @@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx) s->restart = 1; } + switch (s->detection) { + case 0: + s->update = update_peak; + s->compute = compute_peak; + break; + case 1: + s->update = update_rms; + s->compute = compute_rms; + break; + }; + return 0; } -static void clear_rms(SilenceRemoveContext *s) +static void clear_window(SilenceRemoveContext *s) { memset(s->window, 0, s->window_size * sizeof(*s->window)); s->window_current = s->window; s->window_end = s->window + s->window_size; - s->rms_sum = 0; + s->sum = 0; } static int config_input(AVFilterLink *inlink) @@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink) if (!s->window) return AVERROR(ENOMEM); - clear_rms(s); + clear_window(s); s->start_duration = av_rescale(s->start_duration, inlink->sample_rate, AV_TIME_BASE); @@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink) return 0; } -static double compute_rms(SilenceRemoveContext *s, double sample) -{ - double new_sum; - - new_sum = s->rms_sum; - new_sum -= *s->window_current; - new_sum += sample * sample; - - return sqrt(new_sum / s->window_size); -} - -static void update_rms(SilenceRemoveContext *s, double sample) -{ - s->rms_sum -= *s->window_current; - *s->window_current = sample * sample; - s->rms_sum += *s->window_current; - - s->window_current++; - if (s->window_current >= s->window_end) - s->window_current = s->window; -} - static void flush(AVFrame *out, AVFilterLink *outlink, int *nb_samples_written, int *ret) { @@ -209,12 +249,12 @@ silence_trim: for (i = 0; i < nbs; i++) { threshold = 0; for (j = 0; j < inlink->channels; j++) { - threshold |= compute_rms(s, ibuf[j]) > s->start_threshold; + threshold |= s->compute(s, ibuf[j]) > s->start_threshold; } if (threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); s->start_holdoff[s->start_holdoff_end++] = *ibuf++; nb_samples_read++; } @@ -232,7 +272,7 @@ silence_trim: s->start_holdoff_end = 0; for (j = 0; j < inlink->channels; j++) - update_rms(s, ibuf[j]); + s->update(s, ibuf[j]); ibuf += inlink->channels; nb_samples_read += inlink->channels; @@ -284,7 +324,7 @@ silence_copy: for (i = 0; i < nbs; i++) { threshold = 1; for (j = 0; j < inlink->channels; j++) - threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold; + threshold &= s->compute(s, ibuf[j]) > s->stop_threshold; if (threshold && s->stop_holdoff_end && !s->leave_silence) { s->mode = SILENCE_COPY_FLUSH; @@ -292,14 +332,14 @@ silence_copy: goto silence_copy_flush; } else if (threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); *obuf++ = *ibuf++; nb_samples_read++; nb_samples_written++; } } else if (!threshold) { for (j = 0; j < inlink->channels; j++) { - update_rms(s, *ibuf); + s->update(s, *ibuf); if (s->leave_silence) { *obuf++ = *ibuf; nb_samples_written++; @@ -323,7 +363,7 @@ silence_copy: s->start_found_periods = 0; s->start_holdoff_offset = 0; s->start_holdoff_end = 0; - clear_rms(s); + clear_window(s); s->mode = SILENCE_TRIM; flush(out, outlink, &nb_samples_written, &ret); goto silence_trim; -- cgit v1.1