/* * Copyright (c) 2013-2014 Clément Bœsch * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * A simple, relatively efficient and slow DCT image denoiser. * * @see http://www.ipol.im/pub/art/2011/ys-dct/ * * The DCT factorization used is based on "Fast and numerically stable * algorithms for discrete cosine transforms" from Gerlind Plonkaa & Manfred * Tasche (DOI: 10.1016/j.laa.2004.07.015). */ #include "libavutil/avassert.h" #include "libavutil/eval.h" #include "libavutil/opt.h" #include "internal.h" static const char *const var_names[] = { "c", NULL }; enum { VAR_C, VAR_VARS_NB }; #define MAX_THREADS 8 typedef struct DCTdnoizContext { const AVClass *class; /* coefficient factor expression */ char *expr_str; AVExpr *expr[MAX_THREADS]; double var_values[MAX_THREADS][VAR_VARS_NB]; int nb_threads; int pr_width, pr_height; // width and height to process float sigma; // used when no expression are st float th; // threshold (3*sigma) float *cbuf[2][3]; // two planar rgb color buffers float *slices[MAX_THREADS]; // slices buffers (1 slice buffer per thread) float *weights; // dct coeff are cumulated with overlapping; these values are used for averaging int p_linesize; // line sizes for color and weights int overlap; // number of block overlapping pixels int step; // block step increment (blocksize - overlap) int n; // 1<th); \ } \ \ static void filter_freq_expr_##bsize(DCTdnoizContext *s, \ const float *src, int src_linesize, \ float *dst, int dst_linesize, int thread_id) \ { \ filter_freq_##bsize(src, src_linesize, dst, dst_linesize, \ s->expr[thread_id], s->var_values[thread_id], 0); \ } DEF_FILTER_FREQ_FUNCS(8) DEF_FILTER_FREQ_FUNCS(16) #define DCT3X3_0_0 0.5773502691896258f /* 1/sqrt(3) */ #define DCT3X3_0_1 0.5773502691896258f /* 1/sqrt(3) */ #define DCT3X3_0_2 0.5773502691896258f /* 1/sqrt(3) */ #define DCT3X3_1_0 0.7071067811865475f /* 1/sqrt(2) */ #define DCT3X3_1_2 -0.7071067811865475f /* -1/sqrt(2) */ #define DCT3X3_2_0 0.4082482904638631f /* 1/sqrt(6) */ #define DCT3X3_2_1 -0.8164965809277261f /* -2/sqrt(6) */ #define DCT3X3_2_2 0.4082482904638631f /* 1/sqrt(6) */ static av_always_inline void color_decorrelation(float **dst, int dst_linesize, const uint8_t **src, int src_linesize, int w, int h, int r, int g, int b) { int x, y; float *dstp_r = dst[0]; float *dstp_g = dst[1]; float *dstp_b = dst[2]; const uint8_t *srcp = src[0]; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { dstp_r[x] = srcp[r] * DCT3X3_0_0 + srcp[g] * DCT3X3_0_1 + srcp[b] * DCT3X3_0_2; dstp_g[x] = srcp[r] * DCT3X3_1_0 + srcp[b] * DCT3X3_1_2; dstp_b[x] = srcp[r] * DCT3X3_2_0 + srcp[g] * DCT3X3_2_1 + srcp[b] * DCT3X3_2_2; srcp += 3; } srcp += src_linesize - w * 3; dstp_r += dst_linesize; dstp_g += dst_linesize; dstp_b += dst_linesize; } } static av_always_inline void color_correlation(uint8_t **dst, int dst_linesize, float **src, int src_linesize, int w, int h, int r, int g, int b) { int x, y; const float *src_r = src[0]; const float *src_g = src[1]; const float *src_b = src[2]; uint8_t *dstp = dst[0]; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { dstp[r] = av_clip_uint8(src_r[x] * DCT3X3_0_0 + src_g[x] * DCT3X3_1_0 + src_b[x] * DCT3X3_2_0); dstp[g] = av_clip_uint8(src_r[x] * DCT3X3_0_1 + src_b[x] * DCT3X3_2_1); dstp[b] = av_clip_uint8(src_r[x] * DCT3X3_0_2 + src_g[x] * DCT3X3_1_2 + src_b[x] * DCT3X3_2_2); dstp += 3; } dstp += dst_linesize - w * 3; src_r += src_linesize; src_g += src_linesize; src_b += src_linesize; } } #define DECLARE_COLOR_FUNCS(name, r, g, b) \ static void color_decorrelation_##name(float **dst, int dst_linesize, \ const uint8_t **src, int src_linesize, \ int w, int h) \ { \ color_decorrelation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ } \ \ static void color_correlation_##name(uint8_t **dst, int dst_linesize, \ float **src, int src_linesize, \ int w, int h) \ { \ color_correlation(dst, dst_linesize, src, src_linesize, w, h, r, g, b); \ } DECLARE_COLOR_FUNCS(rgb, 0, 1, 2) DECLARE_COLOR_FUNCS(bgr, 2, 1, 0) static av_always_inline void color_decorrelation_gbrp(float **dst, int dst_linesize, const uint8_t **src, int src_linesize, int w, int h) { int x, y; float *dstp_r = dst[0]; float *dstp_g = dst[1]; float *dstp_b = dst[2]; const uint8_t *srcp_r = src[2]; const uint8_t *srcp_g = src[0]; const uint8_t *srcp_b = src[1]; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { dstp_r[x] = srcp_r[x] * DCT3X3_0_0 + srcp_g[x] * DCT3X3_0_1 + srcp_b[x] * DCT3X3_0_2; dstp_g[x] = srcp_r[x] * DCT3X3_1_0 + srcp_b[x] * DCT3X3_1_2; dstp_b[x] = srcp_r[x] * DCT3X3_2_0 + srcp_g[x] * DCT3X3_2_1 + srcp_b[x] * DCT3X3_2_2; } srcp_r += src_linesize; srcp_g += src_linesize; srcp_b += src_linesize; dstp_r += dst_linesize; dstp_g += dst_linesize; dstp_b += dst_linesize; } } static av_always_inline void color_correlation_gbrp(uint8_t **dst, int dst_linesize, float **src, int src_linesize, int w, int h) { int x, y; const float *src_r = src[0]; const float *src_g = src[1]; const float *src_b = src[2]; uint8_t *dstp_r = dst[2]; uint8_t *dstp_g = dst[0]; uint8_t *dstp_b = dst[1]; for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { dstp_r[x] = av_clip_uint8(src_r[x] * DCT3X3_0_0 + src_g[x] * DCT3X3_1_0 + src_b[x] * DCT3X3_2_0); dstp_g[x] = av_clip_uint8(src_r[x] * DCT3X3_0_1 + src_b[x] * DCT3X3_2_1); dstp_b[x] = av_clip_uint8(src_r[x] * DCT3X3_0_2 + src_g[x] * DCT3X3_1_2 + src_b[x] * DCT3X3_2_2); } dstp_r += dst_linesize; dstp_g += dst_linesize; dstp_b += dst_linesize; src_r += src_linesize; src_g += src_linesize; src_b += src_linesize; } } static int config_input(AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; DCTdnoizContext *s = ctx->priv; int i, x, y, bx, by, linesize, *iweights, max_slice_h, slice_h; const int bsize = 1 << s->n; switch (inlink->format) { case AV_PIX_FMT_BGR24: s->color_decorrelation = color_decorrelation_bgr; s->color_correlation = color_correlation_bgr; break; case AV_PIX_FMT_RGB24: s->color_decorrelation = color_decorrelation_rgb; s->color_correlation = color_correlation_rgb; break; case AV_PIX_FMT_GBRP: s->color_decorrelation = color_decorrelation_gbrp; s->color_correlation = color_correlation_gbrp; break; default: av_assert0(0); } s->pr_width = inlink->w - (inlink->w - bsize) % s->step; s->pr_height = inlink->h - (inlink->h - bsize) % s->step; if (s->pr_width != inlink->w) av_log(ctx, AV_LOG_WARNING, "The last %d horizontal pixels won't be denoised\n", inlink->w - s->pr_width); if (s->pr_height != inlink->h) av_log(ctx, AV_LOG_WARNING, "The last %d vertical pixels won't be denoised\n", inlink->h - s->pr_height); max_slice_h = s->pr_height / ((s->bsize - 1) * 2); s->nb_threads = FFMIN3(MAX_THREADS, ff_filter_get_nb_threads(ctx), max_slice_h); av_log(ctx, AV_LOG_DEBUG, "threads: [max=%d hmax=%d user=%d] => %d\n", MAX_THREADS, max_slice_h, ff_filter_get_nb_threads(ctx), s->nb_threads); s->p_linesize = linesize = FFALIGN(s->pr_width, 32); for (i = 0; i < 2; i++) { s->cbuf[i][0] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][0])); s->cbuf[i][1] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][1])); s->cbuf[i][2] = av_malloc_array(linesize * s->pr_height, sizeof(*s->cbuf[i][2])); if (!s->cbuf[i][0] || !s->cbuf[i][1] || !s->cbuf[i][2]) return AVERROR(ENOMEM); } /* eval expressions are probably not thread safe when the eval internal * state can be changed (typically through load & store operations) */ if (s->expr_str) { for (i = 0; i < s->nb_threads; i++) { int ret = av_expr_parse(&s->expr[i], s->expr_str, var_names, NULL, NULL, NULL, NULL, 0, ctx); if (ret < 0) return ret; } } /* each slice will need to (pre & re)process the top and bottom block of * the previous one in in addition to its processing area. This is because * each pixel is averaged by all the surrounding blocks */ slice_h = (int)ceilf(s->pr_height / (float)s->nb_threads) + (s->bsize - 1) * 2; for (i = 0; i < s->nb_threads; i++) { s->slices[i] = av_malloc_array(linesize, slice_h * sizeof(*s->slices[i])); if (!s->slices[i]) return AVERROR(ENOMEM); } s->weights = av_malloc(s->pr_height * linesize * sizeof(*s->weights)); if (!s->weights) return AVERROR(ENOMEM); iweights = av_calloc(s->pr_height, linesize * sizeof(*iweights)); if (!iweights) return AVERROR(ENOMEM); for (y = 0; y < s->pr_height - bsize + 1; y += s->step) for (x = 0; x < s->pr_width - bsize + 1; x += s->step) for (by = 0; by < bsize; by++) for (bx = 0; bx < bsize; bx++) iweights[(y + by)*linesize + x + bx]++; for (y = 0; y < s->pr_height; y++) for (x = 0; x < s->pr_width; x++) s->weights[y*linesize + x] = 1. / iweights[y*linesize + x]; av_free(iweights); return 0; } static av_cold int init(AVFilterContext *ctx) { DCTdnoizContext *s = ctx->priv; s->bsize = 1 << s->n; if (s->overlap == -1) s->overlap = s->bsize - 1; if (s->overlap > s->bsize - 1) { av_log(s, AV_LOG_ERROR, "Overlap value can not except %d " "with a block size of %dx%d\n", s->bsize - 1, s->bsize, s->bsize); return AVERROR(EINVAL); } if (s->expr_str) { switch (s->n) { case 3: s->filter_freq_func = filter_freq_expr_8; break; case 4: s->filter_freq_func = filter_freq_expr_16; break; default: av_assert0(0); } } else { switch (s->n) { case 3: s->filter_freq_func = filter_freq_sigma_8; break; case 4: s->filter_freq_func = filter_freq_sigma_16; break; default: av_assert0(0); } } s->th = s->sigma * 3.; s->step = s->bsize - s->overlap; return 0; } static int query_formats(AVFilterContext *ctx) { static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_BGR24, AV_PIX_FMT_RGB24, AV_PIX_FMT_GBRP, AV_PIX_FMT_NONE }; AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts); if (!fmts_list) return AVERROR(ENOMEM); return ff_set_common_formats(ctx, fmts_list); } typedef struct ThreadData { float *src, *dst; } ThreadData; static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { int x, y; DCTdnoizContext *s = ctx->priv; const ThreadData *td = arg; const int w = s->pr_width; const int h = s->pr_height; const int slice_start = (h * jobnr ) / nb_jobs; const int slice_end = (h * (jobnr+1)) / nb_jobs; const int slice_start_ctx = FFMAX(slice_start - s->bsize + 1, 0); const int slice_end_ctx = FFMIN(slice_end, h - s->bsize + 1); const int slice_h = slice_end_ctx - slice_start_ctx; const int src_linesize = s->p_linesize; const int dst_linesize = s->p_linesize; const int slice_linesize = s->p_linesize; float *dst; const float *src = td->src + slice_start_ctx * src_linesize; const float *weights = s->weights + slice_start * dst_linesize; float *slice = s->slices[jobnr]; // reset block sums memset(slice, 0, (slice_h + s->bsize - 1) * dst_linesize * sizeof(*slice)); // block dct sums for (y = 0; y < slice_h; y += s->step) { for (x = 0; x < w - s->bsize + 1; x += s->step) s->filter_freq_func(s, src + x, src_linesize, slice + x, slice_linesize, jobnr); src += s->step * src_linesize; slice += s->step * slice_linesize; } // average blocks slice = s->slices[jobnr] + (slice_start - slice_start_ctx) * slice_linesize; dst = td->dst + slice_start * dst_linesize; for (y = slice_start; y < slice_end; y++) { for (x = 0; x < w; x++) dst[x] = slice[x] * weights[x]; slice += slice_linesize; dst += dst_linesize; weights += dst_linesize; } return 0; } static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; DCTdnoizContext *s = ctx->priv; AVFilterLink *outlink = inlink->dst->outputs[0]; int direct, plane; AVFrame *out; if (av_frame_is_writable(in)) { direct = 1; out = in; } else { direct = 0; out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { av_frame_free(&in); return AVERROR(ENOMEM); } av_frame_copy_props(out, in); } s->color_decorrelation(s->cbuf[0], s->p_linesize, (const uint8_t **)in->data, in->linesize[0], s->pr_width, s->pr_height); for (plane = 0; plane < 3; plane++) { ThreadData td = { .src = s->cbuf[0][plane], .dst = s->cbuf[1][plane], }; ctx->internal->execute(ctx, filter_slice, &td, NULL, s->nb_threads); } s->color_correlation(out->data, out->linesize[0], s->cbuf[1], s->p_linesize, s->pr_width, s->pr_height); if (!direct) { int y; uint8_t *dst = out->data[0]; const uint8_t *src = in->data[0]; const int dst_linesize = out->linesize[0]; const int src_linesize = in->linesize[0]; const int hpad = (inlink->w - s->pr_width) * 3; const int vpad = (inlink->h - s->pr_height); if (hpad) { uint8_t *dstp = dst + s->pr_width * 3; const uint8_t *srcp = src + s->pr_width * 3; for (y = 0; y < s->pr_height; y++) { memcpy(dstp, srcp, hpad); dstp += dst_linesize; srcp += src_linesize; } } if (vpad) { uint8_t *dstp = dst + s->pr_height * dst_linesize; const uint8_t *srcp = src + s->pr_height * src_linesize; for (y = 0; y < vpad; y++) { memcpy(dstp, srcp, inlink->w * 3); dstp += dst_linesize; srcp += src_linesize; } } av_frame_free(&in); } return ff_filter_frame(outlink, out); } static av_cold void uninit(AVFilterContext *ctx) { int i; DCTdnoizContext *s = ctx->priv; av_freep(&s->weights); for (i = 0; i < 2; i++) { av_freep(&s->cbuf[i][0]); av_freep(&s->cbuf[i][1]); av_freep(&s->cbuf[i][2]); } for (i = 0; i < s->nb_threads; i++) { av_freep(&s->slices[i]); av_expr_free(s->expr[i]); } } static const AVFilterPad dctdnoiz_inputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, .config_props = config_input, }, { NULL } }; static const AVFilterPad dctdnoiz_outputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, }, { NULL } }; AVFilter ff_vf_dctdnoiz = { .name = "dctdnoiz", .description = NULL_IF_CONFIG_SMALL("Denoise frames using 2D DCT."), .priv_size = sizeof(DCTdnoizContext), .init = init, .uninit = uninit, .query_formats = query_formats, .inputs = dctdnoiz_inputs, .outputs = dctdnoiz_outputs, .priv_class = &dctdnoiz_class, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, };