diff options
author | Ruiling Song <ruiling.song@intel.com> | 2019-04-12 16:29:03 +0800 |
---|---|---|
committer | Ruiling Song <ruiling.song@intel.com> | 2019-05-24 15:09:22 +0800 |
commit | 1d74150a7dacf4912de21820ab1011b94c13eeb5 (patch) | |
tree | 0bbf1196f1d3669735f6759f12f42d4d1bec64f8 /libavfilter/opencl | |
parent | 023ea5e360cb08d4f71991aca45a636df831b88d (diff) | |
download | ffmpeg-streaming-1d74150a7dacf4912de21820ab1011b94c13eeb5.zip ffmpeg-streaming-1d74150a7dacf4912de21820ab1011b94c13eeb5.tar.gz |
lavfi/opencl: add nlmeans_opencl filter
Reviewed-by: Mark Thompson <sw@jkqxz.net>
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Diffstat (limited to 'libavfilter/opencl')
-rw-r--r-- | libavfilter/opencl/nlmeans.cl | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/libavfilter/opencl/nlmeans.cl b/libavfilter/opencl/nlmeans.cl
new file mode 100644
index 0000000..72bd681
--- /dev/null
+++ b/libavfilter/opencl/nlmeans.cl
@@ -0,0 +1,179 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+// Unnormalized integer pixel coordinates, clamp-to-edge addressing,
+// nearest-neighbour sampling.
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                           CLK_ADDRESS_CLAMP_TO_EDGE |
+                           CLK_FILTER_NEAREST);
+
+/*
+ * Row pass of the integral image.
+ *
+ * Work-item y (global id 0) walks row y of src from left to right. For each
+ * of the four offsets (dx.s, dy.s) it keeps a running sum of the squared
+ * difference between pixel (i, y) and pixel (i + dx.s, y + dy.s), scaled by
+ * 255 * 255, and stores the four running sums in integral_img[y * width + i].
+ * Only the first channel of src is read.
+ */
+kernel void horiz_sum(__global uint4 *integral_img,
+                      __read_only image2d_t src,
+                      int width,
+                      int height,
+                      int4 dx,
+                      int4 dy)
+{
+    int y = get_global_id(0);
+
+    // Nothing to do for work-items past the last row.
+    if (y >= height)
+        return;
+
+    uint4 sum = (uint4)(0);
+    for (int i = 0; i < width; i++) {
+        float s1 = read_imagef(src, sampler, (int2)(i, y)).x;
+        float4 s2 = (float4)(
+            read_imagef(src, sampler, (int2)(i + dx.x, y + dy.x)).x,
+            read_imagef(src, sampler, (int2)(i + dx.y, y + dy.y)).x,
+            read_imagef(src, sampler, (int2)(i + dx.z, y + dy.z)).x,
+            read_imagef(src, sampler, (int2)(i + dx.w, y + dy.w)).x);
+        float4 diff = s1 - s2;
+
+        sum += convert_uint4(diff * diff * 255 * 255);
+        integral_img[y * width + i] = sum;
+    }
+}
+
+/*
+ * Column pass of the integral image.
+ *
+ * Work-item x (global id 0) walks column x from top to bottom and adds to
+ * every entry the sum of all entries above it. *overflow is incremented
+ * once for every entry where that addition would exceed UINT_MAX in any of
+ * the four components.
+ */
+kernel void vert_sum(__global uint4 *integral_img,
+                     __global int *overflow,
+                     int width,
+                     int height)
+{
+    int x = get_global_id(0);
+
+    // Nothing to do for work-items past the last column.
+    if (x >= width)
+        return;
+
+    uint4 sum = (uint4)(0);
+    for (int i = 0; i < height; i++) {
+        int idx = i * width + x;
+        uint4 cell = integral_img[idx];
+
+        // cell + sum would not fit in 32 bits for at least one component.
+        if (any((uint4)(UINT_MAX) - cell < sum))
+            atomic_inc(overflow);
+
+        sum += cell;
+        integral_img[idx] = sum;
+    }
+}
+
+/*
+ * Accumulate the weighted contribution of four offsets for every pixel.
+ *
+ * Work-item (x, y) combines the four integral_img entries at the corners
+ * (x +- p, y +- p) into one patch difference per offset, converts it to a
+ * weight native_exp(-diff / (h * h)) and then adds
+ *   - the four weights to weight[y * width + x], and
+ *   - the four weighted source pixels (x + dx.s, y + dy.s), each multiplied
+ *     by 255 and truncated to an integer, to sum[y * width + x].
+ * When a corner falls outside the image the patch difference is taken as
+ * zero, so every offset contributes with weight 1.
+ */
+kernel void weight_accum(global float *sum, global float *weight,
+                         global uint4 *integral_img, __read_only image2d_t src,
+                         int width, int height, int p, float h,
+                         int4 dx, int4 dy)
+{
+    // w(x) = integral_img(x-p, y-p) +
+    //        integral_img(x+p, y+p) -
+    //        integral_img(x+p, y-p) -
+    //        integral_img(x-p, y+p)
+    // total_sum[x] += w(x, y) * src(x + dx, y + dy)
+    // total_weight += w(x, y)
+
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    // Work-items outside the image must not touch sum[] or weight[].
+    if (x >= width || y >= height)
+        return;
+
+    int4 xoff = x + dx;
+    int4 yoff = y + dy;
+    uint4 a = 0, b = 0, c = 0, d = 0;
+    uint4 src_pix = 0;
+
+    // out-of-bounding-box?
+    int oobb = (x - p) < 0 || (y - p) < 0 || (y + p) >= height || (x + p) >= width;
+
+    src_pix.x = (int)(255 * read_imagef(src, sampler, (int2)(xoff.x, yoff.x)).x);
+    src_pix.y = (int)(255 * read_imagef(src, sampler, (int2)(xoff.y, yoff.y)).x);
+    src_pix.z = (int)(255 * read_imagef(src, sampler, (int2)(xoff.z, yoff.z)).x);
+    src_pix.w = (int)(255 * read_imagef(src, sampler, (int2)(xoff.w, yoff.w)).x);
+    if (!oobb) {
+        a = integral_img[(y - p) * width + x - p];
+        b = integral_img[(y + p) * width + x - p];
+        c = integral_img[(y - p) * width + x + p];
+        d = integral_img[(y + p) * width + x + p];
+    }
+
+    float4 patch_diff = convert_float4(d + a - c - b);
+    float4 w = native_exp(-patch_diff / (h * h));
+    float w_sum = w.x + w.y + w.z + w.w;
+    weight[y * width + x] += w_sum;
+    sum[y * width + x] += dot(w, convert_float4(src_pix));
+}
+
+/*
+ * Blend the accumulated result with the source pixel and write it to dst.
+ *
+ * For pixel (x, y) the first channel of dst becomes
+ *   (sum + 255 * src) / (1 + weight) / 255,
+ * so the source pixel itself takes part in the average with weight 1.
+ * sum[] and weight[] are indexed with the width of dst as row stride.
+ */
+kernel void average(__write_only image2d_t dst,
+                    __read_only image2d_t src,
+                    global float *sum, global float *weight) {
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    int2 dim = get_image_dim(dst);
+
+    // Bail out before indexing sum[] / weight[], not just before the image
+    // write, so out-of-range work-items never read outside the plane.
+    if (x >= dim.x || y >= dim.y)
+        return;
+
+    int idx = y * dim.x + x;
+    float w = weight[idx];
+    float s = sum[idx];
+    float src_pix = read_imagef(src, sampler, (int2)(x, y)).x;
+    float r = (s + src_pix * 255) / (1.0f + w) / 255.0f;
+
+    write_imagef(dst, (int2)(x, y), (float4)(r, 0.0f, 0.0f, 1.0f));
+}