summary | refs | log | tree | commit | diff | stats
path: root/libavfilter/vf_overlay.c
diff options
context:
space:
mode:
author: Paul B Mahol <onemda@gmail.com> 2016-09-10 16:23:13 +0200
committer: Paul B Mahol <onemda@gmail.com> 2016-09-11 00:38:13 +0200
commit: 97297fb1446f3d5713b4258fa24ccaff3f4f932b (patch)
tree: 2f44ed4d9041b1f28b08c96ba02bc15d8bdfd8bd /libavfilter/vf_overlay.c
parent: 140a0485d3775330c42f431e4ff49737d747799c (diff)
download: ffmpeg-streaming-97297fb1446f3d5713b4258fa24ccaff3f4f932b.zip
download: ffmpeg-streaming-97297fb1446f3d5713b4258fa24ccaff3f4f932b.tar.gz
avfilter/vf_overlay: inline yuv output formats

Overall speedup ~10-20%

Tested-by: Michael on mingw32 mingw64 linux32 mips and arm
Diffstat (limited to 'libavfilter/vf_overlay.c')
-rw-r--r-- libavfilter/vf_overlay.c | 251
1 files changed, 147 insertions, 104 deletions
diff --git a/libavfilter/vf_overlay.c b/libavfilter/vf_overlay.c
index 177544e..78ced18 100644
--- a/libavfilter/vf_overlay.c
+++ b/libavfilter/vf_overlay.c
@@ -462,121 +462,160 @@ static void blend_image_packed_rgb(AVFilterContext *ctx,
}
}
-static void blend_image_yuv(AVFilterContext *ctx,
- AVFrame *dst, const AVFrame *src,
- int x, int y)
+static av_always_inline void blend_plane(AVFilterContext *ctx,
+ AVFrame *dst, const AVFrame *src,
+ int src_w, int src_h,
+ int dst_w, int dst_h,
+ int i, int hsub, int vsub,
+ int x, int y,
+ int main_has_alpha)
{
- OverlayContext *s = ctx->priv;
- int i, imax, j, jmax, k, kmax;
- const int src_w = src->width;
- const int src_h = src->height;
- const int dst_w = dst->width;
- const int dst_h = dst->height;
- const int main_has_alpha = s->main_has_alpha;
-
- if (main_has_alpha) {
- uint8_t alpha; ///< the amount of overlay to blend on to main
- uint8_t *s, *sa, *d, *da;
-
- i = FFMAX(-y, 0);
- sa = src->data[3] + i * src->linesize[3];
- da = dst->data[3] + (y+i) * dst->linesize[3];
-
- for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
- j = FFMAX(-x, 0);
- s = sa + j;
- d = da + x+j;
-
- for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) {
- alpha = *s;
- if (alpha != 0 && alpha != 255) {
- uint8_t alpha_d = *d;
- alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
- }
- switch (alpha) {
- case 0:
- break;
- case 255:
- *d = *s;
- break;
- default:
- // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
- *d += FAST_DIV255((255 - *d) * *s);
- }
- d += 1;
- s += 1;
- }
- da += dst->linesize[3];
- sa += src->linesize[3];
- }
- }
- for (i = 0; i < 3; i++) {
- int hsub = i ? s->hsub : 0;
- int vsub = i ? s->vsub : 0;
- int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
- int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
- int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
- int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);
- int yp = y>>vsub;
- int xp = x>>hsub;
- uint8_t *s, *sp, *d, *dp, *a, *ap;
-
- j = FFMAX(-yp, 0);
- sp = src->data[i] + j * src->linesize[i];
- dp = dst->data[i] + (yp+j) * dst->linesize[i];
- ap = src->data[3] + (j<<vsub) * src->linesize[3];
-
- for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
- k = FFMAX(-xp, 0);
- d = dp + xp+k;
- s = sp + k;
- a = ap + (k<<hsub);
-
- for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
- int alpha_v, alpha_h, alpha;
-
+ int src_wp = AV_CEIL_RSHIFT(src_w, hsub);
+ int src_hp = AV_CEIL_RSHIFT(src_h, vsub);
+ int dst_wp = AV_CEIL_RSHIFT(dst_w, hsub);
+ int dst_hp = AV_CEIL_RSHIFT(dst_h, vsub);
+ int yp = y>>vsub;
+ int xp = x>>hsub;
+ uint8_t *s, *sp, *d, *dp, *a, *ap;
+ int jmax, j, k, kmax;
+
+ j = FFMAX(-yp, 0);
+ sp = src->data[i] + j * src->linesize[i];
+ dp = dst->data[i] + (yp+j) * dst->linesize[i];
+ ap = src->data[3] + (j<<vsub) * src->linesize[3];
+
+ for (jmax = FFMIN(-yp + dst_hp, src_hp); j < jmax; j++) {
+ k = FFMAX(-xp, 0);
+ d = dp + xp+k;
+ s = sp + k;
+ a = ap + (k<<hsub);
+
+ for (kmax = FFMIN(-xp + dst_wp, src_wp); k < kmax; k++) {
+ int alpha_v, alpha_h, alpha;
+
+ // average alpha for color components, improve quality
+ if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
+ alpha = (a[0] + a[src->linesize[3]] +
+ a[1] + a[src->linesize[3]+1]) >> 2;
+ } else if (hsub || vsub) {
+ alpha_h = hsub && k+1 < src_wp ?
+ (a[0] + a[1]) >> 1 : a[0];
+ alpha_v = vsub && j+1 < src_hp ?
+ (a[0] + a[src->linesize[3]]) >> 1 : a[0];
+ alpha = (alpha_v + alpha_h) >> 1;
+ } else
+ alpha = a[0];
+ // if the main channel has an alpha channel, alpha has to be calculated
+ // to create an un-premultiplied (straight) alpha value
+ if (main_has_alpha && alpha != 0 && alpha != 255) {
// average alpha for color components, improve quality
+ uint8_t alpha_d;
if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
- alpha = (a[0] + a[src->linesize[3]] +
- a[1] + a[src->linesize[3]+1]) >> 2;
+ alpha_d = (d[0] + d[src->linesize[3]] +
+ d[1] + d[src->linesize[3]+1]) >> 2;
} else if (hsub || vsub) {
alpha_h = hsub && k+1 < src_wp ?
- (a[0] + a[1]) >> 1 : a[0];
+ (d[0] + d[1]) >> 1 : d[0];
alpha_v = vsub && j+1 < src_hp ?
- (a[0] + a[src->linesize[3]]) >> 1 : a[0];
- alpha = (alpha_v + alpha_h) >> 1;
+ (d[0] + d[src->linesize[3]]) >> 1 : d[0];
+ alpha_d = (alpha_v + alpha_h) >> 1;
} else
- alpha = a[0];
- // if the main channel has an alpha channel, alpha has to be calculated
- // to create an un-premultiplied (straight) alpha value
- if (main_has_alpha && alpha != 0 && alpha != 255) {
- // average alpha for color components, improve quality
- uint8_t alpha_d;
- if (hsub && vsub && j+1 < src_hp && k+1 < src_wp) {
- alpha_d = (d[0] + d[src->linesize[3]] +
- d[1] + d[src->linesize[3]+1]) >> 2;
- } else if (hsub || vsub) {
- alpha_h = hsub && k+1 < src_wp ?
- (d[0] + d[1]) >> 1 : d[0];
- alpha_v = vsub && j+1 < src_hp ?
- (d[0] + d[src->linesize[3]]) >> 1 : d[0];
- alpha_d = (alpha_v + alpha_h) >> 1;
- } else
- alpha_d = d[0];
- alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
- }
- *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
- s++;
- d++;
- a += 1 << hsub;
+ alpha_d = d[0];
+ alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
+ }
+ *d = FAST_DIV255(*d * (255 - alpha) + *s * alpha);
+ s++;
+ d++;
+ a += 1 << hsub;
+ }
+ dp += dst->linesize[i];
+ sp += src->linesize[i];
+ ap += (1 << vsub) * src->linesize[3];
+ }
+}
+
+static inline void alpha_composite(const AVFrame *src, const AVFrame *dst,
+ int src_w, int src_h,
+ int dst_w, int dst_h,
+ int x, int y)
+{
+ uint8_t alpha; ///< the amount of overlay to blend on to main
+ uint8_t *s, *sa, *d, *da;
+ int i, imax, j, jmax;
+
+ i = FFMAX(-y, 0);
+ sa = src->data[3] + i * src->linesize[3];
+ da = dst->data[3] + (y+i) * dst->linesize[3];
+
+ for (imax = FFMIN(-y + dst_h, src_h); i < imax; i++) {
+ j = FFMAX(-x, 0);
+ s = sa + j;
+ d = da + x+j;
+
+ for (jmax = FFMIN(-x + dst_w, src_w); j < jmax; j++) {
+ alpha = *s;
+ if (alpha != 0 && alpha != 255) {
+ uint8_t alpha_d = *d;
+ alpha = UNPREMULTIPLY_ALPHA(alpha, alpha_d);
+ }
+ switch (alpha) {
+ case 0:
+ break;
+ case 255:
+ *d = *s;
+ break;
+ default:
+ // apply alpha compositing: main_alpha += (1-main_alpha) * overlay_alpha
+ *d += FAST_DIV255((255 - *d) * *s);
}
- dp += dst->linesize[i];
- sp += src->linesize[i];
- ap += (1 << vsub) * src->linesize[3];
+ d += 1;
+ s += 1;
}
+ da += dst->linesize[3];
+ sa += src->linesize[3];
}
}
+static av_always_inline void blend_image_yuv(AVFilterContext *ctx,
+ AVFrame *dst, const AVFrame *src,
+ int hsub, int vsub,
+ int main_has_alpha,
+ int x, int y)
+{
+ const int src_w = src->width;
+ const int src_h = src->height;
+ const int dst_w = dst->width;
+ const int dst_h = dst->height;
+
+ if (main_has_alpha)
+ alpha_composite(src, dst, src_w, src_h, dst_w, dst_h, x, y);
+
+ blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 0, 0, 0, x, y, main_has_alpha);
+ blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 1, hsub, vsub, x, y, main_has_alpha);
+ blend_plane(ctx, dst, src, src_w, src_h, dst_w, dst_h, 2, hsub, vsub, x, y, main_has_alpha);
+}
+
+static void blend_image_yuv420(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+{
+ OverlayContext *s = ctx->priv;
+
+ blend_image_yuv(ctx, dst, src, 1, 1, s->main_has_alpha, x, y);
+}
+
+static void blend_image_yuv422(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+{
+ OverlayContext *s = ctx->priv;
+
+ blend_image_yuv(ctx, dst, src, 1, 0, s->main_has_alpha, x, y);
+}
+
+static void blend_image_yuv444(AVFilterContext *ctx, AVFrame *dst, const AVFrame *src, int x, int y)
+{
+ OverlayContext *s = ctx->priv;
+
+ blend_image_yuv(ctx, dst, src, 0, 0, s->main_has_alpha, x, y);
+}
+
static int config_input_main(AVFilterLink *inlink)
{
OverlayContext *s = inlink->dst->priv;
@@ -592,9 +631,13 @@ static int config_input_main(AVFilterLink *inlink)
s->main_has_alpha = ff_fmt_is_in(inlink->format, alpha_pix_fmts);
switch (s->format) {
case OVERLAY_FORMAT_YUV420:
+ s->blend_image = blend_image_yuv420;
+ break;
case OVERLAY_FORMAT_YUV422:
+ s->blend_image = blend_image_yuv422;
+ break;
case OVERLAY_FORMAT_YUV444:
- s->blend_image = blend_image_yuv;
+ s->blend_image = blend_image_yuv444;
break;
case OVERLAY_FORMAT_RGB:
s->blend_image = blend_image_packed_rgb;
OpenPOWER on IntegriCloud