diff options
author | Anton Khirnov <anton@khirnov.net> | 2016-08-23 21:46:50 +0200 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2016-08-31 08:15:47 +0200 |
commit | d7bc52bf456deba0f32d9fe5c288ec441f1ebef5 (patch) | |
tree | 23727303f674466f004d94c55f00b61124a8a670 | |
parent | 24da430324735f95880c4a4a54298dc8023125bb (diff) | |
download | ffmpeg-streaming-d7bc52bf456deba0f32d9fe5c288ec441f1ebef5.zip ffmpeg-streaming-d7bc52bf456deba0f32d9fe5c288ec441f1ebef5.tar.gz |
imgutils: add a function for copying image data from GPU mapped memory
See https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers
-rw-r--r-- | doc/APIchanges | 4 | ||||
-rw-r--r-- | libavutil/imgutils.c | 75 | ||||
-rw-r--r-- | libavutil/imgutils.h | 18 | ||||
-rw-r--r-- | libavutil/imgutils_internal.h | 30 | ||||
-rw-r--r-- | libavutil/version.h | 2 | ||||
-rw-r--r-- | libavutil/x86/Makefile | 2 | ||||
-rw-r--r-- | libavutil/x86/imgutils.asm | 53 | ||||
-rw-r--r-- | libavutil/x86/imgutils_init.c | 49 |
8 files changed, 219 insertions, 14 deletions
diff --git a/doc/APIchanges b/doc/APIchanges index c4cc0b4..a4e418a 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -13,6 +13,10 @@ libavutil: 2015-08-28 API changes, most recent first: +2016-08-24 - xxxxxxx - lavu 55.21.0 - imgutils.h + Add av_image_copy_uc_from(), a version of av_image_copy() for copying + from GPU mapped memory. + 2016-xx-xx - xxxxxxx - lavc 59.26.0 - vaapi.h Deprecate struct vaapi_context and the vaapi.h installed header. Callers should set AVCodecContext.hw_frames_ctx instead. diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c index 6c58139..20d06ec 100644 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@ -23,6 +23,7 @@ #include "common.h" #include "imgutils.h" +#include "imgutils_internal.h" #include "internal.h" #include "intreadwrite.h" #include "log.h" @@ -252,9 +253,9 @@ int av_image_check_sar(unsigned int w, unsigned int h, AVRational sar) return AVERROR(EINVAL); } -void av_image_copy_plane(uint8_t *dst, int dst_linesize, - const uint8_t *src, int src_linesize, - int bytewidth, int height) +static void image_copy_plane(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) { if (!dst || !src) return; @@ -265,9 +266,33 @@ void av_image_copy_plane(uint8_t *dst, int dst_linesize, } } -void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], - const uint8_t *src_data[4], const int src_linesizes[4], - enum AVPixelFormat pix_fmt, int width, int height) +static void image_copy_plane_uc_from(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int ret = -1; + +#if ARCH_X86 + ret = ff_image_copy_plane_uc_from_x86(dst, dst_linesize, src, src_linesize, + bytewidth, height); +#endif + + if (ret < 0) + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +void av_image_copy_plane(uint8_t *dst, int dst_linesize, + const uint8_t *src, int src_linesize, + int bytewidth, int height) +{ + image_copy_plane(dst, dst_linesize, src, src_linesize, bytewidth, height); +} + +static void image_copy(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height, + void (*copy_plane)(uint8_t *, ptrdiff_t, const uint8_t *, + ptrdiff_t, ptrdiff_t, int)) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); @@ -276,9 +301,9 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], if (desc->flags & AV_PIX_FMT_FLAG_PAL || desc->flags & AV_PIX_FMT_FLAG_PSEUDOPAL) { - av_image_copy_plane(dst_data[0], dst_linesizes[0], - src_data[0], src_linesizes[0], - width, height); + copy_plane(dst_data[0], dst_linesizes[0], + src_data[0], src_linesizes[0], + width, height); /* copy the palette */ memcpy(dst_data[1], src_data[1], 4*256); } else { @@ -289,17 +314,41 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], for (i = 0; i < planes_nb; i++) { int h = height; - int bwidth = av_image_get_linesize(pix_fmt, width, i); + ptrdiff_t bwidth = av_image_get_linesize(pix_fmt, width, i); if (i == 1 || i == 2) { h = AV_CEIL_RSHIFT(height, desc->log2_chroma_h); } - av_image_copy_plane(dst_data[i], dst_linesizes[i], - src_data[i], src_linesizes[i], - bwidth, h); + copy_plane(dst_data[i], dst_linesizes[i], + src_data[i], src_linesizes[i], + bwidth, h); } } } +void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], + const uint8_t *src_data[4], const int src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + ptrdiff_t dst_linesizes1[4], src_linesizes1[4]; + int i; + + for (i = 0; i < 4; i++) { + dst_linesizes1[i] = dst_linesizes[i]; + src_linesizes1[i] = src_linesizes[i]; + } + + image_copy(dst_data, dst_linesizes1, src_data, src_linesizes1, pix_fmt, + width, height, image_copy_plane); +} + +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height) +{ + image_copy(dst_data, dst_linesizes, src_data, src_linesizes, pix_fmt, + width, height, image_copy_plane_uc_from); +} + int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum AVPixelFormat pix_fmt, int width, int height, int align) diff --git a/libavutil/imgutils.h b/libavutil/imgutils.h index 04d6138..f98a18f 100644 --- a/libavutil/imgutils.h +++ b/libavutil/imgutils.h @@ -118,6 +118,24 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], enum AVPixelFormat pix_fmt, int width, int height); /** + * Copy image data located in uncacheable (e.g. GPU mapped) memory. Where + * available, this function will use special functionality for reading from such + * memory, which may result in greatly improved performance compared to plain + * av_image_copy(). + * + * The data pointers and the linesizes must be aligned to the maximum required + * by the CPU architecture. + * + * @note The linesize parameters have the type ptrdiff_t here, while they are + * int for av_image_copy(). + * @note On x86, the linesizes currently need to be aligned to the cacheline + * size (i.e. 64) to get improved performance. + */ +void av_image_copy_uc_from(uint8_t *dst_data[4], const ptrdiff_t dst_linesizes[4], + const uint8_t *src_data[4], const ptrdiff_t src_linesizes[4], + enum AVPixelFormat pix_fmt, int width, int height); + +/** * Setup the data pointers and linesizes based on the specified image * parameters and the provided array. * diff --git a/libavutil/imgutils_internal.h b/libavutil/imgutils_internal.h new file mode 100644 index 0000000..7a932a5 --- /dev/null +++ b/libavutil/imgutils_internal.h @@ -0,0 +1,30 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_IMGUTILS_INTERNAL_H +#define AVUTIL_IMGUTILS_INTERNAL_H + +#include <stddef.h> +#include <stdint.h> + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + + +#endif /* AVUTIL_IMGUTILS_INTERNAL_H */ diff --git a/libavutil/version.h b/libavutil/version.h index 5cbc0e8..f63dfa5 100644 --- a/libavutil/version.h +++ b/libavutil/version.h @@ -54,7 +54,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 55 -#define LIBAVUTIL_VERSION_MINOR 20 +#define LIBAVUTIL_VERSION_MINOR 21 #define LIBAVUTIL_VERSION_MICRO 0 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/libavutil/x86/Makefile b/libavutil/x86/Makefile index 1e19082..ffee43b 100644 --- a/libavutil/x86/Makefile +++ b/libavutil/x86/Makefile @@ -1,8 +1,10 @@ OBJS += x86/cpu.o \ x86/float_dsp_init.o \ + x86/imgutils_init.o \ x86/lls_init.o \ YASM-OBJS += x86/cpuid.o \ x86/emms.o \ x86/float_dsp.o \ + x86/imgutils.o \ x86/lls.o \ diff --git a/libavutil/x86/imgutils.asm b/libavutil/x86/imgutils.asm new file mode 100644 index 0000000..9c1e940 --- /dev/null +++ b/libavutil/x86/imgutils.asm @@ -0,0 +1,53 @@ +;***************************************************************************** +;* Copyright 2016 Anton Khirnov +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_XMM sse4 +cglobal image_copy_plane_uc_from, 6, 7, 4, dst, dst_linesize, src, src_linesize, bw, height, rowpos + add dstq, bwq + add srcq, bwq + neg bwq + +.row_start + mov rowposq, bwq + +.loop + movntdqa m0, [srcq + rowposq + 0 * mmsize] + movntdqa m1, [srcq + rowposq + 1 * mmsize] + movntdqa m2, [srcq + rowposq + 2 * mmsize] + movntdqa m3, [srcq + rowposq + 3 * mmsize] + + mova [dstq + rowposq + 0 * mmsize], m0 + mova [dstq + rowposq + 1 * mmsize], m1 + mova [dstq + rowposq + 2 * mmsize], m2 + mova [dstq + rowposq + 3 * mmsize], m3 + + add rowposq, 4 * mmsize + jnz .loop + + add srcq, src_linesizeq + add dstq, dst_linesizeq + dec heightd + jnz .row_start + + RET diff --git a/libavutil/x86/imgutils_init.c b/libavutil/x86/imgutils_init.c new file mode 100644 index 0000000..20ee9ab --- /dev/null +++ b/libavutil/x86/imgutils_init.c @@ -0,0 +1,49 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stddef.h> +#include <stdint.h> + +#include "libavutil/cpu.h" +#include "libavutil/error.h" +#include "libavutil/imgutils.h" +#include "libavutil/imgutils_internal.h" +#include "libavutil/internal.h" + +#include "cpu.h" + +void ff_image_copy_plane_uc_from_sse4(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height); + +int ff_image_copy_plane_uc_from_x86(uint8_t *dst, ptrdiff_t dst_linesize, + const uint8_t *src, ptrdiff_t src_linesize, + ptrdiff_t bytewidth, int height) +{ + int cpu_flags = av_get_cpu_flags(); + ptrdiff_t bw_aligned = FFALIGN(bytewidth, 64); + + if (EXTERNAL_SSE4(cpu_flags) && + bw_aligned <= dst_linesize && bw_aligned <= src_linesize) + ff_image_copy_plane_uc_from_sse4(dst, dst_linesize, src, src_linesize, + bw_aligned, height); + else + return AVERROR(ENOSYS); + + return 0; +} |