diff options
author | Michael Niedermayer <michael@niedermayer.cc> | 2015-07-12 21:00:50 +0200 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2015-07-12 21:03:06 +0200 |
commit | f14fc55969d8662a7572ea5a3bc2fb151ea0d2ed (patch) | |
tree | 4cd786371eb04d36c8fe0b259dbb871ec6413118 | |
parent | 4547cf68a0d28c01549f84567e4d39a8b40230e7 (diff) | |
parent | 8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717 (diff) | |
download | ffmpeg-streaming-f14fc55969d8662a7572ea5a3bc2fb151ea0d2ed.zip ffmpeg-streaming-f14fc55969d8662a7572ea5a3bc2fb151ea0d2ed.tar.gz |
Merge commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717'
* commit '8bc67ec2c0d2b5444d51a1bed1d50f0e10d92717':
Checkasm: assembly testing and benchmarking tool
Merged-by: Michael Niedermayer <michael@niedermayer.cc>
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | tests/Makefile | 2 | ||||
-rw-r--r-- | tests/checkasm/Makefile | 33 | ||||
-rw-r--r-- | tests/checkasm/checkasm.c | 484 | ||||
-rw-r--r-- | tests/checkasm/checkasm.h | 115 | ||||
-rw-r--r-- | tests/checkasm/h264pred.c | 252 | ||||
-rw-r--r-- | tests/checkasm/x86/Makefile | 6 | ||||
-rw-r--r-- | tests/checkasm/x86/checkasm.asm | 193 |
8 files changed, 1086 insertions, 0 deletions
@@ -63,6 +63,7 @@ /libavutil/ffversion.h /tests/audiogen /tests/base64 +/tests/checkasm/checkasm /tests/data/ /tests/pixfmts.mak /tests/rotozoom diff --git a/tests/Makefile b/tests/Makefile index 9ac7f43..8ba8210 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -230,5 +230,7 @@ testclean:: -include $(wildcard tests/*.d) +include $(SRC_PATH)/tests/checkasm/Makefile + .PHONY: fate* lcov lcov-reset .INTERMEDIATE: coverage.info diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile new file mode 100644 index 0000000..33e2c09 --- /dev/null +++ b/tests/checkasm/Makefile @@ -0,0 +1,33 @@ +# libavcodec tests +AVCODECOBJS-$(CONFIG_H264PRED) += h264pred.o + +CHECKASMOBJS-$(CONFIG_AVCODEC) += $(AVCODECOBJS-yes) + + +-include $(SRC_PATH)/tests/checkasm/$(ARCH)/Makefile + +CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o +CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) + +-include $(CHECKASMOBJS:.o=.d) + +CHECKASMDIRS := $(sort $(dir $(CHECKASMOBJS))) +$(CHECKASMOBJS): | $(CHECKASMDIRS) +OBJDIRS += $(CHECKASMDIRS) + +# We rely on function pointers intentionally declared without specified argument types. +tests/checkasm/%.o: CFLAGS := $(CFLAGS:-Wstrict-prototypes=-Wno-strict-prototypes) + +CHECKASM := tests/checkasm/checkasm$(EXESUF) + +$(CHECKASM): $(EXEOBJS) $(CHECKASMOBJS) $(FF_DEP_LIBS) + $(LD) $(LDFLAGS) $(LDEXEFLAGS) $(LD_O) $(CHECKASMOBJS) $(FF_EXTRALIBS) + +checkasm: $(CHECKASM) + +clean:: checkasmclean + +checkasmclean: + $(RM) $(CHECKASM) $(CLEANSUFFIXES:%=tests/checkasm/%) $(CLEANSUFFIXES:%=tests/checkasm/$(ARCH)/%) + +.PHONY: checkasm diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c new file mode 100644 index 0000000..f44f981 --- /dev/null +++ b/tests/checkasm/checkasm.c @@ -0,0 +1,484 @@ +/* + * Assembly testing and benchmarking tool + * Copyright (c) 2015 Henrik Gramner + * Copyright (c) 2008 Loren Merritt + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "checkasm.h" +#include "libavutil/common.h" +#include "libavutil/cpu.h" +#include "libavutil/random_seed.h" + +#if ARCH_X86 +#include "libavutil/x86/cpu.h" +#endif + +#if HAVE_SETCONSOLETEXTATTRIBUTE +#include <windows.h> +#define COLOR_RED FOREGROUND_RED +#define COLOR_GREEN FOREGROUND_GREEN +#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN) +#else +#define COLOR_RED 1 +#define COLOR_GREEN 2 +#define COLOR_YELLOW 3 +#endif + +#if HAVE_UNISTD_H +#include <unistd.h> +#endif + +#if !HAVE_ISATTY +#define isatty(fd) 1 +#endif + +/* List of tests to invoke */ +static void (* const tests[])(void) = { +#if CONFIG_H264PRED + checkasm_check_h264pred, +#endif + NULL +}; + +/* List of cpu flags to check */ +static const struct { + const char *name; + const char *suffix; + int flag; +} cpus[] = { +#if ARCH_X86 + { "MMX", "mmx", AV_CPU_FLAG_MMX|AV_CPU_FLAG_CMOV }, + { "MMXEXT", "mmxext", AV_CPU_FLAG_MMXEXT }, + { "3DNOW", "3dnow", AV_CPU_FLAG_3DNOW }, + { "3DNOWEXT", "3dnowext", AV_CPU_FLAG_3DNOWEXT }, + { "SSE", "sse", AV_CPU_FLAG_SSE }, + { "SSE2", "sse2", AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW }, + { "SSE3", "sse3", AV_CPU_FLAG_SSE3|AV_CPU_FLAG_SSE3SLOW }, + { "SSSE3", 
"ssse3", AV_CPU_FLAG_SSSE3|AV_CPU_FLAG_ATOM }, + { "SSE4.1", "sse4", AV_CPU_FLAG_SSE4 }, + { "SSE4.2", "sse42", AV_CPU_FLAG_SSE42 }, + { "AVX", "avx", AV_CPU_FLAG_AVX }, + { "XOP", "xop", AV_CPU_FLAG_XOP }, + { "FMA3", "fma3", AV_CPU_FLAG_FMA3 }, + { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, + { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, +#endif + { NULL } +}; + +typedef struct CheckasmFuncVersion { + struct CheckasmFuncVersion *next; + intptr_t (*func)(); + int ok; + int cpu; + int iterations; + uint64_t cycles; +} CheckasmFuncVersion; + +/* Binary search tree node */ +typedef struct CheckasmFunc { + struct CheckasmFunc *child[2]; + CheckasmFuncVersion versions; + char name[1]; +} CheckasmFunc; + +/* Internal state */ +static struct { + CheckasmFunc *funcs; + CheckasmFunc *current_func; + CheckasmFuncVersion *current_func_ver; + const char *bench_pattern; + int bench_pattern_len; + int num_checked; + int num_failed; + int nop_time; + int cpu_flag; + const char *cpu_flag_name; +} state; + +/* PRNG state */ +AVLFG checkasm_lfg; + +/* Print colored text to stderr if the terminal supports it */ +static void color_printf(int color, const char *fmt, ...) 
+{ + static int use_color = -1; + va_list arg; + +#if HAVE_SETCONSOLETEXTATTRIBUTE + static HANDLE con; + static WORD org_attributes; + + if (use_color < 0) { + CONSOLE_SCREEN_BUFFER_INFO con_info; + con = GetStdHandle(STD_ERROR_HANDLE); + if (con && con != INVALID_HANDLE_VALUE && GetConsoleScreenBufferInfo(con, &con_info)) { + org_attributes = con_info.wAttributes; + use_color = 1; + } else + use_color = 0; + } + if (use_color) + SetConsoleTextAttribute(con, (org_attributes & 0xfff0) | (color & 0x0f)); +#else + if (use_color < 0) { + const char *term = getenv("TERM"); + use_color = term && strcmp(term, "dumb") && isatty(2); + } + if (use_color) + fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); +#endif + + va_start(arg, fmt); + vfprintf(stderr, fmt, arg); + va_end(arg); + + if (use_color) { +#if HAVE_SETCONSOLETEXTATTRIBUTE + SetConsoleTextAttribute(con, org_attributes); +#else + fprintf(stderr, "\x1b[0m"); +#endif + } +} + +/* Deallocate a tree */ +static void destroy_func_tree(CheckasmFunc *f) +{ + if (f) { + CheckasmFuncVersion *v = f->versions.next; + while (v) { + CheckasmFuncVersion *next = v->next; + free(v); + v = next; + } + + destroy_func_tree(f->child[0]); + destroy_func_tree(f->child[1]); + free(f); + } +} + +/* Allocate a zero-initialized block, clean up and exit on failure */ +static void *checkasm_malloc(size_t size) +{ + void *ptr = calloc(1, size); + if (!ptr) { + fprintf(stderr, "checkasm: malloc failed\n"); + destroy_func_tree(state.funcs); + exit(1); + } + return ptr; +} + +/* Get the suffix of the specified cpu flag */ +static const char *cpu_suffix(int cpu) +{ + int i = FF_ARRAY_ELEMS(cpus); + + while (--i >= 0) + if (cpu & cpus[i].flag) + return cpus[i].suffix; + + return "c"; +} + +#ifdef AV_READ_TIME +static int cmp_nop(const void *a, const void *b) +{ + return *(const uint16_t*)a - *(const uint16_t*)b; +} + +/* Measure the overhead of the timing code (in decicycles) */ +static int measure_nop_time(void) +{ + uint16_t 
nops[10000]; + int i, nop_sum = 0; + + for (i = 0; i < 10000; i++) { + uint64_t t = AV_READ_TIME(); + nops[i] = AV_READ_TIME() - t; + } + + qsort(nops, 10000, sizeof(uint16_t), cmp_nop); + for (i = 2500; i < 7500; i++) + nop_sum += nops[i]; + + return nop_sum / 500; +} + +/* Print benchmark results */ +static void print_benchs(CheckasmFunc *f) +{ + if (f) { + print_benchs(f->child[0]); + + /* Only print functions with at least one assembly version */ + if (f->versions.cpu || f->versions.next) { + CheckasmFuncVersion *v = &f->versions; + do { + if (v->iterations) { + int decicycles = (10*v->cycles/v->iterations - state.nop_time) / 4; + printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu), decicycles/10, decicycles%10); + } + } while ((v = v->next)); + } + + print_benchs(f->child[1]); + } +} +#endif + +/* ASCIIbetical sort except preserving natural order for numbers */ +static int cmp_func_names(const char *a, const char *b) +{ + int ascii_diff, digit_diff; + + for (; !(ascii_diff = *a - *b) && *a; a++, b++); + for (; av_isdigit(*a) && av_isdigit(*b); a++, b++); + + return (digit_diff = av_isdigit(*a) - av_isdigit(*b)) ? 
digit_diff : ascii_diff; +} + +/* Get a node with the specified name, creating it if it doesn't exist */ +static CheckasmFunc *get_func(const char *name, int length) +{ + CheckasmFunc *f, **f_ptr = &state.funcs; + + /* Search the tree for a matching node */ + while ((f = *f_ptr)) { + int cmp = cmp_func_names(name, f->name); + if (!cmp) + return f; + + f_ptr = &f->child[(cmp > 0)]; + } + + /* Allocate and insert a new node into the tree */ + f = *f_ptr = checkasm_malloc(sizeof(CheckasmFunc) + length); + memcpy(f->name, name, length+1); + + return f; +} + +/* Perform tests and benchmarks for the specified cpu flag if supported by the host */ +static void check_cpu_flag(const char *name, int flag) +{ + int old_cpu_flag = state.cpu_flag; + + flag |= old_cpu_flag; + av_set_cpu_flags_mask(flag); + state.cpu_flag = av_get_cpu_flags(); + + if (!flag || state.cpu_flag != old_cpu_flag) { + int i; + + state.cpu_flag_name = name; + for (i = 0; tests[i]; i++) + tests[i](); + } +} + +/* Print the name of the current CPU flag, but only do it once */ +static void print_cpu_name(void) +{ + if (state.cpu_flag_name) { + color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name); + state.cpu_flag_name = NULL; + } +} + +int main(int argc, char *argv[]) +{ + int i, seed, ret = 0; + + if (!tests[0] || !cpus[0].flag) { + fprintf(stderr, "checkasm: no tests to perform\n"); + return 1; + } + + if (argc > 1 && !strncmp(argv[1], "--bench", 7)) { +#ifndef AV_READ_TIME + fprintf(stderr, "checkasm: --bench is not supported on your system\n"); + return 1; +#endif + if (argv[1][7] == '=') { + state.bench_pattern = argv[1] + 8; + state.bench_pattern_len = strlen(state.bench_pattern); + } else + state.bench_pattern = ""; + + argc--; + argv++; + } + + seed = (argc > 1) ? 
atoi(argv[1]) : av_get_random_seed(); + fprintf(stderr, "checkasm: using random seed %u\n", seed); + av_lfg_init(&checkasm_lfg, seed); + + check_cpu_flag(NULL, 0); + for (i = 0; cpus[i].flag; i++) + check_cpu_flag(cpus[i].name, cpus[i].flag); + + if (state.num_failed) { + fprintf(stderr, "checkasm: %d of %d tests have failed\n", state.num_failed, state.num_checked); + ret = 1; + } else { + fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked); +#ifdef AV_READ_TIME + if (state.bench_pattern) { + state.nop_time = measure_nop_time(); + printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10); + print_benchs(state.funcs); + } +#endif + } + + destroy_func_tree(state.funcs); + return ret; +} + +/* Decide whether or not the specified function needs to be tested and + * allocate/initialize data structures if needed. Returns a pointer to a + * reference function if the function should be tested, otherwise NULL */ +intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() +{ + char name_buf[256]; + intptr_t (*ref)() = func; + CheckasmFuncVersion *v; + int name_length; + va_list arg; + + va_start(arg, name); + name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg); + va_end(arg); + + if (!func || name_length <= 0 || name_length >= sizeof(name_buf)) + return NULL; + + state.current_func = get_func(name_buf, name_length); + v = &state.current_func->versions; + + if (v->func) { + CheckasmFuncVersion *prev; + do { + /* Only test functions that haven't already been tested */ + if (v->func == func) + return NULL; + + if (v->ok) + ref = v->func; + + prev = v; + } while ((v = v->next)); + + v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); + } + + v->func = func; + v->ok = 1; + v->cpu = state.cpu_flag; + state.current_func_ver = v; + + if (state.cpu_flag) + state.num_checked++; + + return ref; +} + +/* Decide whether or not the current function needs to be benchmarked */ +int checkasm_bench_func(void) +{ + return 
!state.num_failed && state.bench_pattern && + !strncmp(state.current_func->name, state.bench_pattern, state.bench_pattern_len); +} + +/* Indicate that the current test has failed */ +void checkasm_fail_func(const char *msg, ...) +{ + if (state.current_func_ver->cpu && state.current_func_ver->ok) { + va_list arg; + + print_cpu_name(); + fprintf(stderr, " %s_%s (", state.current_func->name, cpu_suffix(state.current_func_ver->cpu)); + va_start(arg, msg); + vfprintf(stderr, msg, arg); + va_end(arg); + fprintf(stderr, ")\n"); + + state.current_func_ver->ok = 0; + state.num_failed++; + } +} + +/* Update benchmark results of the current function */ +void checkasm_update_bench(int iterations, uint64_t cycles) +{ + state.current_func_ver->iterations += iterations; + state.current_func_ver->cycles += cycles; +} + +/* Print the outcome of all tests performed since the last time this function was called */ +void checkasm_report(const char *name, ...) +{ + static int prev_checked, prev_failed, max_length; + + if (state.num_checked > prev_checked) { + print_cpu_name(); + + if (*name) { + int pad_length = max_length; + va_list arg; + + fprintf(stderr, " - "); + va_start(arg, name); + pad_length -= vfprintf(stderr, name, arg); + va_end(arg); + fprintf(stderr, "%*c", FFMAX(pad_length, 0) + 2, '['); + } else + fprintf(stderr, " - %-*s [", max_length, state.current_func->name); + + if (state.num_failed == prev_failed) + color_printf(COLOR_GREEN, "OK"); + else + color_printf(COLOR_RED, "FAILED"); + fprintf(stderr, "]\n"); + + prev_checked = state.num_checked; + prev_failed = state.num_failed; + } else if (!state.cpu_flag) { + int length; + + /* Calculate the amount of padding required to make the output vertically aligned */ + if (*name) { + va_list arg; + va_start(arg, name); + length = vsnprintf(NULL, 0, name, arg); + va_end(arg); + } else + length = strlen(state.current_func->name); + + if (length > max_length) + max_length = length; + } +} diff --git a/tests/checkasm/checkasm.h 
b/tests/checkasm/checkasm.h new file mode 100644 index 0000000..039aed7 --- /dev/null +++ b/tests/checkasm/checkasm.h @@ -0,0 +1,115 @@ +/* + * Assembly testing and benchmarking tool + * Copyright (c) 2015 Henrik Gramner + * Copyright (c) 2008 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef CHECKASM_H +#define CHECKASM_H + +#include <stdint.h> +#include "config.h" +#include "libavutil/avstring.h" +#include "libavutil/lfg.h" +#include "libavutil/timer.h" + +void checkasm_check_h264pred(void); + +intptr_t (*checkasm_check_func(intptr_t (*func)(), const char *name, ...))() av_printf_format(2, 3); +int checkasm_bench_func(void); +void checkasm_fail_func(const char *msg, ...) av_printf_format(1, 2); +void checkasm_update_bench(int iterations, uint64_t cycles); +void checkasm_report(const char *name, ...) av_printf_format(1, 2); + +extern AVLFG checkasm_lfg; +#define rnd() av_lfg_get(&checkasm_lfg) + +static av_unused intptr_t (*func_ref)(); +static av_unused intptr_t (*func_new)(); + +#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */ + +/* Decide whether or not the specified function needs to be tested */ +#define check_func(func, ...) 
((func_new = (intptr_t (*)())func) &&\ + (func_ref = checkasm_check_func(func_new, __VA_ARGS__))) + +/* Indicate that the current test has failed */ +#define fail() checkasm_fail_func("%s:%d", av_basename(__FILE__), __LINE__) + +/* Print the test outcome */ +#define report(...) checkasm_report("" __VA_ARGS__) + +/* Call the reference function */ +#define call_ref(...) func_ref(__VA_ARGS__) + +#if ARCH_X86 && HAVE_YASM +/* Verifies that clobbered callee-saved registers are properly saved and restored */ +intptr_t checkasm_checked_call(intptr_t (*func)(), ...); +#endif + +/* Call the function */ +#if ARCH_X86_64 && HAVE_YASM +/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. + * This is done by clobbering the stack with junk around the stack pointer and calling the + * assembly function through checkasm_checked_call with added dummy arguments which forces all + * real arguments to be passed on the stack and not in registers. For 32-bit arguments the + * upper half of the 64-bit register locations on the stack will now contain junk which will + * cause misbehaving functions to either produce incorrect output or segfault. Note that + * even though this works extremely well in practice, it's technically not guaranteed + * and false negatives are theoretically possible, but there can never be any false positives. + */ +void checkasm_stack_clobber(uint64_t clobber, ...); +#define CLOB (UINT64_C(0xdeadbeefdeadbeef)) +#define call_new(...) (checkasm_stack_clobber(CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,\ + CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB,CLOB),\ + checkasm_checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__)) +#elif ARCH_X86_32 && HAVE_YASM +#define call_new(...) checkasm_checked_call(func_new, __VA_ARGS__) +#else +#define call_new(...) 
func_new(__VA_ARGS__) +#endif + +/* Benchmark the function */ +#ifdef AV_READ_TIME +#define bench_new(...)\ + do {\ + if (checkasm_bench_func()) {\ + intptr_t (*tfunc)() = func_new;\ + uint64_t tsum = 0;\ + int ti, tcount = 0;\ + for (ti = 0; ti < BENCH_RUNS; ti++) {\ + uint64_t t = AV_READ_TIME();\ + tfunc(__VA_ARGS__);\ + tfunc(__VA_ARGS__);\ + tfunc(__VA_ARGS__);\ + tfunc(__VA_ARGS__);\ + t = AV_READ_TIME() - t;\ + if (t*tcount <= tsum*4 && ti > 0) {\ + tsum += t;\ + tcount++;\ + }\ + }\ + checkasm_update_bench(tcount, tsum);\ + }\ + } while (0) +#else +#define bench_new(...) +#endif + +#endif diff --git a/tests/checkasm/h264pred.c b/tests/checkasm/h264pred.c new file mode 100644 index 0000000..5ac91e2 --- /dev/null +++ b/tests/checkasm/h264pred.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2015 Henrik Gramner + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <string.h> +#include "checkasm.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/h264pred.h" +#include "libavutil/common.h" +#include "libavutil/intreadwrite.h" + +static const int codec_ids[4] = { AV_CODEC_ID_H264, AV_CODEC_ID_VP8, AV_CODEC_ID_RV40, AV_CODEC_ID_SVQ3 }; + +static const char * const pred4x4_modes[4][15] = { + { /* H264 */ + [VERT_PRED ] = "vertical", + [HOR_PRED ] = "horizontal", + [DC_PRED ] = "dc", + [DIAG_DOWN_LEFT_PRED ] = "down_left", + [DIAG_DOWN_RIGHT_PRED] = "down_right", + [VERT_RIGHT_PRED ] = "vertical_right", + [HOR_DOWN_PRED ] = "horizontal_right", + [VERT_LEFT_PRED ] = "vertical_left", + [HOR_UP_PRED ] = "horizontal_up", + [LEFT_DC_PRED ] = "left_dc", + [TOP_DC_PRED ] = "top_dc", + [DC_128_PRED ] = "dc_128", + }, + { /* VP8 */ + [VERT_PRED ] = "vertical_vp8", + [HOR_PRED ] = "horizontal_vp8", + [VERT_LEFT_PRED] = "vertical_left_vp8", + [TM_VP8_PRED ] = "tm_vp8", + [DC_127_PRED ] = "dc_127_vp8", + [DC_129_PRED ] = "dc_129_vp8", + }, + { /* RV40 */ + [DIAG_DOWN_LEFT_PRED ] = "down_left_rv40", + [VERT_LEFT_PRED ] = "vertical_left_rv40", + [HOR_UP_PRED ] = "horizontal_up_rv40", + [DIAG_DOWN_LEFT_PRED_RV40_NODOWN] = "down_left_nodown_rv40", + [HOR_UP_PRED_RV40_NODOWN ] = "horizontal_up_nodown_rv40", + [VERT_LEFT_PRED_RV40_NODOWN ] = "vertical_left_nodown_rv40", + }, + { /* SVQ3 */ + [DIAG_DOWN_LEFT_PRED] = "down_left_svq3", + }, +}; + +static const char * const pred8x8_modes[4][11] = { + { /* H264 */ + [DC_PRED8x8 ] = "dc", + [HOR_PRED8x8 ] = "horizontal", + [VERT_PRED8x8 ] = "vertical", + [PLANE_PRED8x8 ] = "plane", + [LEFT_DC_PRED8x8 ] = "left_dc", + [TOP_DC_PRED8x8 ] = "top_dc", + [DC_128_PRED8x8 ] = "dc_128", + [ALZHEIMER_DC_L0T_PRED8x8] = "mad_cow_dc_l0t", + [ALZHEIMER_DC_0LT_PRED8x8] = "mad_cow_dc_0lt", + [ALZHEIMER_DC_L00_PRED8x8] = "mad_cow_dc_l00", + [ALZHEIMER_DC_0L0_PRED8x8] = "mad_cow_dc_0l0", + }, + { /* VP8 */ + [PLANE_PRED8x8 ] = "tm_vp8", + [DC_127_PRED8x8] = "dc_127_vp8", + [DC_129_PRED8x8] = 
"dc_129_vp8", + }, + { /* RV40 */ + [DC_PRED8x8 ] = "dc_rv40", + [LEFT_DC_PRED8x8] = "left_dc_rv40", + [TOP_DC_PRED8x8 ] = "top_dc_rv40", + }, + { /* SVQ3 */ + }, +}; + +static const char * const pred16x16_modes[4][9] = { + { /* H264 */ + [DC_PRED8x8 ] = "dc", + [HOR_PRED8x8 ] = "horizontal", + [VERT_PRED8x8 ] = "vertical", + [PLANE_PRED8x8 ] = "plane", + [LEFT_DC_PRED8x8] = "left_dc", + [TOP_DC_PRED8x8 ] = "top_dc", + [DC_128_PRED8x8 ] = "dc_128", + }, + { /* VP8 */ + [PLANE_PRED8x8 ] = "tm_vp8", + [DC_127_PRED8x8] = "dc_127_vp8", + [DC_129_PRED8x8] = "dc_129_vp8", + }, + { /* RV40 */ + [PLANE_PRED8x8] = "plane_rv40", + }, + { /* SVQ3 */ + [PLANE_PRED8x8] = "plane_svq3", + }, +}; + +static const uint32_t pixel_mask[3] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff }; + +#define SIZEOF_PIXEL ((bit_depth + 7) / 8) +#define BUF_SIZE (3*16*17) + +#define check_pred_func(func, name, mode_name)\ + (mode_name && ((codec_ids[codec] == AV_CODEC_ID_H264) ?\ + check_func(func, "pred%s_%s_%d", name, mode_name, bit_depth) :\ + check_func(func, "pred%s_%s", name, mode_name))) + +#define randomize_buffers()\ + do {\ + uint32_t mask = pixel_mask[bit_depth-8];\ + int i;\ + for (i = 0; i < BUF_SIZE; i += 4) {\ + uint32_t r = rnd() & mask;\ + AV_WN32A(buf0+i, r);\ + AV_WN32A(buf1+i, r);\ + }\ + } while (0) + +#define src0 (buf0 + 4*16) /* Offset to allow room for top and left */ +#define src1 (buf1 + 4*16) + +static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, + int codec, int chroma_format, int bit_depth) +{ + if (chroma_format == 1) { + uint8_t *topright = buf0 + 2*16; + int pred_mode; + for (pred_mode = 0; pred_mode < 15; pred_mode++) { + if (check_pred_func(h->pred4x4[pred_mode], "4x4", pred4x4_modes[codec][pred_mode])) { + randomize_buffers(); + call_ref(src0, topright, (ptrdiff_t)12*SIZEOF_PIXEL); + call_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + bench_new(src1, topright, (ptrdiff_t)12*SIZEOF_PIXEL); + } + } 
+ } +} + +static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, + int codec, int chroma_format, int bit_depth) +{ + int pred_mode; + for (pred_mode = 0; pred_mode < 11; pred_mode++) { + if (check_pred_func(h->pred8x8[pred_mode], (chroma_format == 2) ? "8x16" : "8x8", + pred8x8_modes[codec][pred_mode])) { + randomize_buffers(); + call_ref(src0, (ptrdiff_t)24*SIZEOF_PIXEL); + call_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + bench_new(src1, (ptrdiff_t)24*SIZEOF_PIXEL); + } + } +} + +static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, + int codec, int chroma_format, int bit_depth) +{ + if (chroma_format == 1) { + int pred_mode; + for (pred_mode = 0; pred_mode < 9; pred_mode++) { + if (check_pred_func(h->pred16x16[pred_mode], "16x16", pred16x16_modes[codec][pred_mode])) { + randomize_buffers(); + call_ref(src0, (ptrdiff_t)48); + call_new(src1, (ptrdiff_t)48); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + bench_new(src1, (ptrdiff_t)48); + } + } + } +} + +static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1, + int codec, int chroma_format, int bit_depth) +{ + if (chroma_format == 1 && codec_ids[codec] == AV_CODEC_ID_H264) { + int pred_mode; + for (pred_mode = 0; pred_mode < 12; pred_mode++) { + if (check_pred_func(h->pred8x8l[pred_mode], "8x8l", pred4x4_modes[codec][pred_mode])) { + int neighbors; + for (neighbors = 0; neighbors <= 0xc000; neighbors += 0x4000) { + int has_topleft = neighbors & 0x8000; + int has_topright = neighbors & 0x4000; + + if ((pred_mode == DIAG_DOWN_RIGHT_PRED || pred_mode == VERT_RIGHT_PRED) && !has_topleft) + continue; /* Those aren't allowed according to the spec */ + + randomize_buffers(); + call_ref(src0, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); + call_new(src1, has_topleft, has_topright, (ptrdiff_t)24*SIZEOF_PIXEL); + if (memcmp(buf0, buf1, BUF_SIZE)) + fail(); + bench_new(src1, has_topleft, has_topright, 
(ptrdiff_t)24*SIZEOF_PIXEL); + } + } + } + } +} + +/* TODO: Add tests for H.264 lossless H/V prediction */ + +void checkasm_check_h264pred(void) +{ + static const struct { + void (*func)(H264PredContext*, uint8_t*, uint8_t*, int, int, int); + const char *name; + } tests[] = { + { check_pred4x4, "pred4x4" }, + { check_pred8x8, "pred8x8" }, + { check_pred16x16, "pred16x16" }, + { check_pred8x8l, "pred8x8l" }, + }; + + DECLARE_ALIGNED(16, uint8_t, buf0)[BUF_SIZE]; + DECLARE_ALIGNED(16, uint8_t, buf1)[BUF_SIZE]; + H264PredContext h; + int test, codec, chroma_format, bit_depth; + + for (test = 0; test < FF_ARRAY_ELEMS(tests); test++) { + for (codec = 0; codec < 4; codec++) { + int codec_id = codec_ids[codec]; + for (bit_depth = 8; bit_depth <= (codec_id == AV_CODEC_ID_H264 ? 10 : 8); bit_depth++) + for (chroma_format = 1; chroma_format <= (codec_id == AV_CODEC_ID_H264 ? 2 : 1); chroma_format++) { + ff_h264_pred_init(&h, codec_id, bit_depth, chroma_format); + tests[test].func(&h, buf0, buf1, codec, chroma_format, bit_depth); + } + } + report("%s", tests[test].name); + } +} diff --git a/tests/checkasm/x86/Makefile b/tests/checkasm/x86/Makefile new file mode 100644 index 0000000..0254c61 --- /dev/null +++ b/tests/checkasm/x86/Makefile @@ -0,0 +1,6 @@ +CHECKASMOBJS-$(HAVE_YASM) += x86/checkasm.o + +tests/checkasm/x86/%.o: tests/checkasm/x86/%.asm + $(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d) + $(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $< + -$(STRIP) $(STRIPFLAGS) $@ diff --git a/tests/checkasm/x86/checkasm.asm b/tests/checkasm/x86/checkasm.asm new file mode 100644 index 0000000..82d4076 --- /dev/null +++ b/tests/checkasm/x86/checkasm.asm @@ -0,0 +1,193 @@ +;***************************************************************************** +;* Assembly testing and benchmarking tool +;* Copyright (c) 2008 Loren Merritt +;* Copyright (c) 2012 Henrik Gramner +;* +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or modify +;* it under the terms of the GNU General Public License as published by +;* the Free Software Foundation; either version 2 of the License, or +;* (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;* GNU General Public License for more details. +;* +;* You should have received a copy of the GNU General Public License +;* along with this program; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +;***************************************************************************** + +%define private_prefix checkasm +%include "libavutil/x86/x86inc.asm" + +SECTION_RODATA + +error_message: db "failed to preserve register", 0 + +%if ARCH_X86_64 +; just random numbers to reduce the chance of incidental match +ALIGN 16 +x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 +x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636 +x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e +x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f +x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9 +x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d +x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b +x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786 +x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef +x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5 +n7: dq 0x21f86d66c8ca00ce +n8: dq 0x75b6ba21077c48ad +n9: dq 0xed56bb2dcb3c7736 +n10: dq 0x8bda43d3fd1a7e06 +n11: dq 0xb64a9c9e5d318408 +n12: dq 0xdf9a54b303f1d3a3 +n13: dq 0x4a75479abd64e097 +n14: dq 0x249214109d5d1c88 +%endif + +SECTION .text + +cextern fail_func + +; max number of args used by any asm function. 
+; (max_args % 4) must equal 3 for stack alignment +%define max_args 15 + +%if ARCH_X86_64 + +;----------------------------------------------------------------------------- +; void checkasm_stack_clobber(uint64_t clobber, ...) +;----------------------------------------------------------------------------- +cglobal stack_clobber, 1,2 + ; Clobber the stack with junk below the stack pointer + %define size (max_args+6)*8 + SUB rsp, size + mov r1, size-8 +.loop: + mov [rsp+r1], r0 + sub r1, 8 + jge .loop + ADD rsp, size + RET + +%if WIN64 + %assign free_regs 7 +%else + %assign free_regs 9 +%endif + +;----------------------------------------------------------------------------- +; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) +;----------------------------------------------------------------------------- +INIT_XMM +cglobal checked_call, 2,15,16,max_args*8+8 + mov r6, r0 + + ; All arguments have been pushed on the stack instead of registers in order to + ; test for incorrect assumptions that 32-bit ints are zero-extended to 64-bit. 
+ mov r0, r6mp + mov r1, r7mp + mov r2, r8mp + mov r3, r9mp +%if UNIX64 + mov r4, r10mp + mov r5, r11mp + %assign i 6 + %rep max_args-6 + mov r9, [rsp+stack_offset+(i+1)*8] + mov [rsp+(i-6)*8], r9 + %assign i i+1 + %endrep +%else + %assign i 4 + %rep max_args-4 + mov r9, [rsp+stack_offset+(i+7)*8] + mov [rsp+i*8], r9 + %assign i i+1 + %endrep +%endif + +%if WIN64 + %assign i 6 + %rep 16-6 + mova m %+ i, [x %+ i] + %assign i i+1 + %endrep +%endif + +%assign i 14 +%rep 15-free_regs + mov r %+ i, [n %+ i] + %assign i i-1 +%endrep + call r6 +%assign i 14 +%rep 15-free_regs + xor r %+ i, [n %+ i] + or r14, r %+ i + %assign i i-1 +%endrep + +%if WIN64 + %assign i 6 + %rep 16-6 + pxor m %+ i, [x %+ i] + por m6, m %+ i + %assign i i+1 + %endrep + packsswb m6, m6 + movq r5, m6 + or r14, r5 +%endif + + jz .ok + mov r9, rax + lea r0, [error_message] + call fail_func + mov rax, r9 +.ok: + RET + +%else + +; just random numbers to reduce the chance of incidental match +%define n3 dword 0x6549315c +%define n4 dword 0xe02f3e23 +%define n5 dword 0xb78d0d1d +%define n6 dword 0x33627ba7 + +;----------------------------------------------------------------------------- +; intptr_t checkasm_checked_call(intptr_t (*func)(), ...) +;----------------------------------------------------------------------------- +cglobal checked_call, 1,7 + mov r3, n3 + mov r4, n4 + mov r5, n5 + mov r6, n6 +%rep max_args + PUSH dword [esp+20+max_args*4] +%endrep + call r0 + xor r3, n3 + xor r4, n4 + xor r5, n5 + xor r6, n6 + or r3, r4 + or r5, r6 + or r3, r5 + jz .ok + mov r3, eax + lea r0, [error_message] + mov [esp], r0 + call fail_func + mov eax, r3 +.ok: + add esp, max_args*4 + REP_RET + +%endif ; ARCH_X86_64 |