summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-01-03 16:31:57 +0100
committerMichael Niedermayer <michaelni@gmx.at>2012-01-06 01:46:51 +0100
commitc4f237a98175438e76da64c51cc6fe613f8d767e (patch)
tree127b2f700030647a06a87d34c6eae14a53e26616
parente9a591d1f01a472dd18058b830207191c701c40f (diff)
downloadffmpeg-streaming-c4f237a98175438e76da64c51cc6fe613f8d767e.zip
ffmpeg-streaming-c4f237a98175438e76da64c51cc6fe613f8d767e.tar.gz
Revert commit 599b4c6efddaed33b1667c386b34b07729ba732b
Author: Mans Rullgard <mans@mansr.com>
Date: Sun Dec 11 21:41:59 2011 +0000

x86: cabac: replace explicit memory references with "m" operands

This replaces the explicit offset(reg) memory references with "m" operands for the same locations. As a result, one fewer register operand is needed for these inline asm statements.

This change appears to have broken compilation on darwin, and the subsequent fixes by martin (which did not fix compilation) removed the register advantage; thus this change does not seem worth keeping.

See: http://fate.ffmpeg.org/log.cgi?time=20120103122446&log=compile&slot=i386-darwin-llvm-gcc-4.2.1

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/x86/cabac.h 49
-rw-r--r-- libavcodec/x86/h264_i386.h 27
2 files changed, 43 insertions, 33 deletions
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 3c3652d..33304ab 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -27,7 +27,7 @@
#include "config.h"
#if HAVE_FAST_CMOV
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"cmp "low" , "tmp" \n\t"\
@@ -37,7 +37,7 @@
"xor %%ecx , "ret" \n\t"\
"sub "tmp" , "low" \n\t"
#else /* HAVE_FAST_CMOV */
-#define BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp)\
+#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp)\
"mov "tmp" , %%ecx \n\t"\
"shl $17 , "tmp" \n\t"\
"sub "low" , "tmp" \n\t"\
@@ -51,13 +51,14 @@
"xor "tmp" , "ret" \n\t"
#endif /* HAVE_FAST_CMOV */
-#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte) \
+#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte, byte) \
"movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\
- BRANCHLESS_GET_CABAC_UPDATE(ret, statep, low, lowword, range, tmp) \
+ BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, \
+ range, tmp) \
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
@@ -65,8 +66,8 @@
"mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 1f \n\t"\
- "mov "byte" , %%"REG_c" \n\t"\
- "add"OPSIZE" $2 , "byte" \n\t"\
+ "mov "byte"("cabac"), %%"REG_c" \n\t"\
+ "add"OPSIZE" $2 , "byte "("cabac") \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
@@ -86,14 +87,20 @@
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
uint8_t *const state)
{
- int bit, tmp;
+ int bit, low, range, tmp;
__asm__ volatile(
- BRANCHLESS_GET_CABAC("%0", "(%5)", "%1", "%w1", "%2",
- "%3", "%b3", "%4")
- :"=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp),
- "+m"(c->bytestream)
- :"r"(state)
+ "movl %a6(%5), %2 \n\t"
+ "movl %a7(%5), %1 \n\t"
+ BRANCHLESS_GET_CABAC("%0", "%5", "(%4)", "%1", "%w1", "%2",
+ "%3", "%b3", "%a8")
+ "movl %2, %a6(%5) \n\t"
+ "movl %1, %a7(%5) \n\t"
+
+ :"=&r"(bit), "=&r"(low), "=&r"(range), "=&q"(tmp)
+ :"r"(state), "r"(c),
+ "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
+ "i"(offsetof(CABACContext, bytestream))
: "%"REG_c, "memory"
);
return bit & 1;
@@ -105,8 +112,8 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
{
x86_reg tmp;
__asm__ volatile(
- "movl %4, %k1 \n\t"
- "movl %2, %%eax \n\t"
+ "movl %a3(%2), %k1 \n\t"
+ "movl %a4(%2), %%eax \n\t"
"shl $17, %k1 \n\t"
"add %%eax, %%eax \n\t"
"sub %k1, %%eax \n\t"
@@ -117,20 +124,22 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
"sub %%edx, %%ecx \n\t"
"test %%ax, %%ax \n\t"
" jnz 1f \n\t"
- "mov %3, %1 \n\t"
+ "mov %a5(%2), %1 \n\t"
"subl $0xFFFF, %%eax \n\t"
"movzwl (%1), %%edx \n\t"
"bswap %%edx \n\t"
"shrl $15, %%edx \n\t"
"add $2, %1 \n\t"
"addl %%edx, %%eax \n\t"
- "mov %1, %3 \n\t"
+ "mov %1, %a5(%2) \n\t"
"1: \n\t"
- "movl %%eax, %2 \n\t"
+ "movl %%eax, %a4(%2) \n\t"
- :"+c"(val), "=&r"(tmp), "+m"(c->low), "+m"(c->bytestream)
- :"m"(c->range)
- : "%eax", "%edx"
+ :"+c"(val), "=&r"(tmp)
+ :"r"(c),
+ "i"(offsetof(CABACContext, range)), "i"(offsetof(CABACContext, low)),
+ "i"(offsetof(CABACContext, bytestream))
+ : "%eax", "%edx", "memory"
);
return val;
}
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index 510f726..84da48d 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -48,15 +48,15 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
__asm__ volatile(
"2: \n\t"
- BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
- "%w3", "%5", "%k0", "%b0", "%6")
+ BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
+ "%w3", "%5", "%k0", "%b0", "%a11")
"test $1, %4 \n\t"
" jz 3f \n\t"
"add %10, %1 \n\t"
- BRANCHLESS_GET_CABAC("%4", "(%1)", "%3",
- "%w3", "%5", "%k0", "%b0", "%6")
+ BRANCHLESS_GET_CABAC("%4", "%6", "(%1)", "%3",
+ "%w3", "%5", "%k0", "%b0", "%a11")
"sub %10, %1 \n\t"
"mov %2, %0 \n\t"
@@ -81,9 +81,9 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"add %9, %k0 \n\t"
"shr $2, %k0 \n\t"
:"=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
- "+&r"(c->low), "=&r"(bit), "+&r"(c->range),
- "+m"(c->bytestream)
- :"m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off)
+ "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
+ :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
+ "i"(offsetof(CABACContext, bytestream))
: "%"REG_c, "memory"
);
return coeff_count;
@@ -105,8 +105,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
"movzbl (%0, %6), %k6 \n\t"
"add %9, %6 \n\t"
- BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
- "%w3", "%5", "%k0", "%b0", "%7")
+ BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
+ "%w3", "%5", "%k0", "%b0", "%a12")
"mov %1, %k6 \n\t"
"test $1, %4 \n\t"
@@ -115,8 +115,8 @@ static int decode_significance_8x8_x86(CABACContext *c,
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t"
"add %11, %6 \n\t"
- BRANCHLESS_GET_CABAC("%4", "(%6)", "%3",
- "%w3", "%5", "%k0", "%b0", "%7")
+ BRANCHLESS_GET_CABAC("%4", "%7", "(%6)", "%3",
+ "%w3", "%5", "%k0", "%b0", "%a12")
"mov %2, %0 \n\t"
"mov %1, %k6 \n\t"
@@ -138,8 +138,9 @@ static int decode_significance_8x8_x86(CABACContext *c,
"addl %8, %k0 \n\t"
"shr $2, %k0 \n\t"
:"=&q"(coeff_count),"+m"(last), "+m"(index), "+&r"(c->low), "=&r"(bit),
- "+&r"(c->range), "=&r"(state), "+m"(c->bytestream)
- :"m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base)
+ "+&r"(c->range), "=&r"(state)
+ :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
+ "i"(offsetof(CABACContext, bytestream))
: "%"REG_c, "memory"
);
return coeff_count;
OpenPOWER on IntegriCloud