diff options
author | bde <bde@FreeBSD.org> | 2006-12-27 20:26:00 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 2006-12-27 20:26:00 +0000 |
commit | 9d0b5905142545fd1c7dfe17e5724d8638a18adc (patch) | |
tree | a5aa6b3135d4a70c3bc9a0ee5a7eaf33192a6d74 | |
parent | 0234fef5787742d388b0e2b45b22c02d03a7a4db (diff) | |
download | FreeBSD-src-9d0b5905142545fd1c7dfe17e5724d8638a18adc.zip FreeBSD-src-9d0b5905142545fd1c7dfe17e5724d8638a18adc.tar.gz |
Avoid an instruction in atomic_cmpset_{int,long}() in most cases.
These functions are used a lot for mutexes, so this reduces the text
size of an average kernel by about 0.75%. This wasn't intended to
be a significant optimization, but it somehow increased the maximum
number of packets per second that can be transmitted by my bge hardware
from 320000 to 460000 (this benchmark is CPU-bound and remarkably
sensitive to changes in the text section).
Details: we would prefer to leave the result of the cmpxchg in %al,
but cannot tell gcc that it is there, so we have to convert it to an
integer register. We converted to %al, then to %[re]ax, but the
latter step is usually wasted since gcc usually only wants the condition
code and can recover it from %al just as easily as from %[re]ax. Let
gcc promote %al in the few cases where this is needed.
Nearby style fixes:
- let gcc manage the load of `res', and don't abuse `res' for a copy of `exp'
- don't echo `res's name in comments
- consistently spell the condition code as 'e' after comparison for equality
- don't hard-code %al anywhere except in constraints
- for the version that doesn't use cmpxchg, there is no requirement to use
%al anywhere, so don't hard-code it in the constraints either.
Style non-fix:
- for the versions that use cmpxchg, keep using "a" (was %[re]ax, now %al)
for the main output operand, although this is not required. The input
and output operands that use the "a" constraint are now decoupled, and
this makes things clearer except for the reason that the output register
is hard-coded. It is now just a hack to tell gcc that the input "a" has
been clobbered without increasing the number of operands.
-rw-r--r-- | sys/amd64/include/atomic.h | 20 | ||||
-rw-r--r-- | sys/i386/include/atomic.h | 22 |
2 files changed, 21 insertions, 21 deletions
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index fbdbd8f..d65d3a9 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -116,19 +116,19 @@ struct __hack static __inline int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) { - int res = exp; + u_char res; __asm __volatile ( " " __XSTRING(MPLOCKED) " " " cmpxchgl %2,%1 ; " - " setz %%al ; " - " movzbl %%al,%0 ; " + " sete %0 ; " "1: " "# atomic_cmpset_int" - : "+a" (res), /* 0 (result) */ + : "=a" (res), /* 0 */ "=m" (*dst) /* 1 */ : "r" (src), /* 2 */ - "m" (*dst) /* 3 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ : "memory"); return (res); @@ -137,19 +137,19 @@ atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) static __inline int atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src) { - long res = exp; + u_char res; __asm __volatile ( " " __XSTRING(MPLOCKED) " " " cmpxchgq %2,%1 ; " - " setz %%al ; " - " movzbq %%al,%0 ; " + " sete %0 ; " "1: " "# atomic_cmpset_long" - : "+a" (res), /* 0 (result) */ + : "=a" (res), /* 0 */ "=m" (*dst) /* 1 */ : "r" (src), /* 2 */ - "m" (*dst) /* 3 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ : "memory"); return (res); diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index 200200b..b22a543 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -117,23 +117,23 @@ struct __hack static __inline int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) { - int res = exp; + u_char res; __asm __volatile( " pushfl ; " " cli ; " - " cmpl %0,%3 ; " + " cmpl %3,%4 ; " " jne 1f ; " " movl %2,%1 ; " "1: " - " sete %%al; " - " movzbl %%al,%0 ; " + " sete %0 ; " " popfl ; " "# atomic_cmpset_int" - : "+a" (res), /* 0 (result) */ + : "=q" (res), /* 0 */ "=m" (*dst) /* 1 */ : "r" (src), /* 2 */ - "m" (*dst) /* 3 */ + "r" (exp), /* 3 */ + "m" (*dst) /* 4 */ : "memory"); return (res); @@ -144,19 +144,19 @@ atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) static 
__inline int atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src) { - int res = exp; + u_char res; __asm __volatile ( " " __XSTRING(MPLOCKED) " " " cmpxchgl %2,%1 ; " - " setz %%al ; " - " movzbl %%al,%0 ; " + " sete %0 ; " "1: " "# atomic_cmpset_int" - : "+a" (res), /* 0 (result) */ + : "=a" (res), /* 0 */ "=m" (*dst) /* 1 */ : "r" (src), /* 2 */ - "m" (*dst) /* 3 */ + "a" (exp), /* 3 */ + "m" (*dst) /* 4 */ : "memory"); return (res); |