summaryrefslogtreecommitdiffstats
path: root/sys/amd64/include/atomic.h
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>2006-12-27 20:26:00 +0000
committerbde <bde@FreeBSD.org>2006-12-27 20:26:00 +0000
commit9d0b5905142545fd1c7dfe17e5724d8638a18adc (patch)
treea5aa6b3135d4a70c3bc9a0ee5a7eaf33192a6d74 /sys/amd64/include/atomic.h
parent0234fef5787742d388b0e2b45b22c02d03a7a4db (diff)
downloadFreeBSD-src-9d0b5905142545fd1c7dfe17e5724d8638a18adc.zip
FreeBSD-src-9d0b5905142545fd1c7dfe17e5724d8638a18adc.tar.gz
Avoid an instruction in atomic_cmpset_{int,long}() in most cases.
These functions are used a lot for mutexes, so this reduces the text size of an average kernel by about 0.75%. This wasn't intended to be a significant optimization, but it somehow increased the maximum number of packets per second that can be transmitted by my bge hardware from 320000 to 460000 (this benchmark is CPU-bound and remarkably sensitive to changes in the text section). Details: we would prefer to leave the result of the cmpxchg in %al, but cannot tell gcc that it is there, so we have to convert it to an integer register. We converted to %al, then to %[re]ax, but the latter step is usually wasted since gcc usually only wants the condition code and can recover it from %al just as easily as from %[re]ax. Let gcc promote %al in the few cases where this is needed. Nearby style fixes: - let gcc manage the load of `res', and don't abuse `res' for a copy of `exp' - don't echo `res's name in comments - consistently spell the condition code as 'e' after comparison for equality - don't hard-code %al anywhere except in constraints - for the version that doesn't use cmpxchg, there is no requirement to use %al anywhere, so don't hard-code it in the constraints either. Style non-fix: - for the versions that use cmpxchg, keep using "a" (was %[re]ax, now %al) for the main output operand, although this is not required. The input and output operands that use the "a" constraint are now decoupled, and this makes things clearer except for the reason that the output register is hard-coded. It is now just a hack to tell gcc that the input "a" has been clobbered without increasing the number of operands.
Diffstat (limited to 'sys/amd64/include/atomic.h')
-rw-r--r--sys/amd64/include/atomic.h20
1 files changed, 10 insertions, 10 deletions
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index fbdbd8f..d65d3a9 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -116,19 +116,19 @@ struct __hack
static __inline int
atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src)
{
- int res = exp;
+ u_char res;
__asm __volatile (
" " __XSTRING(MPLOCKED) " "
" cmpxchgl %2,%1 ; "
- " setz %%al ; "
- " movzbl %%al,%0 ; "
+ " sete %0 ; "
"1: "
"# atomic_cmpset_int"
- : "+a" (res), /* 0 (result) */
+ : "=a" (res), /* 0 */
"=m" (*dst) /* 1 */
: "r" (src), /* 2 */
- "m" (*dst) /* 3 */
+ "a" (exp), /* 3 */
+ "m" (*dst) /* 4 */
: "memory");
return (res);
@@ -137,19 +137,19 @@ atomic_cmpset_int(volatile u_int *dst, u_int exp, u_int src)
static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long exp, u_long src)
{
- long res = exp;
+ u_char res;
__asm __volatile (
" " __XSTRING(MPLOCKED) " "
" cmpxchgq %2,%1 ; "
- " setz %%al ; "
- " movzbq %%al,%0 ; "
+ " sete %0 ; "
"1: "
"# atomic_cmpset_long"
- : "+a" (res), /* 0 (result) */
+ : "=a" (res), /* 0 */
"=m" (*dst) /* 1 */
: "r" (src), /* 2 */
- "m" (*dst) /* 3 */
+ "a" (exp), /* 3 */
+ "m" (*dst) /* 4 */
: "memory");
return (res);
OpenPOWER on IntegriCloud