diff options
author | kib <kib@FreeBSD.org> | 2012-06-02 18:10:16 +0000 |
---|---|---|
committer | kib <kib@FreeBSD.org> | 2012-06-02 18:10:16 +0000 |
commit | 5926081022f052f7ab0726cce418eb461282fafa (patch) | |
tree | 876b9bec3dee42351f436f6d49b361ba70e9c245 /sys | |
parent | 532b670a507e01b9a82aebff22722077bb20d85a (diff) | |
download | FreeBSD-src-5926081022f052f7ab0726cce418eb461282fafa.zip FreeBSD-src-5926081022f052f7ab0726cce418eb461282fafa.tar.gz |
Use a plain store for atomic_store_rel on x86, instead of the implicitly
locked xchg instruction. The IA32 memory model guarantees that a store has
release semantics, since stores cannot pass earlier loads or stores.
Reviewed by: bde, jhb
Tested by: pho
MFC after: 2 weeks
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/include/atomic.h | 74 | ||||
-rw-r--r-- | sys/i386/include/atomic.h | 78 |
2 files changed, 76 insertions, 76 deletions
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index e167b54..99a94b7 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -81,8 +81,9 @@ int atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); u_long atomic_fetchadd_long(volatile u_long *p, u_long v); -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ +#define ATOMIC_LOAD(TYPE, LOP) \ +u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) +#define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #else /* !KLD_MODULE && __GNUCLIKE_ASM */ @@ -210,37 +211,43 @@ atomic_fetchadd_long(volatile u_long *p, u_long v) return (v); } -#if defined(_KERNEL) && !defined(SMP) - /* - * We assume that a = b will do atomic loads and stores. However, on a - * PentiumPro or higher, reads may pass writes, so for that case we have - * to use a serializing instruction (i.e. with LOCK) to do the load in - * SMP kernels. For UP kernels, however, the cache of the single processor - * is always consistent, so we only need to take care of compiler. + * We assume that a = b will do atomic loads and stores. Due to the + * IA32 memory model, a simple store guarantees release semantics. + * + * However, loads may pass stores, so for atomic_load_acq we have to + * ensure a Store/Load barrier to do the load in SMP kernels. We use + * "lock cmpxchg" as recommended by the AMD Software Optimization + * Guide, and not mfence. For UP kernels, however, the cache of the + * single processor is always consistent, so we only need to take care + * of the compiler. 
*/ -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ +#define ATOMIC_STORE(TYPE) \ +static __inline void \ +atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ +{ \ + __asm __volatile("" : : : "memory"); \ + *p = v; \ +} \ +struct __hack + +#if defined(_KERNEL) && !defined(SMP) + +#define ATOMIC_LOAD(TYPE, LOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ u_##TYPE tmp; \ \ tmp = *p; \ - __asm __volatile ("" : : : "memory"); \ + __asm __volatile("" : : : "memory"); \ return (tmp); \ } \ - \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile ("" : : : "memory"); \ - *p = v; \ -} \ struct __hack #else /* !(_KERNEL && !SMP) */ -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ +#define ATOMIC_LOAD(TYPE, LOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ @@ -254,19 +261,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ \ return (res); \ } \ - \ -/* \ - * The XCHG instruction asserts LOCK automagically. 
\ - */ \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile(SOP \ - : "=m" (*p), /* 0 */ \ - "+r" (v) /* 1 */ \ - : "m" (*p) /* 2 */ \ - : "memory"); \ -} \ struct __hack #endif /* _KERNEL && !SMP */ @@ -293,13 +287,19 @@ ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); -ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0"); -ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0"); -ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0"); -ATOMIC_STORE_LOAD(long, "cmpxchgq %0,%1", "xchgq %1,%0"); +ATOMIC_LOAD(char, "cmpxchgb %b0,%1"); +ATOMIC_LOAD(short, "cmpxchgw %w0,%1"); +ATOMIC_LOAD(int, "cmpxchgl %0,%1"); +ATOMIC_LOAD(long, "cmpxchgq %0,%1"); + +ATOMIC_STORE(char); +ATOMIC_STORE(short); +ATOMIC_STORE(int); +ATOMIC_STORE(long); #undef ATOMIC_ASM -#undef ATOMIC_STORE_LOAD +#undef ATOMIC_LOAD +#undef ATOMIC_STORE #ifndef WANT_FUNCTIONS diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index 9cb96d2..6ef5962 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -32,9 +32,9 @@ #error this file needs sys/cdefs.h as a prerequisite #endif -#define mb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory") -#define wmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory") -#define rmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory") +#define mb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") +#define wmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") +#define rmb() __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc") /* * Various simple operations on memory, each of which is atomic in the @@ -79,8 +79,9 @@ void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v) int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src); u_int atomic_fetchadd_int(volatile u_int *p, u_int v); -#define 
ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ -u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p); \ +#define ATOMIC_LOAD(TYPE, LOP) \ +u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) +#define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #else /* !KLD_MODULE && __GNUCLIKE_ASM */ @@ -280,16 +281,29 @@ atomic_fetchadd_int(volatile u_int *p, u_int v) return (v); } -#if defined(_KERNEL) && !defined(SMP) - /* - * We assume that a = b will do atomic loads and stores. However, on a - * PentiumPro or higher, reads may pass writes, so for that case we have - * to use a serializing instruction (i.e. with LOCK) to do the load in - * SMP kernels. For UP kernels, however, the cache of the single processor - * is always consistent, so we only need to take care of compiler. + * We assume that a = b will do atomic loads and stores. Due to the + * IA32 memory model, a simple store guarantees release semantics. + * + * However, loads may pass stores, so for atomic_load_acq we have to + * ensure a Store/Load barrier to do the load in SMP kernels. We use + * "lock cmpxchg" as recommended by the AMD Software Optimization + * Guide, and not mfence. For UP kernels, however, the cache of the + * single processor is always consistent, so we only need to take care + * of the compiler. 
*/ -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ +#define ATOMIC_STORE(TYPE) \ +static __inline void \ +atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ +{ \ + __asm __volatile("" : : : "memory"); \ + *p = v; \ +} \ +struct __hack + +#if defined(_KERNEL) && !defined(SMP) + +#define ATOMIC_LOAD(TYPE, LOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ @@ -299,18 +313,11 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ __asm __volatile("" : : : "memory"); \ return (tmp); \ } \ - \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile("" : : : "memory"); \ - *p = v; \ -} \ struct __hack #else /* !(_KERNEL && !SMP) */ -#define ATOMIC_STORE_LOAD(TYPE, LOP, SOP) \ +#define ATOMIC_LOAD(TYPE, LOP) \ static __inline u_##TYPE \ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ { \ @@ -324,19 +331,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ \ return (res); \ } \ - \ -/* \ - * The XCHG instruction asserts LOCK automagically. 
\ - */ \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __asm __volatile(SOP \ - : "=m" (*p), /* 0 */ \ - "+r" (v) /* 1 */ \ - : "m" (*p) /* 2 */ \ - : "memory"); \ -} \ struct __hack #endif /* _KERNEL && !SMP */ @@ -363,13 +357,19 @@ ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addl %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v); -ATOMIC_STORE_LOAD(char, "cmpxchgb %b0,%1", "xchgb %b1,%0"); -ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0"); -ATOMIC_STORE_LOAD(int, "cmpxchgl %0,%1", "xchgl %1,%0"); -ATOMIC_STORE_LOAD(long, "cmpxchgl %0,%1", "xchgl %1,%0"); +ATOMIC_LOAD(char, "cmpxchgb %b0,%1"); +ATOMIC_LOAD(short, "cmpxchgw %w0,%1"); +ATOMIC_LOAD(int, "cmpxchgl %0,%1"); +ATOMIC_LOAD(long, "cmpxchgl %0,%1"); + +ATOMIC_STORE(char); +ATOMIC_STORE(short); +ATOMIC_STORE(int); +ATOMIC_STORE(long); #undef ATOMIC_ASM -#undef ATOMIC_STORE_LOAD +#undef ATOMIC_LOAD +#undef ATOMIC_STORE #ifndef WANT_FUNCTIONS |