diff options
author | kib <kib@FreeBSD.org> | 2015-06-28 05:04:08 +0000 |
---|---|---|
committer | kib <kib@FreeBSD.org> | 2015-06-28 05:04:08 +0000 |
commit | 6279b7c930802b0ec654cf9d6c051c6a919d7bd0 (patch) | |
tree | 2e41a3d37eee6a7b024d2845a13f9a891c21c2f7 /sys/amd64/include/atomic.h | |
parent | 9c6d6e9137dfbc13f2677060c04dad121320aaaf (diff) | |
download | FreeBSD-src-6279b7c930802b0ec654cf9d6c051c6a919d7bd0.zip FreeBSD-src-6279b7c930802b0ec654cf9d6c051c6a919d7bd0.tar.gz |
Remove unneeded data dependency, currently imposed by
atomic_load_acq(9), on its source, for x86.
Right now, atomic_load_acq() on x86 is sequentially consistent with
other atomics; the code ensures this by performing a store/load
barrier via a locked nop on the source. Provide a separate primitive
__storeload_barrier(), which is implemented as the locked nop done on
a cpu-private variable, and put __storeload_barrier() before the load,
to keep the seq_cst semantics while avoiding a false dependency on
the source remaining unmodified for its later use.
Note that seq_cst property of x86 atomic_load_acq() is not documented
and not carried by atomics implementations on other architectures,
although some kernel code relies on the behaviour. This commit does
not intend to change this.
Reviewed by: alc
Discussed with: bde
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 2 weeks
Diffstat (limited to 'sys/amd64/include/atomic.h')
-rw-r--r-- | sys/amd64/include/atomic.h | 125 |
1 files changed, 75 insertions, 50 deletions
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index 9110dc5..dceb3dc 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -85,7 +85,7 @@ u_long atomic_fetchadd_long(volatile u_long *p, u_long v); int atomic_testandset_int(volatile u_int *p, u_int v); int atomic_testandset_long(volatile u_long *p, u_int v); -#define ATOMIC_LOAD(TYPE, LOP) \ +#define ATOMIC_LOAD(TYPE) \ u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p) #define ATOMIC_STORE(TYPE) \ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) @@ -245,53 +245,79 @@ atomic_testandset_long(volatile u_long *p, u_int v) * We assume that a = b will do atomic loads and stores. Due to the * IA32 memory model, a simple store guarantees release semantics. * - * However, loads may pass stores, so for atomic_load_acq we have to - * ensure a Store/Load barrier to do the load in SMP kernels. We use - * "lock cmpxchg" as recommended by the AMD Software Optimization - * Guide, and not mfence. For UP kernels, however, the cache of the - * single processor is always consistent, so we only need to take care - * of the compiler. + * However, a load may pass a store if they are performed on distinct + * addresses, so for atomic_load_acq we introduce a Store/Load barrier + * before the load in SMP kernels. We use "lock addl $0,mem", as + * recommended by the AMD Software Optimization Guide, and not mfence. + * In the kernel, we use a private per-cpu cache line as the target + * for the locked addition, to avoid introducing false data + * dependencies. In userspace, a word in the red zone on the stack + * (-8(%rsp)) is utilized. + * + * For UP kernels, however, the memory of the single processor is + * always consistent, so we only need to stop the compiler from + * reordering accesses in a way that violates the semantics of acquire + * and release. 
*/ -#define ATOMIC_STORE(TYPE) \ -static __inline void \ -atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ -{ \ - __compiler_membar(); \ - *p = v; \ -} \ -struct __hack -#if defined(_KERNEL) && !defined(SMP) +#if defined(_KERNEL) -#define ATOMIC_LOAD(TYPE, LOP) \ -static __inline u_##TYPE \ -atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ -{ \ - u_##TYPE tmp; \ - \ - tmp = *p; \ - __compiler_membar(); \ - return (tmp); \ -} \ -struct __hack +/* + * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf). + * + * The open-coded number is used instead of the symbolic expression to + * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. + * An assertion in amd64/vm_machdep.c ensures that the value is correct. + */ +#define OFFSETOF_MONITORBUF 0x180 + +#if defined(SMP) +static __inline void +__storeload_barrier(void) +{ -#else /* !(_KERNEL && !SMP) */ + __asm __volatile("lock; addl $0,%%gs:%0" + : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc"); +} +#else /* _KERNEL && UP */ +static __inline void +__storeload_barrier(void) +{ -#define ATOMIC_LOAD(TYPE, LOP) \ -static __inline u_##TYPE \ -atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ -{ \ - u_##TYPE res; \ - \ - __asm __volatile(MPLOCKED LOP \ - : "=a" (res), /* 0 */ \ - "+m" (*p) /* 1 */ \ - : : "memory", "cc"); \ - return (res); \ -} \ + __compiler_membar(); +} +#endif /* SMP */ +#else /* !_KERNEL */ +static __inline void +__storeload_barrier(void) +{ + + __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); +} +#endif /* _KERNEL*/ + +#define ATOMIC_LOAD(TYPE) \ +static __inline u_##TYPE \ +atomic_load_acq_##TYPE(volatile u_##TYPE *p) \ +{ \ + u_##TYPE res; \ + \ + __storeload_barrier(); \ + res = *p; \ + __compiler_membar(); \ + return (res); \ +} \ struct __hack -#endif /* _KERNEL && !SMP */ +#define ATOMIC_STORE(TYPE) \ +static __inline void \ +atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \ +{ \ + \ + __compiler_membar(); \ + *p = v; \ +} \ +struct __hack #endif 
/* KLD_MODULE || !__GNUCLIKE_ASM */ @@ -315,20 +341,19 @@ ATOMIC_ASM(clear, long, "andq %1,%0", "ir", ~v); ATOMIC_ASM(add, long, "addq %1,%0", "ir", v); ATOMIC_ASM(subtract, long, "subq %1,%0", "ir", v); -ATOMIC_LOAD(char, "cmpxchgb %b0,%1"); -ATOMIC_LOAD(short, "cmpxchgw %w0,%1"); -ATOMIC_LOAD(int, "cmpxchgl %0,%1"); -ATOMIC_LOAD(long, "cmpxchgq %0,%1"); +#define ATOMIC_LOADSTORE(TYPE) \ + ATOMIC_LOAD(TYPE); \ + ATOMIC_STORE(TYPE) -ATOMIC_STORE(char); -ATOMIC_STORE(short); -ATOMIC_STORE(int); -ATOMIC_STORE(long); +ATOMIC_LOADSTORE(char); +ATOMIC_LOADSTORE(short); +ATOMIC_LOADSTORE(int); +ATOMIC_LOADSTORE(long); #undef ATOMIC_ASM #undef ATOMIC_LOAD #undef ATOMIC_STORE - +#undef ATOMIC_LOADSTORE #ifndef WANT_FUNCTIONS /* Read the current value and store a new value in the destination. */ |