Diffstat (limited to 'sys/i386/include/atomic.h')
-rw-r--r--  sys/i386/include/atomic.h  | 132
1 file changed, 83 insertions(+), 49 deletions(-)
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index 0156b5b..9d365bc 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -87,7 +87,7 @@ int atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
u_int atomic_fetchadd_int(volatile u_int *p, u_int v);
int atomic_testandset_int(volatile u_int *p, u_int v);
-#define ATOMIC_LOAD(TYPE, LOP) \
+#define ATOMIC_LOAD(TYPE) \
u_##TYPE atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define ATOMIC_STORE(TYPE) \
void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
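As a small illustration (not part of the patch), the declaration form of the revised macros expands to the following prototypes for TYPE = int:

u_int	atomic_load_acq_int(volatile u_int *p);
void	atomic_store_rel_int(volatile u_int *p, u_int v);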
@@ -228,53 +228,87 @@ atomic_testandset_int(volatile u_int *p, u_int v)
* We assume that a = b will do atomic loads and stores. Due to the
* IA32 memory model, a simple store guarantees release semantics.
*
- * However, loads may pass stores, so for atomic_load_acq we have to
- * ensure a Store/Load barrier to do the load in SMP kernels. We use
- * "lock cmpxchg" as recommended by the AMD Software Optimization
- * Guide, and not mfence. For UP kernels, however, the cache of the
- * single processor is always consistent, so we only need to take care
- * of the compiler.
+ * However, a load may pass a store if they are performed on distinct
+ * addresses, so for atomic_load_acq we introduce a Store/Load barrier
+ * before the load in SMP kernels. We use "lock addl $0,mem", as
+ * recommended by the AMD Software Optimization Guide, and not mfence.
+ * In the kernel, we use a private per-cpu cache line as the target
+ * for the locked addition, to avoid introducing false data
+ * dependencies. In userspace, a word at the top of the stack is
+ * utilized.
+ *
+ * For UP kernels, however, the memory of the single processor is
+ * always consistent, so we only need to stop the compiler from
+ * reordering accesses in a way that violates the semantics of acquire
+ * and release.
*/
-#define ATOMIC_STORE(TYPE) \
-static __inline void \
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{ \
- __compiler_membar(); \
- *p = v; \
-} \
-struct __hack
+#if defined(_KERNEL)
-#if defined(_KERNEL) && !defined(SMP)
+/*
+ * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
+ *
+ * The open-coded number is used instead of the symbolic expression to
+ * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
+ * An assertion in i386/vm_machdep.c ensures that the value is correct.
+ */
+#define OFFSETOF_MONITORBUF 0x180
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE tmp; \
- \
- tmp = *p; \
- __compiler_membar(); \
- return (tmp); \
-} \
-struct __hack
+#if defined(SMP)
+static __inline void
+__storeload_barrier(void)
+{
-#else /* !(_KERNEL && !SMP) */
+ __asm __volatile("lock; addl $0,%%fs:%0"
+ : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
+}
+#else /* _KERNEL && UP */
+static __inline void
+__storeload_barrier(void)
+{
-#define ATOMIC_LOAD(TYPE, LOP) \
-static __inline u_##TYPE \
-atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
-{ \
- u_##TYPE res; \
- \
- __asm __volatile(MPLOCKED LOP \
- : "=a" (res), /* 0 */ \
- "+m" (*p) /* 1 */ \
- : : "memory", "cc"); \
- return (res); \
-} \
+ __compiler_membar();
+}
+#endif /* SMP */
+#else /* !_KERNEL */
+static __inline void
+__storeload_barrier(void)
+{
+
+ __asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
+}
+#endif /* _KERNEL */
+
+/*
+ * C11-standard acq/rel semantics only apply when the variable in the
+ * call is the same for acq as it is for rel. However, our previous
+ * (x86) implementations provided much stronger ordering than required
+ * (essentially what is called seq_cst order in C11). This
+ * implementation provides the historical strong ordering since some
+ * callers depend on it.
+ */
+
+#define ATOMIC_LOAD(TYPE) \
+static __inline u_##TYPE \
+atomic_load_acq_##TYPE(volatile u_##TYPE *p) \
+{ \
+ u_##TYPE res; \
+ \
+ __storeload_barrier(); \
+ res = *p; \
+ __compiler_membar(); \
+ return (res); \
+} \
struct __hack
-#endif /* _KERNEL && !SMP */
+#define ATOMIC_STORE(TYPE) \
+static __inline void \
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) \
+{ \
+ \
+ __compiler_membar(); \
+ *p = v; \
+} \
+struct __hack
#ifdef _KERNEL
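Taken together, the new macros expand (in the !_KERNEL case, for TYPE = int) to roughly the self-contained sketch below. This is an illustration of the construction described in the comments above, not a verbatim copy of the header; __compiler_membar() is assumed to be the usual empty asm with a "memory" clobber provided by <sys/cdefs.h>, which <sys/types.h> pulls in.

/*
 * Sketch only: roughly what ATOMIC_LOAD(int)/ATOMIC_STORE(int) produce
 * for userspace after this change.
 */
#include <sys/types.h>

static __inline void
__storeload_barrier(void)
{

	/* Locked RMW on a stack word: a full (Store/Load) fence on x86. */
	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc");
}

static __inline u_int
atomic_load_acq_int(volatile u_int *p)
{
	u_int res;

	__storeload_barrier();		/* earlier stores cannot pass the load */
	res = *p;
	__compiler_membar();		/* later accesses stay after the load */
	return (res);
}

static __inline void
atomic_store_rel_int(volatile u_int *p, u_int v)
{

	__compiler_membar();		/* earlier accesses stay before the store */
	*p = v;				/* a plain store is release on x86 */
}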
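Separately, the OFFSETOF_MONITORBUF comment above refers to an assertion in i386/vm_machdep.c. A sketch of the kind of compile-time check meant (an assumption about its spelling, not the actual vm_machdep.c code) could look like this:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>
#include <machine/atomic.h>

/* Verify that the open-coded 0x180 is really the pc_monitorbuf offset. */
CTASSERT(OFFSETOF_MONITORBUF == __offsetof(struct pcpu, pc_monitorbuf));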
@@ -511,19 +545,19 @@ ATOMIC_ASM(clear, long, "andl %1,%0", "ir", ~v);
ATOMIC_ASM(add, long, "addl %1,%0", "ir", v);
ATOMIC_ASM(subtract, long, "subl %1,%0", "ir", v);
-ATOMIC_LOAD(char, "cmpxchgb %b0,%1");
-ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
-ATOMIC_LOAD(int, "cmpxchgl %0,%1");
-ATOMIC_LOAD(long, "cmpxchgl %0,%1");
+#define ATOMIC_LOADSTORE(TYPE) \
+ ATOMIC_LOAD(TYPE); \
+ ATOMIC_STORE(TYPE)
-ATOMIC_STORE(char);
-ATOMIC_STORE(short);
-ATOMIC_STORE(int);
-ATOMIC_STORE(long);
+ATOMIC_LOADSTORE(char);
+ATOMIC_LOADSTORE(short);
+ATOMIC_LOADSTORE(int);
+ATOMIC_LOADSTORE(long);
#undef ATOMIC_ASM
#undef ATOMIC_LOAD
#undef ATOMIC_STORE
+#undef ATOMIC_LOADSTORE
#ifndef WANT_FUNCTIONS
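As a usage note, the acquire/release pair generated above is what lets ordinary data be published safely between CPUs. The following hypothetical producer/consumer example (the names data, ready, produce and consume are made up for illustration and are not part of the patch) shows the intended pairing:

#include <sys/types.h>
#include <machine/atomic.h>

static u_int data;
static volatile u_int ready;

/*
 * Producer: the release store ensures the write to data is visible
 * before the write to ready.
 */
static void
produce(u_int value)
{

	data = value;
	atomic_store_rel_int(&ready, 1);
}

/*
 * Consumer: the acquire load ensures the read of data happens after
 * ready is observed set; the Store/Load barrier added by this change
 * also keeps the load from passing the consumer's own earlier stores.
 */
static u_int
consume(void)
{

	while (atomic_load_acq_int(&ready) == 0)
		;	/* spin until the producer publishes */
	return (data);
}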