path: root/arch/arm64/include
author     Will Deacon <will.deacon@arm.com>   2015-05-29 13:31:10 +0100
committer  Will Deacon <will.deacon@arm.com>   2015-07-27 15:28:53 +0100
commit     0ea366f5e1b6413a6095dce60ea49ae51e468b61 (patch)
tree       fce4fc690edf16784d21a714415a74a8ce53eb2b /arch/arm64/include
parent     a82e62382fcbbf5c3348e802af73583e0cac39c0 (diff)
arm64: atomics: prefetch the destination word for write prior to stxr
The cost of changing a cacheline from shared to exclusive state can be
significant, especially when this is triggered by an exclusive store,
since it may result in having to retry the transaction.

This patch makes use of prfm to prefetch cachelines for write prior to
ldxr/stxr loops when using the ll/sc atomic routines.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
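For context, here is a minimal standalone sketch of the pattern the patch applies, assuming GCC-style inline assembly on an AArch64 target; ll_sc_atomic_add is an illustrative name, not kernel code, and the kernel's relaxed/acquire/release variants are omitted:

/*
 * Sketch of an LL/SC atomic add with the write prefetch this patch adds.
 * prfm pstl1strm hints "prefetch for store, L1, streaming", so the
 * cacheline can be pulled into exclusive state before the ldxr/stxr
 * pair runs, reducing the chance the store-exclusive fails and loops.
 */
static inline void ll_sc_atomic_add(int i, int *v)
{
	unsigned long tmp;
	int result;

	asm volatile(
	"	prfm	pstl1strm, %2\n"	/* prefetch the line for write */
	"1:	ldxr	%w0, %2\n"		/* load-exclusive */
	"	add	%w0, %w0, %w3\n"
	"	stxr	%w1, %w0, %2\n"		/* store-exclusive; %w1 != 0 on failure */
	"	cbnz	%w1, 1b"		/* exclusivity lost: retry */
	: "=&r" (result), "=&r" (tmp), "+Q" (*v)
	: "r" (i));
}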
Diffstat (limited to 'arch/arm64/include')
-rw-r--r--  arch/arm64/include/asm/atomic_ll_sc.h   9
-rw-r--r--  arch/arm64/include/asm/cmpxchg.h        8
-rw-r--r--  arch/arm64/include/asm/futex.h          2
3 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 5a9fb37..50d6abd 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
int result; \
\
asm volatile("// atomic_" #op "\n" \
+" prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" stxr %w1, %w0, %2\n" \
@@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
int result; \
\
asm volatile("// atomic_" #op "_return\n" \
+" prfm pstl1strm, %2\n" \
"1: ldxr %w0, %2\n" \
" " #asm_op " %w0, %w0, %w3\n" \
" stlxr %w1, %w0, %2\n" \
@@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
int oldval;
asm volatile("// atomic_cmpxchg\n"
+" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
" eor %w0, %w1, %w3\n"
" cbnz %w0, 2f\n"
@@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "\n" \
+" prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" stxr %w1, %0, %2\n" \
@@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
unsigned long tmp; \
\
asm volatile("// atomic64_" #op "_return\n" \
+" prfm pstl1strm, %2\n" \
"1: ldxr %0, %2\n" \
" " #asm_op " %0, %0, %3\n" \
" stlxr %w1, %0, %2\n" \
@@ -174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
unsigned long res;
asm volatile("// atomic64_cmpxchg\n"
+" prfm pstl1strm, %2\n"
"1: ldxr %1, %2\n"
" eor %0, %1, %3\n"
" cbnz %w0, 2f\n"
@@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
unsigned long tmp;
asm volatile("// atomic64_dec_if_positive\n"
+" prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" subs %0, %0, #1\n"
" b.mi 2f\n"
@@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
unsigned long tmp, oldval; \
\
asm volatile( \
+ " prfm pstl1strm, %2\n" \
"1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
@@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \
unsigned long tmp, ret; \
\
asm volatile("// __cmpxchg_double" #name "\n" \
+ " prfm pstl1strm, %2\n" \
"1: ldxp %0, %1, %2\n" \
" eor %0, %0, %3\n" \
" eor %1, %1, %4\n" \
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index f702126..7bfda09 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
case 1:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
+ " prfm pstl1strm, %2\n"
"1: ldxrb %w0, %2\n"
" stlxrb %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
+ " nop\n"
" swpalb %w3, %w0, %2\n"
" nop\n"
" nop")
@@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
case 2:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
+ " prfm pstl1strm, %2\n"
"1: ldxrh %w0, %2\n"
" stlxrh %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
+ " nop\n"
" swpalh %w3, %w0, %2\n"
" nop\n"
" nop")
@@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
case 4:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
+ " prfm pstl1strm, %2\n"
"1: ldxr %w0, %2\n"
" stlxr %w1, %w3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
+ " nop\n"
" swpal %w3, %w0, %2\n"
" nop\n"
" nop")
@@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
case 8:
asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
+ " prfm pstl1strm, %2\n"
"1: ldxr %0, %2\n"
" stlxr %w1, %3, %2\n"
" cbnz %w1, 1b\n"
" dmb ish",
/* LSE atomics */
" nop\n"
+ " nop\n"
" swpal %3, %0, %2\n"
" nop\n"
" nop")
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 775e85b..007a69f 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -30,6 +30,7 @@
asm volatile( \
ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
CONFIG_ARM64_PAN) \
+" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
"2: stlxr %w3, %w0, %2\n" \
@@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return -EFAULT;
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
" sub %w3, %w1, %w4\n"
" cbnz %w3, 3f\n"