From f7d71f2052555ae57b47322f2c2f6c29ff2438ae Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 19 Jun 2015 11:50:00 -0400 Subject: locking/qrwlock: Rename functions to queued_*() To sync up with the naming convention used in qspinlock, all the qrwlock functions were renamed to started with "queued" instead of "queue". Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Cc: Arnd Bergmann Cc: Douglas Hatch Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1434729002-57724-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 58 +++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 6383d54..55e3ee1 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -36,33 +36,33 @@ /* * External function declarations */ -extern void queue_read_lock_slowpath(struct qrwlock *lock); -extern void queue_write_lock_slowpath(struct qrwlock *lock); +extern void queued_read_lock_slowpath(struct qrwlock *lock); +extern void queued_write_lock_slowpath(struct qrwlock *lock); /** - * queue_read_can_lock- would read_trylock() succeed? + * queued_read_can_lock- would read_trylock() succeed? * @lock: Pointer to queue rwlock structure */ -static inline int queue_read_can_lock(struct qrwlock *lock) +static inline int queued_read_can_lock(struct qrwlock *lock) { return !(atomic_read(&lock->cnts) & _QW_WMASK); } /** - * queue_write_can_lock- would write_trylock() succeed? + * queued_write_can_lock- would write_trylock() succeed? * @lock: Pointer to queue rwlock structure */ -static inline int queue_write_can_lock(struct qrwlock *lock) +static inline int queued_write_can_lock(struct qrwlock *lock) { return !atomic_read(&lock->cnts); } /** - * queue_read_trylock - try to acquire read lock of a queue rwlock + * queued_read_trylock - try to acquire read lock of a queue rwlock * @lock : Pointer to queue rwlock structure * Return: 1 if lock acquired, 0 if failed */ -static inline int queue_read_trylock(struct qrwlock *lock) +static inline int queued_read_trylock(struct qrwlock *lock) { u32 cnts; @@ -77,11 +77,11 @@ static inline int queue_read_trylock(struct qrwlock *lock) } /** - * queue_write_trylock - try to acquire write lock of a queue rwlock + * queued_write_trylock - try to acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure * Return: 1 if lock acquired, 0 if failed */ -static inline int queue_write_trylock(struct qrwlock *lock) +static inline int queued_write_trylock(struct qrwlock *lock) { u32 cnts; @@ -93,10 +93,10 @@ static inline int queue_write_trylock(struct qrwlock *lock) cnts, cnts | _QW_LOCKED) == cnts); } /** - * queue_read_lock - acquire read lock of a queue rwlock + * queued_read_lock - acquire read lock of a queue rwlock * @lock: Pointer to queue rwlock structure */ -static inline void queue_read_lock(struct qrwlock *lock) +static inline void queued_read_lock(struct qrwlock *lock) { u32 cnts; @@ -105,27 +105,27 @@ static inline void queue_read_lock(struct qrwlock *lock) return; /* The slowpath will decrement the reader count, if necessary. 
*/ - queue_read_lock_slowpath(lock); + queued_read_lock_slowpath(lock); } /** - * queue_write_lock - acquire write lock of a queue rwlock + * queued_write_lock - acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_write_lock(struct qrwlock *lock) +static inline void queued_write_lock(struct qrwlock *lock) { /* Optimize for the unfair lock case where the fair flag is 0. */ if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0) return; - queue_write_lock_slowpath(lock); + queued_write_lock_slowpath(lock); } /** - * queue_read_unlock - release read lock of a queue rwlock + * queued_read_unlock - release read lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_read_unlock(struct qrwlock *lock) +static inline void queued_read_unlock(struct qrwlock *lock) { /* * Atomically decrement the reader count @@ -134,12 +134,12 @@ static inline void queue_read_unlock(struct qrwlock *lock) atomic_sub(_QR_BIAS, &lock->cnts); } -#ifndef queue_write_unlock +#ifndef queued_write_unlock /** - * queue_write_unlock - release write lock of a queue rwlock + * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -static inline void queue_write_unlock(struct qrwlock *lock) +static inline void queued_write_unlock(struct qrwlock *lock) { /* * If the writer field is atomic, it can be cleared directly. @@ -154,13 +154,13 @@ static inline void queue_write_unlock(struct qrwlock *lock) * Remapping rwlock architecture specific functions to the corresponding * queue rwlock functions. */ -#define arch_read_can_lock(l) queue_read_can_lock(l) -#define arch_write_can_lock(l) queue_write_can_lock(l) -#define arch_read_lock(l) queue_read_lock(l) -#define arch_write_lock(l) queue_write_lock(l) -#define arch_read_trylock(l) queue_read_trylock(l) -#define arch_write_trylock(l) queue_write_trylock(l) -#define arch_read_unlock(l) queue_read_unlock(l) -#define arch_write_unlock(l) queue_write_unlock(l) +#define arch_read_can_lock(l) queued_read_can_lock(l) +#define arch_write_can_lock(l) queued_write_can_lock(l) +#define arch_read_lock(l) queued_read_lock(l) +#define arch_write_lock(l) queued_write_lock(l) +#define arch_read_trylock(l) queued_read_trylock(l) +#define arch_write_trylock(l) queued_write_trylock(l) +#define arch_read_unlock(l) queued_read_unlock(l) +#define arch_write_unlock(l) queued_write_unlock(l) #endif /* __ASM_GENERIC_QRWLOCK_H */ -- cgit v1.1 From 0e06e5be70d392aa842c1455ec2d0baf62aeed48 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 19 Jun 2015 11:50:01 -0400 Subject: locking/qrwlock: Better optimization for interrupt context readers The qrwlock is fair in the process context, but becoming unfair when in the interrupt context to support use cases like the tasklist_lock. The current code isn't that well-documented on what happens when in the interrupt context. The rspin_until_writer_unlock() will only spin if the writer has gotten the lock. If the writer is still in the waiting state, the increment in the reader count will cause the writer to remain in the waiting state and the new interrupt context reader will get the lock and return immediately. The current code, however, does an additional read of the lock value which is not necessary as the information has already been there in the fast path. This may sometime cause an additional cacheline transfer when the lock is highly contended. 
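Before the fix is described below, a concrete picture of the idea (a minimal standalone sketch in plain C11 rather than kernel code; the model_* names and simplified constants are invented for the example, and the real slow path in kernel/locking/qrwlock.c also handles queueing and may decrement the reader count): the fast path already holds the count returned by its atomic add, so it can hand that value to the slow path instead of letting the slow path reload the contended lock word.

#include <stdatomic.h>
#include <stdint.h>

#define QR_BIAS   0x100u	/* one reader */
#define QW_WMASK  0x0ffu	/* writer byte (waiting or locked) */

struct model_qrwlock { _Atomic uint32_t cnts; };

/* 'cnts' is the value the fast path observed: no extra load on entry. */
static void model_read_lock_slowpath(struct model_qrwlock *l, uint32_t cnts)
{
	while (cnts & QW_WMASK)		/* crude stand-in for the real queueing */
		cnts = atomic_load(&l->cnts);
}

static void model_read_lock(struct model_qrwlock *l)
{
	uint32_t cnts = atomic_fetch_add(&l->cnts, QR_BIAS) + QR_BIAS;

	if (!(cnts & QW_WMASK))
		return;			/* no writer seen: lock acquired */
	model_read_lock_slowpath(l, cnts);
}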
This patch passes the lock value information gotten in the fast path to the slow path to eliminate the additional read. It also documents the action for the interrupt context readers more clearly. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Will Deacon Cc: Arnd Bergmann Cc: Douglas Hatch Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1434729002-57724-3-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 55e3ee1..deb9e8b 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -36,7 +36,7 @@ /* * External function declarations */ -extern void queued_read_lock_slowpath(struct qrwlock *lock); +extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts); extern void queued_write_lock_slowpath(struct qrwlock *lock); /** @@ -105,7 +105,7 @@ static inline void queued_read_lock(struct qrwlock *lock) return; /* The slowpath will decrement the reader count, if necessary. */ - queued_read_lock_slowpath(lock); + queued_read_lock_slowpath(lock, cnts); } /** -- cgit v1.1 From 56d1defe0bbddaa97d6e74b51490904130fd4f1d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 15 Jul 2015 15:47:25 +0200 Subject: atomic: Prepare generic atomic implementation for logic ops Clean up the #ifdef guards a bit to prepare for architectures to supply their own logic ops. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- include/asm-generic/atomic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 1973ad2..92947e0 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -98,14 +98,22 @@ ATOMIC_OP_RETURN(add, +) ATOMIC_OP_RETURN(sub, -) #endif -#ifndef atomic_clear_mask +#ifndef atomic_and ATOMIC_OP(and, &) +#endif + +#ifndef atomic_clear_mask #define atomic_clear_mask(i, v) atomic_and(~(i), (v)) #endif -#ifndef atomic_set_mask +#ifndef atomic_or +#ifndef CONFIG_ARCH_HAS_ATOMIC_OR #define CONFIG_ARCH_HAS_ATOMIC_OR +#endif ATOMIC_OP(or, |) +#endif + +#ifndef atomic_set_mask #define atomic_set_mask(i, v) atomic_or((i), (v)) #endif -- cgit v1.1 From e6942b7de2dfe44ebde9bae57dadece5abca9de8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2014 19:32:50 +0200 Subject: atomic: Provide atomic_{or,xor,and} Implement atomic logic ops -- atomic_{or,xor,and}. These will replace the atomic_{set,clear}_mask functions that are available on some archs. 
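The mapping from the old helpers to the new ops, in miniature (a standalone C11 sketch, not the kernel header: the model_* names are invented, and C11 fetch-ops stand in for the kernel's ATOMIC_OP() generator, whose body is not shown in this log):

#include <stdatomic.h>

typedef struct { _Atomic int counter; } model_atomic_t;

/* Generate the three logic ops from one template, as ATOMIC_OP() does. */
#define MODEL_ATOMIC_OP(op)						\
static inline void model_atomic_##op(int i, model_atomic_t *v)		\
{									\
	atomic_fetch_##op(&v->counter, i);				\
}

MODEL_ATOMIC_OP(and)
MODEL_ATOMIC_OP(or)
MODEL_ATOMIC_OP(xor)

/* The old mask helpers then collapse into the logic ops. */
static inline void model_atomic_clear_mask(unsigned int mask, model_atomic_t *v)
{
	model_atomic_and((int)~mask, v);	/* clear the bits in 'mask' */
}

static inline void model_atomic_set_mask(unsigned int mask, model_atomic_t *v)
{
	model_atomic_or((int)mask, v);		/* set the bits in 'mask' */
}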
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- include/asm-generic/atomic.h | 21 ++++++++++++--------- include/asm-generic/atomic64.h | 4 ++++ 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 92947e0..a41b0b8 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -102,24 +102,27 @@ ATOMIC_OP_RETURN(sub, -) ATOMIC_OP(and, &) #endif -#ifndef atomic_clear_mask -#define atomic_clear_mask(i, v) atomic_and(~(i), (v)) -#endif - #ifndef atomic_or -#ifndef CONFIG_ARCH_HAS_ATOMIC_OR -#define CONFIG_ARCH_HAS_ATOMIC_OR -#endif ATOMIC_OP(or, |) #endif -#ifndef atomic_set_mask -#define atomic_set_mask(i, v) atomic_or((i), (v)) +#ifndef atomic_xor +ATOMIC_OP(xor, ^) #endif #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + atomic_and(~mask, v); +} + +static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + atomic_or(mask, v); +} + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 30ad9c8..d48e78c 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v); ATOMIC64_OPS(add) ATOMIC64_OPS(sub) +ATOMIC64_OP(and) +ATOMIC64_OP(or) +ATOMIC64_OP(xor) + #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -- cgit v1.1 From de9e432cb5de1bf2952919dc0b22e4bec0ed8d53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Apr 2015 01:12:32 +0200 Subject: atomic: Collapse all atomic_{set,clear}_mask definitions Move the now generic definitions of atomic_{set,clear}_mask() into linux/atomic.h to avoid endless and pointless repetition. Also, provide an atomic_andnot() wrapper for those few archs that can implement that. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- include/asm-generic/atomic.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index a41b0b8..d4d7e33 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -113,16 +113,6 @@ ATOMIC_OP(xor, ^) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. -- cgit v1.1 From 76695af20c015206cffb84b15912be6797d0cca2 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sun, 2 Aug 2015 17:11:04 +0200 Subject: locking, arch: use WRITE_ONCE()/READ_ONCE() in smp_store_release()/smp_load_acquire() Replace ACCESS_ONCE() macro in smp_store_release() and smp_load_acquire() with WRITE_ONCE() and READ_ONCE() on x86, arm, arm64, ia64, metag, mips, powerpc, s390, sparc and asm-generic since ACCESS_ONCE() does not work reliably on non-scalar types. 
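To show what the two macros being edited are for (a standalone C11 sketch, not kernel code: the names are invented and C11 memory orders stand in for smp_store_release()/smp_load_acquire(); inside the real macros, READ_ONCE()/WRITE_ONCE() are what keep the flag access a single, untorn access):

#include <stdatomic.h>

static int payload;		/* plain data, published once */
static _Atomic int ready;	/* flag, written with release ordering */

static void producer(void)
{
	payload = 42;					/* A: plain store */
	/* models smp_store_release(&ready, 1) */
	atomic_store_explicit(&ready, 1, memory_order_release);
}

static int consumer(void)
{
	/* models smp_load_acquire(&ready) */
	if (atomic_load_explicit(&ready, memory_order_acquire))
		return payload;	/* if the flag is seen, store A is seen too */
	return -1;
}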
WRITE_ONCE() and READ_ONCE() were introduced in the following commits: 230fa253df63 ("kernel: Provide READ_ONCE and ASSIGN_ONCE") 43239cbe79fc ("kernel: Change ASSIGN_ONCE(val, x) to WRITE_ONCE(x, val)") Signed-off-by: Andrey Konovalov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Davidlohr Bueso Acked-by: Michael Ellerman (powerpc) Acked-by: Ralf Baechle Cc: Alexander Duyck Cc: Andre Przywara Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David S. Miller Cc: Davidlohr Bueso Cc: Dmitry Vyukov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Heiko Carstens Cc: James Hogan Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Paul E. McKenney Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1438528264-714-1-git-send-email-andreyknvl@google.com Signed-off-by: Ingo Molnar --- include/asm-generic/barrier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 55e3abc..b42afad 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -108,12 +108,12 @@ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ - ACCESS_ONCE(*p) = (v); \ + WRITE_ONCE(*p, v); \ } while (0) #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + typeof(*p) ___p1 = READ_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ -- cgit v1.1 From 586b610e43a5ad5096640312fefa6ce931738c7d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Aug 2015 17:54:38 +0100 Subject: locking, asm-generic: Rework atomic-long.h to avoid bulk code duplication We can use some (admittedly ugly) macros to generate the 32-bit and 64-bit based atomic_long implementations from the same code. 
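The trick in miniature (a standalone C11 sketch with invented model_* names, not the kernel header; the real code also keeps the cast from atomic_long_t to the prefixed type, omitted here): one prefix macro plus token pasting lets a single function body expand to either the 32-bit or the 64-bit flavour.

#include <stdatomic.h>
#include <limits.h>

#if ULONG_MAX > 0xffffffffUL	/* plays the role of BITS_PER_LONG == 64 */
typedef _Atomic long long model_atomic64_t;
#define MODEL_LONG_PFX(x) model_atomic64 ## x
static inline long long model_atomic64_read(model_atomic64_t *v) { return atomic_load(v); }
#else
typedef _Atomic int model_atomic_t;
#define MODEL_LONG_PFX(x) model_atomic ## x
static inline int model_atomic_read(model_atomic_t *v) { return atomic_load(v); }
#endif

typedef MODEL_LONG_PFX(_t) model_atomic_long_t;	/* plays the role of atomic_long_t */

/* One body covers both word sizes, because the prefix is pasted in. */
static inline long model_atomic_long_read(model_atomic_long_t *l)
{
	return (long)MODEL_LONG_PFX(_read)(l);
}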
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hp.com Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1438880084-18856-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 189 ++++++++------------------------------ 1 file changed, 40 insertions(+), 149 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index b7babf0..beaea54 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -23,236 +23,127 @@ typedef atomic64_t atomic_long_t; #define ATOMIC_LONG_INIT(i) ATOMIC64_INIT(i) +#define ATOMIC_LONG_PFX(x) atomic64 ## x -static inline long atomic_long_read(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_read(v); -} - -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_set(v, i); -} - -static inline void atomic_long_inc(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_inc(v); -} - -static inline void atomic_long_dec(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_dec(v); -} - -static inline void atomic_long_add(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_add(i, v); -} - -static inline void atomic_long_sub(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - atomic64_sub(i, v); -} - -static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_sub_and_test(i, v); -} - -static inline int atomic_long_dec_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_dec_and_test(v); -} - -static inline int atomic_long_inc_and_test(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_inc_and_test(v); -} - -static inline int atomic_long_add_negative(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return atomic64_add_negative(i, v); -} - -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_return(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_sub_return(i, v); -} - -static inline long atomic_long_inc_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_inc_return(v); -} - -static inline long atomic_long_dec_return(atomic_long_t *l) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_dec_return(v); -} - -static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) -{ - atomic64_t *v = (atomic64_t *)l; - - return (long)atomic64_add_unless(v, a, u); -} - -#define atomic_long_inc_not_zero(l) atomic64_inc_not_zero((atomic64_t *)(l)) - -#define atomic_long_cmpxchg(l, old, new) \ - (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(v), (new))) - -#else /* BITS_PER_LONG == 64 */ +#else typedef atomic_t atomic_long_t; #define ATOMIC_LONG_INIT(i) ATOMIC_INIT(i) +#define ATOMIC_LONG_PFX(x) atomic ## x + +#endif + static inline long atomic_long_read(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_read(v); + return (long)ATOMIC_LONG_PFX(_read)(v); } 
static inline void atomic_long_set(atomic_long_t *l, long i) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - atomic_set(v, i); + ATOMIC_LONG_PFX(_set)(v, i); } static inline void atomic_long_inc(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - atomic_inc(v); + ATOMIC_LONG_PFX(_inc)(v); } static inline void atomic_long_dec(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - atomic_dec(v); + ATOMIC_LONG_PFX(_dec)(v); } static inline void atomic_long_add(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - atomic_add(i, v); + ATOMIC_LONG_PFX(_add)(i, v); } static inline void atomic_long_sub(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - atomic_sub(i, v); + ATOMIC_LONG_PFX(_sub)(i, v); } static inline int atomic_long_sub_and_test(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return atomic_sub_and_test(i, v); + return ATOMIC_LONG_PFX(_sub_and_test)(i, v); } static inline int atomic_long_dec_and_test(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return atomic_dec_and_test(v); + return ATOMIC_LONG_PFX(_dec_and_test)(v); } static inline int atomic_long_inc_and_test(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return atomic_inc_and_test(v); + return ATOMIC_LONG_PFX(_inc_and_test)(v); } static inline int atomic_long_add_negative(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return atomic_add_negative(i, v); + return ATOMIC_LONG_PFX(_add_negative)(i, v); } static inline long atomic_long_add_return(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_add_return(i, v); + return (long)ATOMIC_LONG_PFX(_add_return)(i, v); } static inline long atomic_long_sub_return(long i, atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_sub_return(i, v); + return (long)ATOMIC_LONG_PFX(_sub_return)(i, v); } static inline long atomic_long_inc_return(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_inc_return(v); + return (long)ATOMIC_LONG_PFX(_inc_return)(v); } static inline long atomic_long_dec_return(atomic_long_t *l) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_dec_return(v); + return (long)ATOMIC_LONG_PFX(_dec_return)(v); } static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) { - atomic_t *v = (atomic_t *)l; + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - return (long)atomic_add_unless(v, a, u); + return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u); } -#define atomic_long_inc_not_zero(l) atomic_inc_not_zero((atomic_t *)(l)) - +#define atomic_long_inc_not_zero(l) \ + ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l)) #define atomic_long_cmpxchg(l, old, new) \ - (atomic_cmpxchg((atomic_t *)(l), (old), (new))) + (ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic_xchg((atomic_t *)(v), (new))) - -#endif /* 
BITS_PER_LONG == 64 */ + (ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new))) #endif /* _ASM_GENERIC_ATOMIC_LONG_H */ -- cgit v1.1 From 6d79ef2d30ee5af7315535d1e7bf6fce0008f815 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Aug 2015 17:54:39 +0100 Subject: locking, asm-generic: Add _{relaxed|acquire|release}() variants for 'atomic_long_t' This patch adds 'atomic_long_t' wrappers for the new relaxed atomic operations. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hp.com Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1438880084-18856-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 86 +++++++++++++++++++++++++++------------ 1 file changed, 59 insertions(+), 27 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index beaea54..a94cbeb 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -34,19 +34,69 @@ typedef atomic_t atomic_long_t; #endif -static inline long atomic_long_read(atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - return (long)ATOMIC_LONG_PFX(_read)(v); +#define ATOMIC_LONG_READ_OP(mo) \ +static inline long atomic_long_read##mo(atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + return (long)ATOMIC_LONG_PFX(_read##mo)(v); \ } +ATOMIC_LONG_READ_OP() +ATOMIC_LONG_READ_OP(_acquire) -static inline void atomic_long_set(atomic_long_t *l, long i) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; +#undef ATOMIC_LONG_READ_OP - ATOMIC_LONG_PFX(_set)(v, i); +#define ATOMIC_LONG_SET_OP(mo) \ +static inline void atomic_long_set##mo(atomic_long_t *l, long i) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + ATOMIC_LONG_PFX(_set##mo)(v, i); \ +} +ATOMIC_LONG_SET_OP() +ATOMIC_LONG_SET_OP(_release) + +#undef ATOMIC_LONG_SET_OP + +#define ATOMIC_LONG_ADD_SUB_OP(op, mo) \ +static inline long \ +atomic_long_##op##_return##mo(long i, atomic_long_t *l) \ +{ \ + ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; \ + \ + return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v); \ } +ATOMIC_LONG_ADD_SUB_OP(add,) +ATOMIC_LONG_ADD_SUB_OP(add, _relaxed) +ATOMIC_LONG_ADD_SUB_OP(add, _acquire) +ATOMIC_LONG_ADD_SUB_OP(add, _release) +ATOMIC_LONG_ADD_SUB_OP(sub,) +ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed) +ATOMIC_LONG_ADD_SUB_OP(sub, _acquire) +ATOMIC_LONG_ADD_SUB_OP(sub, _release) + +#undef ATOMIC_LONG_ADD_SUB_OP + +#define atomic_long_cmpxchg_relaxed(l, old, new) \ + (ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \ + (old), (new))) +#define atomic_long_cmpxchg_acquire(l, old, new) \ + (ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \ + (old), (new))) +#define atomic_long_cmpxchg_release(l, old, new) \ + (ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \ + (old), (new))) +#define atomic_long_cmpxchg(l, old, new) \ + (ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new))) + +#define atomic_long_xchg_relaxed(v, new) \ + (ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new))) +#define atomic_long_xchg_acquire(v, new) \ + (ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new))) +#define atomic_long_xchg_release(v, new) \ + (ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new))) +#define atomic_long_xchg(v, new) \ + 
(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new))) static inline void atomic_long_inc(atomic_long_t *l) { @@ -104,20 +154,6 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l) return ATOMIC_LONG_PFX(_add_negative)(i, v); } -static inline long atomic_long_add_return(long i, atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - return (long)ATOMIC_LONG_PFX(_add_return)(i, v); -} - -static inline long atomic_long_sub_return(long i, atomic_long_t *l) -{ - ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; - - return (long)ATOMIC_LONG_PFX(_sub_return)(i, v); -} - static inline long atomic_long_inc_return(atomic_long_t *l) { ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l; @@ -141,9 +177,5 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_inc_not_zero(l) \ ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l)) -#define atomic_long_cmpxchg(l, old, new) \ - (ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new))) -#define atomic_long_xchg(v, new) \ - (ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new))) #endif /* _ASM_GENERIC_ATOMIC_LONG_H */ -- cgit v1.1 From 2b2a85a4d3534b8884fcfa5bb52837f0e1c672bc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Aug 2015 17:54:41 +0100 Subject: locking/qrwlock: Implement queue_write_unlock() using smp_store_release() Since the following commit: 536fa402221f ("compiler: Allow 1- and 2-byte smp_load_acquire() and smp_store_release()") smp_store_release() supports byte accesses, so use that in writer unlock and remove the conditional macro override. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Waiman Long Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1438880084-18856-6-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index deb9e8b..eb673dd 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -134,21 +134,14 @@ static inline void queued_read_unlock(struct qrwlock *lock) atomic_sub(_QR_BIAS, &lock->cnts); } -#ifndef queued_write_unlock /** * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ static inline void queued_write_unlock(struct qrwlock *lock) { - /* - * If the writer field is atomic, it can be cleared directly. - * Otherwise, an atomic subtraction will be used to clear it. - */ - smp_mb__before_atomic(); - atomic_sub(_QW_LOCKED, &lock->cnts); + smp_store_release((u8 *)&lock->cnts, 0); } -#endif /* * Remapping rwlock architecture specific functions to the corresponding -- cgit v1.1 From 77e430e3e45662b696dc49aa53ea0f7ac63f2574 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Aug 2015 17:54:42 +0100 Subject: locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics The qrwlock implementation is slightly heavy in its use of memory barriers, mainly through the use of _cmpxchg() and _return() atomics, which imply full barrier semantics. This patch modifies the qrwlock code to use the more relaxed atomic routines so that we can reduce the unnecessary barrier overhead on weakly-ordered architectures. 
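The ordering argument, sketched in standalone C11 (invented model_* names and simplified constants, not the kernel code): taking the lock needs ACQUIRE so the critical section cannot move up past it, releasing it needs RELEASE so the critical section cannot move down past it, and neither needs the full barrier that plain atomic_add_return()/atomic_cmpxchg() imply, which is the saving on weakly-ordered machines.

#include <stdatomic.h>
#include <stdint.h>

#define QR_BIAS   0x100u
#define QW_WMASK  0x0ffu

struct model_qrwlock { _Atomic uint32_t cnts; };

static int model_read_trylock(struct model_qrwlock *l)
{
	uint32_t cnts = atomic_fetch_add_explicit(&l->cnts, QR_BIAS,
						  memory_order_acquire) + QR_BIAS;

	if (!(cnts & QW_WMASK))
		return 1;	/* reader in: acquire ordering is enough */

	/* undo the speculative increment; no ordering needed for the undo */
	atomic_fetch_sub_explicit(&l->cnts, QR_BIAS, memory_order_relaxed);
	return 0;
}

static void model_read_unlock(struct model_qrwlock *l)
{
	/* like atomic_sub_return_release(): release ordering, no smp_mb() */
	atomic_fetch_sub_explicit(&l->cnts, QR_BIAS, memory_order_release);
}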
Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hp.com Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1438880084-18856-7-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index eb673dd..54a8e65 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -68,7 +68,7 @@ static inline int queued_read_trylock(struct qrwlock *lock) cnts = atomic_read(&lock->cnts); if (likely(!(cnts & _QW_WMASK))) { - cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts); + cnts = (u32)atomic_add_return_acquire(_QR_BIAS, &lock->cnts); if (likely(!(cnts & _QW_WMASK))) return 1; atomic_sub(_QR_BIAS, &lock->cnts); @@ -89,8 +89,8 @@ static inline int queued_write_trylock(struct qrwlock *lock) if (unlikely(cnts)) return 0; - return likely(atomic_cmpxchg(&lock->cnts, - cnts, cnts | _QW_LOCKED) == cnts); + return likely(atomic_cmpxchg_acquire(&lock->cnts, + cnts, cnts | _QW_LOCKED) == cnts); } /** * queued_read_lock - acquire read lock of a queue rwlock @@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock) { u32 cnts; - cnts = atomic_add_return(_QR_BIAS, &lock->cnts); + cnts = atomic_add_return_acquire(_QR_BIAS, &lock->cnts); if (likely(!(cnts & _QW_WMASK))) return; @@ -115,7 +115,7 @@ static inline void queued_read_lock(struct qrwlock *lock) static inline void queued_write_lock(struct qrwlock *lock) { /* Optimize for the unfair lock case where the fair flag is 0. */ - if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0) + if (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0) return; queued_write_lock_slowpath(lock); @@ -130,8 +130,7 @@ static inline void queued_read_unlock(struct qrwlock *lock) /* * Atomically decrement the reader count */ - smp_mb__before_atomic(); - atomic_sub(_QR_BIAS, &lock->cnts); + (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts); } /** -- cgit v1.1
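A closing sketch of how the last two qrwlock commits fit together (a standalone C11 model, not the kernel header: names are invented, and the union assumes a little-endian lock word with the writer byte lowest, the same layout assumption the generic qrwlock relies on when it casts &lock->cnts to a u8 pointer): the write lock only needs an acquire cmpxchg, and the write unlock becomes a single one-byte store with release ordering, which is exactly what smp_store_release() on a u8 provides since commit 536fa402221f.

#include <stdatomic.h>
#include <stdint.h>

#define QW_LOCKED 0x0ffu	/* value of the writer byte when held */

union model_qrwlock {
	_Atomic uint32_t cnts;		/* whole lock word: reader counts + writer byte */
	_Atomic uint8_t  wlocked;	/* writer byte (low byte on little-endian) */
};

static int model_write_trylock(union model_qrwlock *l)
{
	uint32_t expected = 0;

	/* like atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) */
	return atomic_compare_exchange_strong_explicit(&l->cnts, &expected,
						       QW_LOCKED,
						       memory_order_acquire,
						       memory_order_relaxed);
}

static void model_write_unlock(union model_qrwlock *l)
{
	/*
	 * Like smp_store_release((u8 *)&lock->cnts, 0): clears only the
	 * writer byte, leaving any reader counts in the upper bits intact.
	 */
	atomic_store_explicit(&l->wlocked, 0, memory_order_release);
}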