diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-20 16:48:59 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-20 16:48:59 -0800 |
commit | 60815cf2e05057db5b78e398d9734c493560b11e (patch) | |
tree | 23d7f55df13cc5a0c072cc8a6f361f8e7050b825 | |
parent | bfc7249cc293deac8f2678b7ec3d2407b68c0a33 (diff) | |
parent | 5de72a2247ac05bde7c89039631b3d0c6186fafb (diff) | |
download | op-kernel-dev-60815cf2e05057db5b78e398d9734c493560b11e.zip op-kernel-dev-60815cf2e05057db5b78e398d9734c493560b11e.tar.gz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux
Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger:
"kernel: Provide READ_ONCE and ASSIGN_ONCE
As discussed on LKML http://marc.info/?i=54611D86.4040306%40de.ibm.com
ACCESS_ONCE might fail with specific compilers for non-scalar
accesses.
Here is a set of patches to tackle that problem.
The first patch introduces READ_ONCE and ASSIGN_ONCE. If the data
structure is larger than the machine word size, memcpy is used and a
warning is emitted. The next patches fix up several in-tree users of
ACCESS_ONCE on non-scalar types.
This does not yet contain a patch that forces ACCESS_ONCE to work only
on scalar types. This is targeted for the next merge window as linux-next
already contains new offenders regarding ACCESS_ONCE vs.
non-scalar types"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
s390/kvm: REPLACE barrier fixup with READ_ONCE
arm/spinlock: Replace ACCESS_ONCE with READ_ONCE
arm64/spinlock: Replace ACCESS_ONCE READ_ONCE
mips/gup: Replace ACCESS_ONCE with READ_ONCE
x86/gup: Replace ACCESS_ONCE with READ_ONCE
x86/spinlock: Replace ACCESS_ONCE with READ_ONCE
mm: replace ACCESS_ONCE with READ_ONCE or barriers
kernel: Provide READ_ONCE and ASSIGN_ONCE
-rw-r--r-- | arch/arm/include/asm/spinlock.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/spinlock.h | 4 | ||||
-rw-r--r-- | arch/mips/mm/gup.c | 2 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 18 | ||||
-rw-r--r-- | arch/x86/include/asm/spinlock.h | 8 | ||||
-rw-r--r-- | arch/x86/mm/gup.c | 2 | ||||
-rw-r--r-- | include/linux/compiler.h | 74 | ||||
-rw-r--r-- | mm/gup.c | 2 | ||||
-rw-r--r-- | mm/memory.c | 11 | ||||
-rw-r--r-- | mm/rmap.c | 3 |
10 files changed, 103 insertions, 25 deletions
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index ac4bfae..0fa4184 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -120,12 +120,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tickets = READ_ONCE(lock->tickets); return (tickets.next - tickets.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index c45b7b1..cee1287 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -99,12 +99,12 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - return !arch_spin_value_unlocked(ACCESS_ONCE(*lock)); + return !arch_spin_value_unlocked(READ_ONCE(*lock)); } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - arch_spinlock_t lockval = ACCESS_ONCE(*lock); + arch_spinlock_t lockval = READ_ONCE(*lock); return (lockval.next - lockval.owner) > 1; } #define arch_spin_is_contended arch_spin_is_contended diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 7cba480..70795a6 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -30,7 +30,7 @@ retry: return pte; #else - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #endif } diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 8b9ccf0..8a1be90 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -227,12 +227,10 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old 
= READ_ONCE(*ic); while (old.k) { cond_resched(); - old = *ic; - barrier(); + old = READ_ONCE(*ic); } new = old; new.k = 1; @@ -251,8 +249,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) goto out; ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); @@ -267,12 +264,10 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); while (old.kg) { cond_resched(); - old = *ic; - barrier(); + old = READ_ONCE(*ic); } new = old; new.k = 1; @@ -286,8 +281,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) ic = &vcpu->kvm->arch.sca->ipte_control; do { - old = *ic; - barrier(); + old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh) diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index a4efe47..625660f 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -92,7 +92,7 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock) unsigned count = SPIN_THRESHOLD; do { - if (ACCESS_ONCE(lock->tickets.head) == inc.tail) + if (READ_ONCE(lock->tickets.head) == inc.tail) goto out; cpu_relax(); } while (--count); @@ -105,7 +105,7 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) { arch_spinlock_t old, new; - old.tickets = ACCESS_ONCE(lock->tickets); + old.tickets = READ_ONCE(lock->tickets); if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG)) return 0; @@ -162,14 +162,14 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return tmp.tail != tmp.head; } static inline int arch_spin_is_contended(arch_spinlock_t *lock) { - struct __raw_tickets tmp = 
ACCESS_ONCE(lock->tickets); + struct __raw_tickets tmp = READ_ONCE(lock->tickets); return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC; } diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 207d9aef..d754782 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -15,7 +15,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) { #ifndef CONFIG_X86_PAE - return ACCESS_ONCE(*ptep); + return READ_ONCE(*ptep); #else /* * With get_user_pages_fast, we walk down the pagetables without taking diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d5ad7b1..a1c81f8 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -186,6 +186,80 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) #endif +#include <uapi/linux/types.h> + +static __always_inline void data_access_exceeds_word_size(void) +#ifdef __compiletime_warning +__compiletime_warning("data access exceeds word size and won't be atomic") +#endif +; + +static __always_inline void data_access_exceeds_word_size(void) +{ +} + +static __always_inline void __read_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; +#ifdef CONFIG_64BIT + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; +#endif + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + data_access_exceeds_word_size(); + barrier(); + } +} + +static __always_inline void __assign_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; +#ifdef CONFIG_64BIT + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; +#endif + default: + barrier(); 
+ __builtin_memcpy((void *)p, (const void *)res, size); + data_access_exceeds_word_size(); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * ASSIGN_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. 
+ */ + +#define READ_ONCE(x) \ + ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; }) + +#define ASSIGN_ONCE(val, x) \ + ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; }) + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ @@ -968,7 +968,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, pudp = pud_offset(&pgd, addr); do { - pud_t pud = ACCESS_ONCE(*pudp); + pud_t pud = READ_ONCE(*pudp); next = pud_addr_end(addr, end); if (pud_none(pud)) diff --git a/mm/memory.c b/mm/memory.c index d8aebc5..649e7d44 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3195,7 +3195,16 @@ static int handle_pte_fault(struct mm_struct *mm, pte_t entry; spinlock_t *ptl; - entry = ACCESS_ONCE(*pte); + /* + * some architectures can have larger ptes than wordsize, + * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y, + * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses. + * The code below just needs a consistent view for the ifs and + * we later double check anyway with the ptl lock held. So here + * a barrier will do. + */ + entry = *pte; + barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { if (vma->vm_ops) { @@ -583,7 +583,8 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) * without holding anon_vma lock for write. So when looking for a * genuine pmde (in which to find pte), test present and !THP together. */ - pmde = ACCESS_ONCE(*pmd); + pmde = *pmd; + barrier(); if (!pmd_present(pmde) || pmd_trans_huge(pmde)) pmd = NULL; out: |