From 826681043d7184b4d650cab5b007b9a86b628eb5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 17 May 2009 16:20:18 +0100 Subject: [ARM] smp: fix cpumask usage in ARM SMP code The ARM SMP code wasn't properly updated for the cpumask changes, which results in smp_timer_broadcast() broadcasting ticks to non-online CPUs. Signed-off-by: Russell King --- arch/arm/include/asm/hardware/gic.h | 2 +- arch/arm/include/asm/smp.h | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/hardware/gic.h b/arch/arm/include/asm/hardware/gic.h index 4924914..7f34333b 100644 --- a/arch/arm/include/asm/hardware/gic.h +++ b/arch/arm/include/asm/hardware/gic.h @@ -36,7 +36,7 @@ void gic_dist_init(unsigned int gic_nr, void __iomem *base, unsigned int irq_start); void gic_cpu_init(unsigned int gic_nr, void __iomem *base); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -void gic_raise_softirq(cpumask_t cpumask, unsigned int irq); +void gic_raise_softirq(const struct cpumask *mask, unsigned int irq); #endif #endif diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index fad70da..5995935 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -53,17 +53,12 @@ extern void smp_store_cpu_info(unsigned int cpuid); /* * Raise an IPI cross call on CPUs in callmap. */ -extern void smp_cross_call(cpumask_t callmap); - -/* - * Broadcast a timer interrupt to the other CPUs. - */ -extern void smp_send_timer(void); +extern void smp_cross_call(const struct cpumask *mask); /* * Broadcast a clock event to other CPUs. */ -extern void smp_timer_broadcast(cpumask_t mask); +extern void smp_timer_broadcast(const struct cpumask *mask); /* * Boot a secondary CPU, and assign it the specified idle task. @@ -102,7 +97,8 @@ extern int platform_cpu_kill(unsigned int cpu); extern void platform_cpu_enable(unsigned int cpu); extern void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi(cpumask_t mask); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask /* * Local timer interrupt handling function (can be IPI'ed). -- cgit v1.1 From bac4e960b5ce2453d862beaf20e59aa68af3b43a Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 25 May 2009 20:58:00 +0100 Subject: [ARM] barriers: improve xchg, bitops and atomic SMP barriers Mathieu Desnoyers pointed out that the ARM barriers were lacking: - cmpxchg, xchg and atomic add return need memory barriers on architectures which can reorder the relative order in which memory read/writes can be seen between CPUs, which seems to include recent ARM architectures. Those barriers are currently missing on ARM. - test_and_xxx_bit were missing SMP barriers. So put these barriers in. Provide separate atomic_add/atomic_sub operations which do not require barriers. Reported-Reviewed-and-Acked-by: Mathieu Desnoyers Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 13 +++++++++ arch/arm/include/asm/atomic.h | 61 ++++++++++++++++++++++++++++++++++------ arch/arm/include/asm/system.h | 3 ++ 3 files changed, 68 insertions(+), 9 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 6116e48..15f8a09 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -114,3 +114,16 @@ .align 3; \ .long 9999b,9001f; \ .previous + +/* + * SMP data memory barrier + */ + .macro smp_dmb +#ifdef CONFIG_SMP +#if __LINUX_ARM_ARCH__ >= 7 + dmb +#elif __LINUX_ARM_ARCH__ == 6 + mcr p15, 0, r0, c7, c10, 5 @ dmb +#endif +#endif + .endm diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index ee99723..16b52f3 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -44,11 +44,29 @@ static inline void atomic_set(atomic_t *v, int i) : "cc"); } +static inline void atomic_add(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_add\n" +"1: ldrex %0, [%2]\n" +" add %0, %0, %3\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + static inline int atomic_add_return(int i, atomic_t *v) { unsigned long tmp; int result; + smp_mb(); + __asm__ __volatile__("@ atomic_add_return\n" "1: ldrex %0, [%2]\n" " add %0, %0, %3\n" @@ -59,14 +77,34 @@ static inline int atomic_add_return(int i, atomic_t *v) : "r" (&v->counter), "Ir" (i) : "cc"); + smp_mb(); + return result; } +static inline void atomic_sub(int i, atomic_t *v) +{ + unsigned long tmp; + int result; + + __asm__ __volatile__("@ atomic_sub\n" +"1: ldrex %0, [%2]\n" +" sub %0, %0, %3\n" +" strex %1, %0, [%2]\n" +" teq %1, #0\n" +" bne 1b" + : "=&r" (result), "=&r" (tmp) + : "r" (&v->counter), "Ir" (i) + : "cc"); +} + static inline int atomic_sub_return(int i, atomic_t *v) { unsigned long tmp; int result; + smp_mb(); + __asm__ __volatile__("@ atomic_sub_return\n" "1: ldrex %0, [%2]\n" " sub %0, %0, %3\n" @@ -77,6 +115,8 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "r" (&v->counter), "Ir" (i) : "cc"); + smp_mb(); + return result; } @@ -84,6 +124,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) { unsigned long oldval, res; + smp_mb(); + do { __asm__ __volatile__("@ atomic_cmpxchg\n" "ldrex %1, [%2]\n" @@ -95,6 +137,8 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) : "cc"); } while (res); + smp_mb(); + return oldval; } @@ -135,6 +179,7 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } +#define atomic_add(i, v) (void) atomic_add_return(i, v) static inline int atomic_sub_return(int i, atomic_t *v) { @@ -148,6 +193,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) return val; } +#define atomic_sub(i, v) (void) atomic_sub_return(i, v) static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { @@ -187,10 +233,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) } #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) -#define atomic_add(i, v) (void) atomic_add_return(i, v) -#define atomic_inc(v) (void) atomic_add_return(1, v) -#define atomic_sub(i, v) (void) atomic_sub_return(i, v) -#define atomic_dec(v) (void) atomic_sub_return(1, v) +#define atomic_inc(v) atomic_add(1, v) +#define atomic_dec(v) atomic_sub(1, v) #define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) @@ -200,11 +244,10 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) -/* Atomic operations are already serializing on ARM */ -#define smp_mb__before_atomic_dec() barrier() -#define smp_mb__after_atomic_dec() barrier() -#define smp_mb__before_atomic_inc() barrier() -#define smp_mb__after_atomic_inc() barrier() +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() #include #endif diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index bd4dc8e..7fce8f3 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -248,6 +248,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size unsigned int tmp; #endif + smp_mb(); + switch (size) { #if __LINUX_ARM_ARCH__ >= 6 case 1: @@ -307,6 +309,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size __bad_xchg(ptr, size), ret = 0; break; } + smp_mb(); return ret; } -- cgit v1.1 From ecd322c9b3e4ac70f9f108badde3eb6b99c7993d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 28 May 2009 16:07:39 -0400 Subject: [ARM] Add cmpxchg support for ARMv6+ systems (v5) Add cmpxchg/cmpxchg64 support for ARMv6K and ARMv7 systems (original patch from Catalin Marinas ) The cmpxchg and cmpxchg64 functions can be implemented using the LDREX*/STREX* instructions. Since operand lengths other than 32bit are required, the full implementations are only available if the ARMv6K extensions are present (for the LDREXB, LDREXH and LDREXD instructions). For ARMv6, only 32-bits cmpxchg is available. Mathieu : Make cmpxchg_local always available with best implementation for all type sizes (1, 2, 4 bytes). Make cmpxchg64_local always available. Use "Ir" constraint for "old" operand, like atomic.h atomic_cmpxchg does. Change since v3 : - Add "memory" clobbers (thanks to Nicolas Pitre) - removed __asmeq(), only needed for old compilers, very unlikely on ARMv6+. Note : ARMv7-M should eventually be ifdefed-out of cmpxchg64. But it's not supported by the Linux kernel currently. Put back arm < v6 cmpxchg support. Signed-off-by: Mathieu Desnoyers CC: Catalin Marinas CC: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/include/asm/system.h | 173 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 7fce8f3..d65b2f5 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -319,6 +319,12 @@ extern void enable_hlt(void); #include +#if __LINUX_ARM_ARCH__ < 6 + +#ifdef CONFIG_SMP +#error "SMP is not supported on this platform" +#endif + /* * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make * them available. @@ -332,6 +338,173 @@ extern void enable_hlt(void); #include #endif +#else /* __LINUX_ARM_ARCH__ >= 6 */ + +extern void __bad_cmpxchg(volatile void *ptr, int size); + +/* + * cmpxchg only support 32-bits operands on ARMv6. + */ + +static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long oldval, res; + + switch (size) { +#ifdef CONFIG_CPU_32v6K + case 1: + do { + asm volatile("@ __cmpxchg1\n" + " ldrexb %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexbeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; + case 2: + do { + asm volatile("@ __cmpxchg1\n" + " ldrexh %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexheq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; +#endif /* CONFIG_CPU_32v6K */ + case 4: + do { + asm volatile("@ __cmpxchg4\n" + " ldrex %1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " strexeq %0, %4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (old), "r" (new) + : "memory", "cc"); + } while (res); + break; + default: + __bad_cmpxchg(ptr, size); + oldval = 0; + } + + return oldval; +} + +static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + smp_mb(); + ret = __cmpxchg(ptr, old, new, size); + smp_mb(); + + return ret; +} + +#define cmpxchg(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg_mb((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long ret; + + switch (size) { +#ifndef CONFIG_CPU_32v6K + case 1: + case 2: + ret = __cmpxchg_local_generic(ptr, old, new, size); + break; +#endif /* !CONFIG_CPU_32v6K */ + default: + ret = __cmpxchg(ptr, old, new, size); + } + + return ret; +} + +#define cmpxchg_local(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr)))) + +#ifdef CONFIG_CPU_32v6K + +/* + * Note : ARMv7-M (currently unsupported by Linux) does not support + * ldrexd/strexd. If ARMv7-M is ever supported by the Linux kernel, it should + * not be allowed to use __cmpxchg64. + */ +static inline unsigned long long __cmpxchg64(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + register unsigned long long oldval asm("r0"); + register unsigned long long __old asm("r2") = old; + register unsigned long long __new asm("r4") = new; + unsigned long res; + + do { + asm volatile( + " @ __cmpxchg8\n" + " ldrexd %1, %H1, [%2]\n" + " mov %0, #0\n" + " teq %1, %3\n" + " teqeq %H1, %H3\n" + " strexdeq %0, %4, %H4, [%2]\n" + : "=&r" (res), "=&r" (oldval) + : "r" (ptr), "Ir" (__old), "r" (__new) + : "memory", "cc"); + } while (res); + + return oldval; +} + +static inline unsigned long long __cmpxchg64_mb(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long ret; + + smp_mb(); + ret = __cmpxchg64(ptr, old, new); + smp_mb(); + + return ret; +} + +#define cmpxchg64(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg64_mb((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#define cmpxchg64_local(ptr,o,n) \ + ((__typeof__(*(ptr)))__cmpxchg64((ptr), \ + (unsigned long long)(o), \ + (unsigned long long)(n))) + +#else /* !CONFIG_CPU_32v6K */ + +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) + +#endif /* CONFIG_CPU_32v6K */ + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #endif /* __ASSEMBLY__ */ #define arch_align_stack(x) (x) -- cgit v1.1 From c3dc5bec05a2ae03a72ef82e321d77fb549d951c Mon Sep 17 00:00:00 2001 From: Oskar Schirmer Date: Thu, 28 May 2009 14:34:31 -0700 Subject: flat: fix data sections alignment The flat loader uses an architecture's flat_stack_align() to align the stack but assumes word-alignment is enough for the data sections. However, on the Xtensa S6000 we have registers up to 128bit width which can be used from userspace and therefor need userspace stack and data-section alignment of at least this size. This patch drops flat_stack_align() and uses the same alignment that is required for slab caches, ARCH_SLAB_MINALIGN, or wordsize if it's not defined by the architecture. It also fixes m32r which was obviously kaput, aligning an uninitialized stack entry instead of the stack pointer. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Oskar Schirmer Cc: David Howells Cc: Russell King Cc: Bryan Wu Cc: Geert Uytterhoeven Acked-by: Paul Mundt Cc: Greg Ungerer Signed-off-by: Johannes Weiner Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/flat.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/flat.h b/arch/arm/include/asm/flat.h index 1d77e51..59426a4 100644 --- a/arch/arm/include/asm/flat.h +++ b/arch/arm/include/asm/flat.h @@ -5,9 +5,6 @@ #ifndef __ARM_FLAT_H__ #define __ARM_FLAT_H__ -/* An odd number of words will be pushed after this alignment, so - deliberately misalign the value. */ -#define flat_stack_align(sp) sp = (void *)(((unsigned long)(sp) - 4) | 4) #define flat_argvp_envp_on_stack() 1 #define flat_old_ram_flag(flags) (flags) #define flat_reloc_valid(reloc, size) ((reloc) <= (size)) -- cgit v1.1 From eb5f4ca9536ba297c98721ecbbdf41ec5b987bd5 Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Mon, 1 Jun 2009 09:19:37 +0100 Subject: [ARM] 5534/1: kmalloc must return a cache line aligned buffer Define ARCH_KMALLOC_MINALIGN in asm/cache.h At the request of Russell also move ARCH_SLAB_MINALIGN to this file. Signed-off-by: Martin Fuzzey Signed-off-by: Russell King --- arch/arm/include/asm/cache.h | 16 ++++++++++++++++ arch/arm/include/asm/page.h | 7 ------- 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/arm/include') diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index cb7a9e9..feaa75f 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -7,4 +7,20 @@ #define L1_CACHE_SHIFT 5 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) +/* + * Memory returned by kmalloc() may be used for DMA, so we must make + * sure that all such allocations are cache aligned. Otherwise, + * unrelated code may cause parts of the buffer to be read into the + * cache before the transfer is done, causing old data to be seen by + * the CPU. + */ +#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES + +/* + * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers. + */ +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +#define ARCH_SLAB_MINALIGN 8 +#endif + #endif diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index e6eb8a6..7b52277 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -202,13 +202,6 @@ typedef struct page *pgtable_t; (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -/* - * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers. - */ -#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) -#define ARCH_SLAB_MINALIGN 8 -#endif - #include #endif -- cgit v1.1