From c947f69fff183e5d2a06160d9262b5dab7359e95 Mon Sep 17 00:00:00 2001
From: Russell King
Date: Wed, 3 Nov 2010 16:00:15 +0000
Subject: ARM: Fix DMA coherent allocator alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An out-by-one bug meant that the DMA coherent allocator was aligning
to one more bit than it should, causing it to run out of available
memory quicker.  Fix this.

Reported-by: Petr Štetiar
Signed-off-by: Russell King
---
 arch/arm/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e4dd064..ac6a361 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -198,7 +198,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot)
 	 * fragmentation of the DMA space, and also prevents allocations
 	 * smaller than a section from crossing a section boundary.
 	 */
-	bit = fls(size - 1) + 1;
+	bit = fls(size - 1);
 	if (bit > SECTION_SHIFT)
 		bit = SECTION_SHIFT;
 	align = 1 << bit;
--
cgit v1.1
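To make the arithmetic concrete, here is a minimal user-space sketch of
the alignment computation before and after the fix.  This is
illustrative code, not from the kernel; the fls() below merely mimics
the kernel helper, which returns the 1-based index of the most
significant set bit:

  #include <stdio.h>

  /* Mimics the kernel's fls(): 1-based index of the top set bit. */
  static int fls(unsigned int x)
  {
          int bit = 0;

          while (x) {
                  bit++;
                  x >>= 1;
          }
          return bit;
  }

  int main(void)
  {
          unsigned int size = 4096;        /* a one-page allocation */
          int old_bit = fls(size - 1) + 1; /* out by one: 13 */
          int new_bit = fls(size - 1);     /* correct: 12 */

          /* prints "old align 8192, new align 4096" */
          printf("old align %u, new align %u\n",
                 1u << old_bit, 1u << new_bit);
          return 0;
  }

For a natural power-of-two request the old expression thus demanded
twice the necessary alignment, fragmenting the coherent DMA area twice
as fast.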
From 932c42b286e2c6479d1cbdee2927cb283b1c0c3b Mon Sep 17 00:00:00 2001
From: Russell King
Date: Tue, 23 Nov 2010 22:27:55 +0000
Subject: ARM: avoid annoying <4>'s in printk output

Adding KERN_WARNING in the middle of strings now produces those tokens
in the output, rather than accepting the level as was once the case.
Fix this in the one reported case.  There might be more...

Signed-off-by: Russell King
---
 arch/arm/mm/ioremap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 17e7b0b..55c17a6 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -206,8 +206,8 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
 	 */
 	if (pfn_valid(pfn)) {
 		printk(KERN_WARNING "BUG: Your driver calls ioremap() on system memory. This leads\n"
-		       KERN_WARNING "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
-		       KERN_WARNING "will fail in the next kernel release. Please fix your driver.\n");
+		       "to architecturally unpredictable behaviour on ARMv6+, and ioremap()\n"
+		       "will fail in the next kernel release. Please fix your driver.\n");
 		WARN_ON(1);
 	}
--
cgit v1.1

From 6323875db20fd8ca8c8fbbd608bc377f2d4c8cf5 Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Mon, 29 Nov 2010 19:43:25 +0100
Subject: ARM: 6501/1: Thumb-2: Correct data alignment for CONFIG_THUMB2_KERNEL in mm/proc-v7.S

Directives such as .long and .word do not magically cause the
assembler location counter to become aligned in gas.  As a result,
using these directives in code sections can result in misaligned
data words when building a Thumb-2 kernel (CONFIG_THUMB2_KERNEL).

This is a Bad Thing, since the ABI permits the compiler to assume
that fundamental types of word size or above are word-aligned when
accessing them from C.  If the data is not really word-aligned, this
can cause impaired performance and stray alignment faults in some
circumstances.

In general, the following rules should be applied when using data
word declaration directives inside code sections:

	* .quad and .double:
		.align 3

	* .long, .word, .single, .float:
		.align (or .align 2)

	* .short:
		No explicit alignment required, since Thumb-2
		instructions are always 2 or 4 bytes in size, so the
		location counter is always 2-byte aligned immediately
		after an instruction.

In this specific case, we can achieve the desired alignment by
forcing a 32-bit branch instruction using the W() macro, since the
assembler location counter is already 32-bit aligned in both cases.

Reviewed-by: Will Deacon
Signed-off-by: Dave Martin
Acked-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/proc-v7.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 53cbe22..9b9ff5d 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -381,7 +381,7 @@ __v7_ca9mp_proc_info:
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v7_ca9mp_setup
+	W(b)	__v7_ca9mp_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
@@ -413,7 +413,7 @@ __v7_proc_info:
 		PMD_SECT_XN | \
 		PMD_SECT_AP_WRITE | \
 		PMD_SECT_AP_READ
-	b	__v7_setup
+	W(b)	__v7_setup
 	.long	cpu_arch_name
 	.long	cpu_elf_name
 	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_TLS
--
cgit v1.1

From f91e2c3bd427239c198351f44814dd39db91afe0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 7 Dec 2010 16:52:04 +0100
Subject: ARM: 6527/1: Use CTR instead of CCSIDR for the D-cache line size on ARMv7

The current implementation of the dcache_line_size macro reads the L1
cache line size from the CCSIDR register.  This, however, is not
guaranteed to be the smallest cache line in the cache hierarchy.  The
patch changes the macro to use the architecturally correct CTR
register.

Reported-by: Kevin Sapp
Signed-off-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/proc-macros.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 7d63bea..321555b 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -61,14 +61,14 @@
 	.endm

 /*
- * cache_line_size - get the cache line size from the CSIDR register
- * (available on ARMv7+).  It assumes that the CSSR register was configured
- * to access the L1 data cache CSIDR.
+ * dcache_line_size - get the minimum D-cache line size from the CTR register
+ * on ARMv7.
  */
 	.macro	dcache_line_size, reg, tmp
-	mrc	p15, 1, \tmp, c0, c0, 0		@ read CSIDR
-	and	\tmp, \tmp, #7			@ cache line size encoding
-	mov	\reg, #16			@ size offset
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	lsr	\tmp, \tmp, #16
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm
--
cgit v1.1
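For reference, the bit arithmetic performed by dcache_line_size (and by
the icache_line_size macro added in the next patch) can be written out
in C.  A sketch, not kernel code: DminLine in CTR bits [19:16] and
IminLine in CTR bits [3:0] each encode log2 of the line size in words,
and the CTR value below is only an assumed example:

  #include <stdio.h>

  int main(void)
  {
          /* Assumed example value; the real CTR is read with
           * "mrc p15, 0, <reg>, c0, c0, 1". */
          unsigned int ctr = 0x83338003;

          unsigned int dline = 4u << ((ctr >> 16) & 0xf); /* DminLine: words to bytes */
          unsigned int iline = 4u << (ctr & 0xf);         /* IminLine: words to bytes */

          printf("D line %u bytes, I line %u bytes\n", dline, iline); /* 32, 32 */
          return 0;
  }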
From da30e0ac0f9a521f0cfec8145ddd1ad131f66d61 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Tue, 7 Dec 2010 16:56:29 +0100
Subject: ARM: 6528/1: Use CTR for the I-cache line size on ARMv7

The current implementation of the v7_coherent_*_range function assumes
that the D and I cache lines have the same size, which is incorrect
architecturally.  This patch adds the icache_line_size macro which
reads the CTR register.  The main loop in v7_coherent_*_range is split
in two independent loops for the D and I caches.  This also has the
performance advantage that the DSB is moved outside the main loop.

Reported-by: Kevin Sapp
Signed-off-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/cache-v7.S    | 27 +++++++++++++++++----------
 arch/arm/mm/proc-macros.S | 10 ++++++++++
 2 files changed, 27 insertions(+), 10 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index a3ebf7a..6136e68 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -173,15 +173,22 @@ ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart	)
 	dcache_line_size r2, r3
 	sub	r3, r2, #1
-	bic	r0, r0, r3
+	bic	r12, r0, r3
 1:
- USER(	mcr	p15, 0, r0, c7, c11, 1	)	@ clean D line to the point of unification
+ USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
 	dsb
- USER(	mcr	p15, 0, r0, c7, c5, 1	)	@ invalidate I line
-	add	r0, r0, r2
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
 2:
-	cmp	r0, r1
-	blo	1b
+ USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+3:
 	mov	r0, #0
 	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
 	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
@@ -194,10 +201,10 @@ ENTRY(v7_coherent_user_range)
  * isn't mapped, just try the next page.
  */
 9001:
-	mov	r0, r0, lsr #12
-	mov	r0, r0, lsl #12
-	add	r0, r0, #4096
-	b	2b
+	mov	r12, r12, lsr #12
+	mov	r12, r12, lsl #12
+	add	r12, r12, #4096
+	b	3b
 UNWIND(.fnend	)
 ENDPROC(v7_coherent_kern_range)
 ENDPROC(v7_coherent_user_range)
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 321555b..b795afd 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -72,6 +72,16 @@
 	mov	\reg, \reg, lsl \tmp		@ actual cache line size
 	.endm

+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register
+ * on ARMv7.
+ */
+	.macro	icache_line_size, reg, tmp
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
+	mov	\reg, \reg, lsl \tmp		@ actual cache line size
+	.endm
 /*
  * Sanity check the PTE configuration for the code below - which makes
--
cgit v1.1
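The reworked control flow is easier to see in C.  A sketch with
invented helper names standing in for the mcr cache operations, not the
actual implementation:

  #include <stdio.h>

  /* Invented stand-ins for the coprocessor operations. */
  static void clean_dline(unsigned long a) { printf("clean D %#lx\n", a); }
  static void inv_iline(unsigned long a)   { printf("inval I %#lx\n", a); }
  static void dsb(void)                    { printf("dsb\n"); }

  static void coherent_range_sketch(unsigned long start, unsigned long end,
                                    unsigned long dline, unsigned long iline)
  {
          unsigned long addr;

          /* pass 1: clean D lines to the point of unification */
          for (addr = start & ~(dline - 1); addr < end; addr += dline)
                  clean_dline(addr);
          dsb();  /* a single barrier, hoisted out of the loop */
          /* pass 2: invalidate I lines, using the I-cache line size */
          for (addr = start & ~(iline - 1); addr < end; addr += iline)
                  inv_iline(addr);
  }

  int main(void)
  {
          coherent_range_sketch(0x1000c, 0x10100, 64, 32); /* D/I sizes may differ */
          return 0;
  }

Because the two line sizes are looked up independently, a CPU whose
I-cache lines are smaller than its D-cache lines is now handled
correctly, and only one DSB is issued per call rather than one per
line.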
From 85b093bcc5322baa811a03ec73de0909c157f181 Mon Sep 17 00:00:00 2001
From: Valentine Barshak
Date: Tue, 14 Dec 2010 00:03:16 +0100
Subject: ARM: 6535/1: V6 MPCore v6_dma_inv_range and v6_dma_flush_range RWFO fix

Cache ownership must be acquired by reading/writing data from the
cache line to make the cache operations have the desired effect on the
SMP MPCore CPU.  However, the ownership is never acquired in the
v6_dma_inv_range function when cleaning the first line and flushing
the last one, in case the address is not aligned to a
D_CACHE_LINE_SIZE boundary.

Fix this by reading/writing data if needed, before performing the
cache operations.  While at it, fix v6_dma_flush_range to prevent RWFO
outside the buffer.

Cc: stable@kernel.org
Signed-off-by: Valentine Barshak
Signed-off-by: George G. Davis
Acked-by: Catalin Marinas
Signed-off-by: Russell King
---
 arch/arm/mm/cache-v6.S | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'arch/arm/mm')

diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
index 99fa688..c96fa1b 100644
--- a/arch/arm/mm/cache-v6.S
+++ b/arch/arm/mm/cache-v6.S
@@ -203,6 +203,10 @@ ENTRY(v6_flush_kern_dcache_area)
  *	- end     - virtual end address of region
  */
 v6_dma_inv_range:
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrb	r2, [r0]			@ read for ownership
+	strb	r2, [r0]			@ write for ownership
+#endif
 	tst	r0, #D_CACHE_LINE_SIZE - 1
 	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
 #ifdef HARVARD_CACHE
@@ -211,6 +215,10 @@ v6_dma_inv_range:
 	mcrne	p15, 0, r0, c7, c11, 1		@ clean unified line
 #endif
 	tst	r1, #D_CACHE_LINE_SIZE - 1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrneb	r2, [r1, #-1]			@ read for ownership
+	strneb	r2, [r1, #-1]			@ write for ownership
+#endif
 	bic	r1, r1, #D_CACHE_LINE_SIZE - 1
 #ifdef HARVARD_CACHE
 	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D line
@@ -218,10 +226,6 @@ v6_dma_inv_range:
 	mcrne	p15, 0, r1, c7, c15, 1		@ clean & invalidate unified line
 #endif
 1:
-#ifdef CONFIG_DMA_CACHE_RWFO
-	ldr	r2, [r0]			@ read for ownership
-	str	r2, [r0]			@ write for ownership
-#endif
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D line
 #else
@@ -229,6 +233,10 @@ v6_dma_inv_range:
 #endif
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	cmp	r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrlo	r2, [r0]			@ read for ownership
+	strlo	r2, [r0]			@ write for ownership
+#endif
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
@@ -263,12 +271,12 @@ v6_dma_clean_range:
  *	- end     - virtual end address of region
  */
 ENTRY(v6_dma_flush_range)
-	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
-1:
 #ifdef CONFIG_DMA_CACHE_RWFO
-	ldr	r2, [r0]			@ read for ownership
-	str	r2, [r0]			@ write for ownership
+	ldrb	r2, [r0]			@ read for ownership
+	strb	r2, [r0]			@ write for ownership
 #endif
+	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
 #ifdef HARVARD_CACHE
 	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
 #else
@@ -276,6 +284,10 @@ ENTRY(v6_dma_flush_range)
 #endif
 	add	r0, r0, #D_CACHE_LINE_SIZE
 	cmp	r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrlob	r2, [r0]			@ read for ownership
+	strlob	r2, [r0]			@ write for ownership
+#endif
 	blo	1b
 	mov	r0, #0
 	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
--
cgit v1.1
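The intent of the fix can be sketched in C.  The helpers below are
invented for illustration (a byte load/store pair stands for the
ldrb/strb read/write-for-ownership sequence), and the flow only
approximates the fixed v6_dma_inv_range:

  #include <stdio.h>
  #include <stdint.h>

  #define LINE 32u  /* stands in for D_CACHE_LINE_SIZE */

  /* Invented stand-ins: on the MPCore, touching a byte migrates the
   * line to the local CPU so the cache op acts on the right copy. */
  static void rwfo(uintptr_t a)      { printf("rwfo    %#lx\n", (unsigned long)a); }
  static void clean(uintptr_t a)     { printf("clean   %#lx\n", (unsigned long)a); }
  static void clean_inv(uintptr_t a) { printf("cl+inv  %#lx\n", (unsigned long)a); }
  static void inv(uintptr_t a)       { printf("inval   %#lx\n", (unsigned long)a); }

  static void dma_inv_range_sketch(uintptr_t start, uintptr_t end)
  {
          uintptr_t addr = start & ~(uintptr_t)(LINE - 1);

          rwfo(start);                    /* own the first line (was missing) */
          if (start & (LINE - 1))
                  clean(addr);            /* partial first line: clean it */
          if (end & (LINE - 1)) {
                  rwfo(end - 1);          /* own the last line from inside it */
                  clean_inv(end & ~(uintptr_t)(LINE - 1));
          }
          end &= ~(uintptr_t)(LINE - 1);
          for (; addr < end; addr += LINE) {
                  inv(addr);
                  if (addr + LINE < end)  /* ldrlo/strlo: never past the buffer */
                          rwfo(addr + LINE);
          }
          /* the real routine then drains the write buffer */
  }

  int main(void)
  {
          dma_inv_range_sketch(0x1008, 0x10f8);  /* unaligned start and end */
          return 0;
  }

The same principle drives the v6_dma_flush_range change: the ownership
access is taken from the untruncated start address and, inside the
loop, only while the incremented address is still below end, so RWFO
never touches memory outside the caller's buffer.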