diff options
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 48 | ||||
-rw-r--r-- | arch/arm/mm/cache-tauros3.h | 41 | ||||
-rw-r--r-- | arch/arm/mm/cache-v7.S | 14 | ||||
-rw-r--r-- | arch/arm/mm/context.c | 41 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 88 | ||||
-rw-r--r-- | arch/arm/mm/flush.c | 6 | ||||
-rw-r--r-- | arch/arm/mm/mmap.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/pgd.c | 5 |
8 files changed, 160 insertions, 85 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 447da6f..7abde2c 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -25,6 +25,7 @@ #include <asm/cacheflush.h> #include <asm/hardware/cache-l2x0.h> +#include "cache-tauros3.h" #include "cache-aurora-l2.h" #define CACHE_LINE_SIZE 32 @@ -767,6 +768,14 @@ static void aurora_save(void) l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); } +static void __init tauros3_save(void) +{ + l2x0_saved_regs.aux2_ctrl = + readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL); + l2x0_saved_regs.prefetch_ctrl = + readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); +} + static void l2x0_resume(void) { if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { @@ -821,6 +830,18 @@ static void aurora_resume(void) } } +static void tauros3_resume(void) +{ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + l2x0_base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + l2x0_base + L2X0_PREFETCH_CTRL); + } + + l2x0_resume(); +} + static void __init aurora_broadcast_l2_commands(void) { __u32 u; @@ -906,6 +927,15 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data tauros3_data = { + .setup = NULL, + .save = tauros3_save, + /* Tauros3 broadcasts L1 cache operations to L2 */ + .outer_cache = { + .resume = tauros3_resume, + }, +}; + static const struct l2x0_of_data bcm_l2x0_data = { .setup = pl310_of_setup, .save = pl310_save, @@ -922,17 +952,19 @@ static const struct l2x0_of_data bcm_l2x0_data = { }; static const struct of_device_id l2x0_ids[] __initconst = { - { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, - { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, - { .compatible = "marvell,aurora-system-cache", - .data = (void *)&aurora_no_outer_data}, - { .compatible = "marvell,aurora-outer-cache", - .data = (void *)&aurora_with_outer_data}, - { .compatible = "brcm,bcm11351-a2-pl310-cache", - .data = (void *)&bcm_l2x0_data}, + { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, + { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */ .data = (void *)&bcm_l2x0_data}, + { .compatible = "brcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, + { .compatible = "marvell,aurora-outer-cache", + .data = (void *)&aurora_with_outer_data}, + { .compatible = "marvell,aurora-system-cache", + .data = (void *)&aurora_no_outer_data}, + { .compatible = "marvell,tauros3-cache", + .data = (void *)&tauros3_data }, {} }; diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 0000000..02c0a97 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,41 @@ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> + * + * based on GPL'ed 2.6 kernel sources + * (c) Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. + * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG 0x224 +#define TAUROS3_EVENT_CNT2_VAL 0x228 +#define TAUROS3_INV_ALL 0x780 +#define TAUROS3_CLEAN_ALL 0x784 +#define TAUROS3_AUX2_CTRL 0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2) + +#endif diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index b5c467a..778bcf8 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -146,18 +146,18 @@ flush_levels: ldr r7, =0x7fff ands r7, r7, r1, lsr #13 @ extract max number of the index size loop1: - mov r9, r4 @ create working copy of max way size + mov r9, r7 @ create working copy of max index loop2: - ARM( orr r11, r10, r9, lsl r5 ) @ factor way and cache number into r11 - THUMB( lsl r6, r9, r5 ) + ARM( orr r11, r10, r4, lsl r5 ) @ factor way and cache number into r11 + THUMB( lsl r6, r4, r5 ) THUMB( orr r11, r10, r6 ) @ factor way and cache number into r11 - ARM( orr r11, r11, r7, lsl r2 ) @ factor index number into r11 - THUMB( lsl r6, r7, r2 ) + ARM( orr r11, r11, r9, lsl r2 ) @ factor index number into r11 + THUMB( lsl r6, r9, r2 ) THUMB( orr r11, r11, r6 ) @ factor index number into r11 mcr p15, 0, r11, c7, c14, 2 @ clean & invalidate by set/way - subs r9, r9, #1 @ decrement the way + subs r9, r9, #1 @ decrement the index bge loop2 - subs r7, r7, #1 @ decrement the index + subs r4, r4, #1 @ decrement the way bge loop1 skip: add r10, r10, #2 @ increment cache number diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 84e6f77..6eb97b3 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -36,8 +36,8 @@ * The context ID is used by debuggers and trace logic, and * should be unique within all running processes. * - * In big endian operation, the two 32 bit words are swapped if accesed by - * non 64-bit operations. + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) #define NUM_USER_ASIDS ASID_FIRST_VERSION @@ -78,20 +78,21 @@ void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, #endif #ifdef CONFIG_ARM_LPAE -static void cpu_set_reserved_ttbr0(void) -{ - /* - * Set TTBR0 to swapper_pg_dir which contains only global entries. The - * ASID is set to 0. - */ - cpu_set_ttbr(0, __pa(swapper_pg_dir)); - isb(); -} +/* + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). + */ +#define cpu_set_reserved_ttbr0() #else static void cpu_set_reserved_ttbr0(void) { u32 ttb; - /* Copy TTBR1 into TTBR0 */ + /* + * Copy TTBR1 into TTBR0. + * This points at swapper_pg_dir, which contains only global + * entries so any speculative walks are perfectly safe. + */ asm volatile( " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" @@ -179,6 +180,7 @@ static int is_reserved_asid(u64 asid) static u64 new_context(struct mm_struct *mm, unsigned int cpu) { + static u32 cur_idx = 1; u64 asid = atomic64_read(&mm->context.id); u64 generation = atomic64_read(&asid_generation); @@ -193,10 +195,13 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs * as requiring flushes. We always count from ASID #1, - * as we reserve ASID #0 to switch via TTBR0 and indicate - * rollover events. + * as we reserve ASID #0 to switch via TTBR0 and to + * avoid speculative page table walks from hitting in + * any partial walk caches, which could be populated + * from overlapping level-1 descriptors used to map both + * the module area and the userspace stack. */ - asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); @@ -204,6 +209,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); + cur_idx = asid; asid |= generation; cpumask_clear(mm_cpumask(mm)); } @@ -221,8 +227,9 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) __check_vmalloc_seq(mm); /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. + * We cannot update the pgd and the ASID atomicly with classic + * MMU, so switch exclusively to global mappings to avoid + * speculative page table walking with the wrong TTBR. */ cpu_set_reserved_ttbr0(); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index f0ea013..1a77450 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -9,6 +9,7 @@ * * DMA uncached mapping support. */ +#include <linux/bootmem.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/gfp.h> @@ -157,6 +158,44 @@ struct dma_map_ops arm_coherent_dma_ops = { }; EXPORT_SYMBOL(arm_coherent_dma_ops); +static int __dma_supported(struct device *dev, u64 mask, bool warn) +{ + unsigned long max_dma_pfn; + + /* + * If the mask allows for more memory than we can address, + * and we actually have that much memory, then we must + * indicate that DMA to this device is not supported. + */ + if (sizeof(mask) != sizeof(dma_addr_t) && + mask > (dma_addr_t)~0 && + dma_to_pfn(dev, ~0) < max_pfn) { + if (warn) { + dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", + mask); + dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); + } + return 0; + } + + max_dma_pfn = min(max_pfn, arm_dma_pfn_limit); + + /* + * Translate the device's DMA mask to a PFN limit. This + * PFN number includes the page which we can DMA to. + */ + if (dma_to_pfn(dev, mask) < max_dma_pfn) { + if (warn) + dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", + mask, + dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, + max_dma_pfn + 1); + return 0; + } + + return 1; +} + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)DMA_BIT_MASK(32); @@ -173,32 +212,8 @@ static u64 get_coherent_dma_mask(struct device *dev) return 0; } - /* - * If the mask allows for more memory than we can address, - * and we actually have that much memory, then fail the - * allocation. - */ - if (sizeof(mask) != sizeof(dma_addr_t) && - mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) > arm_dma_pfn_limit) { - dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n", - mask); - dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n"); - return 0; - } - - /* - * Now check that the mask, when translated to a PFN, - * fits within the allowable addresses which we can - * allocate. - */ - if (dma_to_pfn(dev, mask) < arm_dma_pfn_limit) { - dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n", - mask, - dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1, - arm_dma_pfn_limit + 1); + if (!__dma_supported(dev, mask, true)) return 0; - } } return mask; @@ -1027,28 +1042,7 @@ void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, */ int dma_supported(struct device *dev, u64 mask) { - unsigned long limit; - - /* - * If the mask allows for more memory than we can address, - * and we actually have that much memory, then we must - * indicate that DMA to this device is not supported. - */ - if (sizeof(mask) != sizeof(dma_addr_t) && - mask > (dma_addr_t)~0 && - dma_to_pfn(dev, ~0) > arm_dma_pfn_limit) - return 0; - - /* - * Translate the device's DMA mask to a PFN limit. This - * PFN number includes the page which we can DMA to. - */ - limit = dma_to_pfn(dev, mask); - - if (limit < arm_dma_pfn_limit) - return 0; - - return 1; + return __dma_supported(dev, mask, false); } EXPORT_SYMBOL(dma_supported); diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 6d5ba9a..3387e60 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -175,16 +175,16 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page) unsigned long i; if (cache_is_vipt_nonaliasing()) { for (i = 0; i < (1 << compound_order(page)); i++) { - void *addr = kmap_atomic(page); + void *addr = kmap_atomic(page + i); __cpuc_flush_dcache_area(addr, PAGE_SIZE); kunmap_atomic(addr); } } else { for (i = 0; i < (1 << compound_order(page)); i++) { - void *addr = kmap_high_get(page); + void *addr = kmap_high_get(page + i); if (addr) { __cpuc_flush_dcache_area(addr, PAGE_SIZE); - kunmap_high(page); + kunmap_high(page + i); } } } diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index d27158c3..5e85ed3 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -146,7 +146,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; - info.low_limit = PAGE_SIZE; + info.low_limit = FIRST_USER_ADDRESS; info.high_limit = mm->mmap_base; info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; info.align_offset = pgoff << PAGE_SHIFT; diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 0acb089..2493795 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -23,7 +23,7 @@ #define __pgd_alloc() kmalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL) #define __pgd_free(pgd) kfree(pgd) #else -#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, 2) #define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) #endif @@ -87,7 +87,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); - set_pte_ext(new_pte, *init_pte, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); pte_unmap(init_pte); pte_unmap(new_pte); } |