diff options
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 158 | ||||
-rw-r--r-- | arch/arm/mm/context.c | 55 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 20 | ||||
-rw-r--r-- | arch/arm/mm/flush.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/ioremap.c | 10 | ||||
-rw-r--r-- | arch/arm/mm/nommu.c | 6 |
6 files changed, 229 insertions, 22 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c465fac..d70e0ab 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -523,6 +523,147 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. + * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_clean_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + l2x0_flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2x0_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2x0_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + static void __init l2x0_of_setup(const struct device_node *np, u32 *aux_val, u32 *aux_mask) { @@ -765,6 +906,21 @@ static const struct l2x0_of_data aurora_no_outer_data = { }, }; +static const struct l2x0_of_data bcm_l2x0_data = { + .setup = pl310_of_setup, + .save = pl310_save, + .outer_cache = { + .resume = pl310_resume, + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .sync = l2x0_cache_sync, + .flush_all = l2x0_flush_all, + .inv_all = l2x0_inv_all, + .disable = l2x0_disable, + }, +}; + static const struct of_device_id l2x0_ids[] __initconst = { { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, @@ -773,6 +929,8 @@ static const struct of_device_id l2x0_ids[] __initconst = { .data = (void *)&aurora_no_outer_data}, { .compatible = "marvell,aurora-outer-cache", .data = (void *)&aurora_with_outer_data}, + { .compatible = "bcm,bcm11351-a2-pl310-cache", + .data = (void *)&bcm_l2x0_data}, {} }; diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index 2ac3737..eeab06e 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -39,19 +39,43 @@ * non 64-bit operations. */ #define ASID_FIRST_VERSION (1ULL << ASID_BITS) -#define NUM_USER_ASIDS (ASID_FIRST_VERSION - 1) - -#define ASID_TO_IDX(asid) ((asid & ~ASID_MASK) - 1) -#define IDX_TO_ASID(idx) ((idx + 1) & ~ASID_MASK) +#define NUM_USER_ASIDS ASID_FIRST_VERSION static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + #ifdef CONFIG_ARM_LPAE static void cpu_set_reserved_ttbr0(void) { @@ -128,7 +152,16 @@ static void flush_context(unsigned int cpu) asid = 0; } else { asid = atomic64_xchg(&per_cpu(active_asids, i), 0); - __set_bit(ASID_TO_IDX(asid), asid_map); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); } per_cpu(reserved_asids, i) = asid; } @@ -167,17 +200,19 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) /* * Allocate a free ASID. If we can't find one, take a * note of the currently active ASIDs and mark the TLBs - * as requiring flushes. + * as requiring flushes. We always count from ASID #1, + * as we reserve ASID #0 to switch via TTBR0 and indicate + * rollover events. */ - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); if (asid == NUM_USER_ASIDS) { generation = atomic64_add_return(ASID_FIRST_VERSION, &asid_generation); flush_context(cpu); - asid = find_first_zero_bit(asid_map, NUM_USER_ASIDS); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); } __set_bit(asid, asid_map); - asid = generation | IDX_TO_ASID(asid); + asid |= generation; cpumask_clear(mm_cpumask(mm)); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ef3e0f3..c038ec0 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); /* - * Mark the D-cache clean for this page to avoid extra flushing. + * Mark the D-cache clean for these pages to avoid extra flushing. */ - if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) - set_bit(PG_dcache_clean, &page->flags); + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + off / PAGE_SIZE; + off %= PAGE_SIZE; + if (off) { + pfn++; + left -= PAGE_SIZE - off; + } + while (left >= PAGE_SIZE) { + page = pfn_to_page(pfn++); + set_bit(PG_dcache_clean, &page->flags); + left -= PAGE_SIZE; + } + } } /** diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 32aa586..e4ac5d8 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -287,7 +287,7 @@ void flush_dcache_page(struct page *page) mapping = page_mapping(page); if (!cache_ops_need_broadcast() && - mapping && !mapping_mapped(mapping)) + mapping && !page_mapped(page)) clear_bit(PG_dcache_clean, &page->flags); else { __flush_dcache_page(mapping, page); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 04d9006..f123d6e 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -331,10 +331,10 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, return (void __iomem *) (offset + addr); } -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { - unsigned long last_addr; + phys_addr_t last_addr; unsigned long offset = phys_addr & ~PAGE_MASK; unsigned long pfn = __phys_to_pfn(phys_addr); @@ -367,12 +367,12 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; void __iomem * -__arm_ioremap(unsigned long phys_addr, size_t size, unsigned int mtype) +__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return arch_ioremap_caller(phys_addr, size, mtype, __builtin_return_address(0)); @@ -387,7 +387,7 @@ EXPORT_SYMBOL(__arm_ioremap); * CONFIG_GENERIC_ALLOCATOR for allocating external memory. */ void __iomem * -__arm_ioremap_exec(unsigned long phys_addr, size_t size, bool cached) +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) { unsigned int mtype; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index eb5293a..7fe0524 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -87,16 +87,16 @@ void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, return __arm_ioremap_pfn(pfn, offset, size, mtype); } -void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) { return (void __iomem *)phys_addr; } EXPORT_SYMBOL(__arm_ioremap); -void __iomem * (*arch_ioremap_caller)(unsigned long, size_t, unsigned int, void *); +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); -void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, unsigned int mtype, void *caller) { return __arm_ioremap(phys_addr, size, mtype); |