From 654d364e26c797e8a5f9e2a1393607e6ca0106eb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 14:04:06 +0900 Subject: sh: sh4_flush_cache_mm() optimizations. The i-cache flush in the case of VM_EXEC was added way back when as a sanity measure, and in practice we only care about evicting aliases from the d-cache. As a result, it's possible to drop the i-cache flush completely here. After careful profiling it's also come up that all of the work associated with hunting down aliases and doing ranged flushing ends up generating more overhead than simply blasting away the entire dcache, particularly if there are many mm's that need to be iterated over. As a result of that, just move back to flush_dcache_all() in these cases, which restores the old behaviour, and vastly simplifies the path. Additionally, on platforms without aliases at all, this can simply be nopped out. Presently we have the alias check in the SH-4 specific version, but this is true for all of the platforms, so move the check up to a generic location. This cuts down quite a bit on superfluous cacheop IPIs. Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 124 ++----------------------------------------------- arch/sh/mm/cache.c | 6 +++ 2 files changed, 10 insertions(+), 120 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index b2453bb..a5c339b 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -170,89 +170,13 @@ static void sh4_flush_cache_all(void *unused) flush_icache_all(); } -static void __flush_cache_mm(struct mm_struct *mm, unsigned long start, - unsigned long end) -{ - unsigned long d = 0, p = start & PAGE_MASK; - unsigned long alias_mask = boot_cpu_data.dcache.alias_mask; - unsigned long n_aliases = boot_cpu_data.dcache.n_aliases; - unsigned long select_bit; - unsigned long all_aliases_mask; - unsigned long addr_offset; - pgd_t *dir; - pmd_t *pmd; - pud_t *pud; - pte_t *pte; - int i; - - dir = pgd_offset(mm, p); - pud = pud_offset(dir, p); - pmd = pmd_offset(pud, p); - end = PAGE_ALIGN(end); - - all_aliases_mask = (1 << n_aliases) - 1; - - do { - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) { - p &= PMD_MASK; - p += PMD_SIZE; - pmd++; - - continue; - } - - pte = pte_offset_kernel(pmd, p); - - do { - unsigned long phys; - pte_t entry = *pte; - - if (!(pte_val(entry) & _PAGE_PRESENT)) { - pte++; - p += PAGE_SIZE; - continue; - } - - phys = pte_val(entry) & PTE_PHYS_MASK; - - if ((p ^ phys) & alias_mask) { - d |= 1 << ((p & alias_mask) >> PAGE_SHIFT); - d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT); - - if (d == all_aliases_mask) - goto loop_exit; - } - - pte++; - p += PAGE_SIZE; - } while (p < end && ((unsigned long)pte & ~PAGE_MASK)); - pmd++; - } while (p < end); - -loop_exit: - addr_offset = 0; - select_bit = 1; - - for (i = 0; i < n_aliases; i++) { - if (d & select_bit) { - (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE); - wmb(); - } - - select_bit <<= 1; - addr_offset += PAGE_SIZE; - } -} - /* * Note : (RPC) since the caches are physically tagged, the only point * of flush_cache_mm for SH-4 is to get rid of aliases from the * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that * lines can stay resident so long as the virtual address they were * accessed with (hence cache set) is in accord with the physical - * address (i.e. tag). It's no different here. So I reckon we don't - * need to flush the I-cache, since aliases don't matter for that. We - * should try that. + * address (i.e. tag). It's no different here. 
* * Caller takes mm->mmap_sem. */ @@ -263,33 +187,7 @@ static void sh4_flush_cache_mm(void *arg) if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT) return; - /* - * If cache is only 4k-per-way, there are never any 'aliases'. Since - * the cache is physically tagged, the data can just be left in there. - */ - if (boot_cpu_data.dcache.n_aliases == 0) - return; - - /* - * Don't bother groveling around the dcache for the VMA ranges - * if there are too many PTEs to make it worthwhile. - */ - if (mm->nr_ptes >= MAX_DCACHE_PAGES) - flush_dcache_all(); - else { - struct vm_area_struct *vma; - - /* - * In this case there are reasonably sized ranges to flush, - * iterate through the VMA list and take care of any aliases. - */ - for (vma = mm->mmap; vma; vma = vma->vm_next) - __flush_cache_mm(mm, vma->vm_start, vma->vm_end); - } - - /* Only touch the icache if one of the VMAs has VM_EXEC set. */ - if (mm->exec_vm) - flush_icache_all(); + flush_dcache_all(); } /* @@ -372,24 +270,10 @@ static void sh4_flush_cache_range(void *args) if (boot_cpu_data.dcache.n_aliases == 0) return; - /* - * Don't bother with the lookup and alias check if we have a - * wide range to cover, just blow away the dcache in its - * entirety instead. -- PFM. - */ - if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES) - flush_dcache_all(); - else - __flush_cache_mm(vma->vm_mm, start, end); + flush_dcache_all(); - if (vma->vm_flags & VM_EXEC) { - /* - * TODO: Is this required??? Need to look at how I-cache - * coherency is assured when new programs are loaded to see if - * this matters. - */ + if (vma->vm_flags & VM_EXEC) flush_icache_all(); - } } /** diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index 35c37b7..4aa9260 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -164,11 +164,17 @@ void flush_cache_all(void) void flush_cache_mm(struct mm_struct *mm) { + if (boot_cpu_data.dcache.n_aliases == 0) + return; + cacheop_on_each_cpu(local_flush_cache_mm, mm, 1); } void flush_cache_dup_mm(struct mm_struct *mm) { + if (boot_cpu_data.dcache.n_aliases == 0) + return; + cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1); } -- cgit v1.1 From 31c9efde786252112cc3d04a1ed3513b6ec63a7b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 14:10:28 +0900 Subject: sh: Kill off broken PHYSADDR() usage in sh4_flush_dcache_page(). PHYSADDR() runs into issues in 32-bit mode when we do not have the legacy P1/P2 areas mapped; as such, we need to use page_to_phys() directly, which also happens to do the right thing in legacy 29-bit mode. Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index a5c339b..f099960 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -123,12 +123,12 @@ static void sh4_flush_dcache_page(void *arg) else #endif { - unsigned long phys = PHYSADDR(page_address(page)); + unsigned long phys = page_to_phys(page); unsigned long addr = CACHE_OC_ADDRESS_ARRAY; int i, n; /* Loop all the D-cache */ - n = boot_cpu_data.dcache.n_aliases; + n = boot_cpu_data.dcache.way_incr >> 12; for (i = 0; i < n; i++, addr += 4096) flush_cache_4096(addr, phys); } -- cgit v1.1 From bd6df57481b329dfeeb4889068848ee4f4761561 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 14:22:15 +0900 Subject: sh: Kill off segment-based d-cache flushing on SH-4.
This kills off the unrolled segment based flushers on SH-4 and switches over to a generic unrolled approach derived from the writethrough segment flusher. Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 291 ++++--------------------------------------------- 1 file changed, 20 insertions(+), 271 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index f099960..92b7d94 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -30,14 +30,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys, unsigned long exec_offset); /* - * This is initialised here to ensure that it is not placed in the BSS. If - * that were to happen, note that cache_init gets called before the BSS is - * cleared, so this would get nulled out which would be hopeless. - */ -static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) = - (void (*)(unsigned long, unsigned long))0xdeadbeef; - -/* * Write back the range of D-cache, and purge the I-cache. * * Called from kernel/module.c:sys_init_module and routine for a.out format, @@ -158,10 +150,27 @@ static void __uses_jump_to_uncached flush_icache_all(void) local_irq_restore(flags); } -static inline void flush_dcache_all(void) +static void flush_dcache_all(void) { - (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size); - wmb(); + unsigned long addr, end_addr, entry_offset; + + end_addr = CACHE_OC_ADDRESS_ARRAY + + (current_cpu_data.dcache.sets << + current_cpu_data.dcache.entry_shift) * + current_cpu_data.dcache.ways; + + entry_offset = 1 << current_cpu_data.dcache.entry_shift; + + for (addr = CACHE_OC_ADDRESS_ARRAY; addr < end_addr; ) { + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + __raw_writel(0, addr); addr += entry_offset; + } } static void sh4_flush_cache_all(void *unused) @@ -347,245 +356,6 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys, } while (--way_count != 0); } -/* - * Break the 1, 2 and 4 way variants of this out into separate functions to - * avoid nearly all the overhead of having the conditional stuff in the function - * bodies (+ the 1 and 2 way cases avoid saving any registers too). - * - * We want to eliminate unnecessary bus transactions, so this code uses - * a non-obvious technique. - * - * Loop over a cache way sized block of, one cache line at a time. For each - * line, use movca.a to cause the current cache line contents to be written - * back, but without reading anything from main memory. However this has the - * side effect that the cache is now caching that memory location. So follow - * this with a cache invalidate to mark the cache line invalid. And do all - * this with interrupts disabled, to avoid the cache line being accidently - * evicted while it is holding garbage. - * - * This also breaks in a number of circumstances: - * - if there are modifications to the region of memory just above - * empty_zero_page (for example because a breakpoint has been placed - * there), then these can be lost. - * - * This is because the the memory address which the cache temporarily - * caches in the above description is empty_zero_page. 
So the - * movca.l hits the cache (it is assumed that it misses, or at least - * isn't dirty), modifies the line and then invalidates it, losing the - * required change. - * - * - If caches are disabled or configured in write-through mode, then - * the movca.l writes garbage directly into memory. - */ -static void __flush_dcache_segment_writethrough(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long addr; - int i; - - addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask); - - while (extent_per_way) { - for (i = 0; i < cpu_data->dcache.ways; i++) - __raw_writel(0, addr + cpu_data->dcache.way_incr * i); - - addr += cpu_data->dcache.linesz; - extent_per_way -= cpu_data->dcache.linesz; - } -} - -static void __flush_dcache_segment_1way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct cache_info *dcache; - register unsigned long a0, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* - * The previous code aligned base_addr to 16k, i.e. the way_size of all - * existing SH-4 D-caches. Whilst I don't see a need to have this - * aligned to any better than the cache line size (which it will be - * anyway by construction), let's align it to at least the way_size of - * any existing or conceivable SH-4 D-cache. -- RPC - */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - a0 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "ocbi @%0" : : "r" (a0)); - asm volatile("ldc %0, sr" : : "r" (orig_sr)); - a0 += linesz; - } while (a0 < a0e); -} - -static void __flush_dcache_segment_2way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct cache_info *dcache; - register unsigned long a0, a1, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* See comment under 1-way above */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a1 = a0 + way_incr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - a0 += linesz; - a1 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "ocbi @%0\n\t" - "ocbi @%1" : : - "r" (a0), "r" (a1)); - asm volatile("ldc %0, sr" 
: : "r" (orig_sr)); - a0 += linesz; - a1 += linesz; - } while (a0 < a0e); -} - -static void __flush_dcache_segment_4way(unsigned long start, - unsigned long extent_per_way) -{ - unsigned long orig_sr, sr_with_bl; - unsigned long base_addr; - unsigned long way_incr, linesz, way_size; - struct cache_info *dcache; - register unsigned long a0, a1, a2, a3, a0e; - - asm volatile("stc sr, %0" : "=r" (orig_sr)); - sr_with_bl = orig_sr | (1<<28); - base_addr = ((unsigned long)&empty_zero_page[0]); - - /* See comment under 1-way above */ - base_addr = ((base_addr >> 16) << 16); - base_addr |= start; - - dcache = &boot_cpu_data.dcache; - linesz = dcache->linesz; - way_incr = dcache->way_incr; - way_size = dcache->way_size; - - a0 = base_addr; - a1 = a0 + way_incr; - a2 = a1 + way_incr; - a3 = a2 + way_incr; - a0e = base_addr + extent_per_way; - do { - asm volatile("ldc %0, sr" : : "r" (sr_with_bl)); - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - asm volatile("movca.l r0, @%0\n\t" - "movca.l r0, @%1\n\t" - "movca.l r0, @%2\n\t" - "movca.l r0, @%3\n\t" - "ocbi @%0\n\t" - "ocbi @%1\n\t" - "ocbi @%2\n\t" - "ocbi @%3\n\t" : : - "r" (a0), "r" (a1), "r" (a2), "r" (a3)); - asm volatile("ldc %0, sr" : : "r" (orig_sr)); - a0 += linesz; - a1 += linesz; - a2 += linesz; - a3 += linesz; - } while (a0 < a0e); -} - extern void __weak sh4__flush_region_init(void); /* @@ -593,32 +363,11 @@ extern void __weak sh4__flush_region_init(void); */ void __init sh4_cache_init(void) { - unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT); - printk("PVR=%08x CVR=%08x PRR=%08x\n", ctrl_inl(CCN_PVR), ctrl_inl(CCN_CVR), ctrl_inl(CCN_PRR)); - if (wt_enabled) - __flush_dcache_segment_fn = __flush_dcache_segment_writethrough; - else { - switch (boot_cpu_data.dcache.ways) { - case 1: - __flush_dcache_segment_fn = __flush_dcache_segment_1way; - break; - case 2: - __flush_dcache_segment_fn = __flush_dcache_segment_2way; - break; - case 4: - __flush_dcache_segment_fn = __flush_dcache_segment_4way; - break; - default: - panic("unknown number of cache ways\n"); - break; - } - } - local_flush_icache_range = sh4_flush_icache_range; local_flush_dcache_page = sh4_flush_dcache_page; local_flush_cache_all = sh4_flush_cache_all; -- cgit v1.1 From deaef20e9789d93c06d2d3b5ffc99939814802ca Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 16:06:39 +0900 Subject: sh: Rework sh4_flush_cache_page() for coherent kmap mapping. This builds on top of the MIPS r4k code that does roughly the same thing. This permits the use of kmap_coherent() for mapped pages with dirty dcache lines and falls back on kmap_atomic() otherwise. This also fixes up a problem with the alias check and defers to shm_align_mask directly. 
Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 75 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 27 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 92b7d94..e3fbd99 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -2,7 +2,7 @@ * arch/sh/mm/cache-sh4.c * * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2001 - 2007 Paul Mundt + * Copyright (C) 2001 - 2009 Paul Mundt * Copyright (C) 2003 Richard Curnow * Copyright (c) 2007 STMicroelectronics (R&D) Ltd. * @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -23,7 +25,6 @@ * flushing. Anything exceeding this will simply flush the dcache in its * entirety. */ -#define MAX_DCACHE_PAGES 64 /* XXX: Tune for ways */ #define MAX_ICACHE_PAGES 32 static void __flush_cache_4096(unsigned long addr, unsigned long phys, @@ -209,44 +210,64 @@ static void sh4_flush_cache_page(void *args) { struct flusher_data *data = args; struct vm_area_struct *vma; + struct page *page; unsigned long address, pfn, phys; - unsigned int alias_mask; + int map_coherent = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + void *vaddr; vma = data->vma; address = data->addr1; pfn = data->addr2; phys = pfn << PAGE_SHIFT; + page = pfn_to_page(pfn); if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT) return; - alias_mask = boot_cpu_data.dcache.alias_mask; - - /* We only need to flush D-cache when we have alias */ - if ((address^phys) & alias_mask) { - /* Loop 4K of the D-cache */ - flush_cache_4096( - CACHE_OC_ADDRESS_ARRAY | (address & alias_mask), - phys); - /* Loop another 4K of the D-cache */ - flush_cache_4096( - CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask), - phys); - } + address &= PAGE_MASK; + pgd = pgd_offset(vma->vm_mm, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); + pte = pte_offset_kernel(pmd, address); + + /* If the page isn't present, there is nothing to do here. */ + if (!(pte_val(*pte) & _PAGE_PRESENT)) + return; - alias_mask = boot_cpu_data.icache.alias_mask; - if (vma->vm_flags & VM_EXEC) { + if ((vma->vm_mm == current->active_mm)) + vaddr = NULL; + else { /* - * Evict entries from the portion of the cache from which code - * may have been executed at this address (virtual). There's - * no need to evict from the portion corresponding to the - * physical address as for the D-cache, because we know the - * kernel has never executed the code through its identity - * translation. + * Use kmap_coherent or kmap_atomic to do flushes for + * another ASID than the current one. */ - flush_cache_4096( - CACHE_IC_ADDRESS_ARRAY | (address & alias_mask), - phys); + map_coherent = (current_cpu_data.dcache.n_aliases && + !test_bit(PG_dcache_dirty, &page->flags) && + page_mapped(page)); + if (map_coherent) + vaddr = kmap_coherent(page, address); + else + vaddr = kmap_atomic(page, KM_USER0); + + address = (unsigned long)vaddr; + } + + if (pages_do_alias(address, phys)) + flush_cache_4096(CACHE_OC_ADDRESS_ARRAY | + (address & shm_align_mask), phys); + + if (vma->vm_flags & VM_EXEC) + flush_icache_all(); + + if (vaddr) { + if (map_coherent) + kunmap_coherent(vaddr); + else + kunmap_atomic(vaddr, KM_USER0); } } -- cgit v1.1 From c4845a4b2288a9e5d96a0558e474809028c8aff3 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 17:13:07 +0900 Subject: sh: Fix up redundant cache flushing for PAGE_SIZE > 4k. 
If PAGE_SIZE is presently over 4k, we do a lot of extra flushing given that we purge the cache 4k at a time. Make it explicitly 4k per iteration, rather than iterating for PAGE_SIZE before looping over again. Signed-off-by: Paul Mundt --- arch/sh/mm/cache-sh4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sh') diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index e3fbd99..8362d31 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -357,7 +357,7 @@ static void __flush_cache_4096(unsigned long addr, unsigned long phys, * pointless head-of-loop check for 0 iterations. */ do { - ea = base_addr + PAGE_SIZE; + ea = base_addr + 4096; a = base_addr; p = phys; -- cgit v1.1 From f9e2bdfdbb4c9da13422b349227be8c7b41dbd44 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 9 Sep 2009 17:14:19 +0900 Subject: sh: Factor in cpu id for selection of cache colour fixmap. In the SMP VIPT case the page copy/clear ops still perform colouring; care needs to be taken that CPUs don't end up stepping on each other, so we give them a bit of room to work with. At the same time, we reduce the worst-case colouring given that these pages are always consumed. Signed-off-by: Paul Mundt --- arch/sh/include/asm/fixmap.h | 6 +++--- arch/sh/mm/kmap.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/include/asm/fixmap.h b/arch/sh/include/asm/fixmap.h index 721fcc4..76c5a30 100644 --- a/arch/sh/include/asm/fixmap.h +++ b/arch/sh/include/asm/fixmap.h @@ -14,9 +14,9 @@ #define _ASM_FIXMAP_H #include +#include #include #ifdef CONFIG_HIGHMEM -#include #include #endif @@ -46,9 +46,9 @@ * fix-mapped? */ enum fixed_addresses { -#define FIX_N_COLOURS 16 +#define FIX_N_COLOURS 8 FIX_CMAP_BEGIN, - FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS, + FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS), FIX_UNCACHED, #ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ diff --git a/arch/sh/mm/kmap.c b/arch/sh/mm/kmap.c index 16e01b5..15d74ea42 100644 --- a/arch/sh/mm/kmap.c +++ b/arch/sh/mm/kmap.c @@ -39,7 +39,9 @@ void *kmap_coherent(struct page *page, unsigned long addr) pagefault_disable(); idx = FIX_CMAP_END - - ((addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT); + (((addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1)) + + (FIX_N_COLOURS * smp_processor_id())); + vaddr = __fix_to_virt(idx); BUG_ON(!pte_none(*(kmap_coherent_pte - idx))); -- cgit v1.1 From 48ff3e04ffd5e1b578462eb1958f15ca122c7fbf Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 28 Sep 2009 15:04:04 +0900 Subject: sh: Handle ioport_map() cases for >= P1SEG addresses. This fixes up the case where certain drivers already do their own remapping and subsequently attempt to use the PIO calls for I/O. In this case there is no additional remapping that needs to be done, and the address can be cast into the cookie directly.
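[Editor's note: for readers unfamiliar with the legacy 29-bit SH segment map, here is a short userspace sketch of the arithmetic behind the check this commit adds to generic_ioport_map(). The segment bases and the PXSEG() mask mirror the standard SH layout, but the helper itself and the P2SEG default for generic_io_base (installed by the next patch in this series) are illustrative assumptions, not the in-tree code.]

#include <stdio.h>

#define P0SEG 0x00000000UL /* user space, TLB mapped    */
#define P1SEG 0x80000000UL /* kernel, cached identity   */
#define P2SEG 0xa0000000UL /* kernel, uncached identity */
#define P3SEG 0xc0000000UL /* kernel, TLB mapped        */
#define P4SEG 0xe0000000UL /* control registers         */

/* Mirrors the kernel macro: mask an address down to its segment base. */
#define PXSEG(a) ((unsigned long)(a) & 0xe0000000UL)

/* Assumed default PIO offset, per the following machvec patch. */
static unsigned long generic_io_base = P2SEG;

static void *generic_ioport_map_sketch(unsigned long addr)
{
	/* Already in a kernel identity segment: the driver remapped
	 * it itself, so the address is the cookie. */
	if (PXSEG(addr) >= P1SEG)
		return (void *)addr;
	/* Otherwise treat it as a legacy port number and offset it. */
	return (void *)(addr + generic_io_base);
}

int main(void)
{
	printf("port 0x60  -> %p\n", generic_ioport_map_sketch(0x60));
	printf("0xb8000000 -> %p\n", generic_ioport_map_sketch(0xb8000000UL));
	return 0;
}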
Signed-off-by: Paul Mundt --- arch/sh/kernel/io_generic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/sh') diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c index 4ff5072..b8fa652 100644 --- a/arch/sh/kernel/io_generic.c +++ b/arch/sh/kernel/io_generic.c @@ -147,6 +147,9 @@ void generic_outsl(unsigned long port, const void *src, unsigned long count) void __iomem *generic_ioport_map(unsigned long addr, unsigned int size) { + if (PXSEG(addr) >= P1SEG) + return (void __iomem *)addr; + return (void __iomem *)(addr + generic_io_base); } -- cgit v1.1 From d44ee12ad61ff7aa7a6344560bd430cb72fcbc27 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 28 Sep 2009 15:05:41 +0900 Subject: sh: Set the default I/O port base to P2SEG. This bumps up the default I/O base to P2SEG, which allows legacy probing to bail out gracefully rather than oopsing. Platforms that have a real PIO offset still need to fix this up on their own, although most platforms are content with P2SEG already. The previous change to teach ioport_map() about >= P1SEG offsets in combination with this patch allows both the already remapped and the legacy address probing to pass through and succeed. Fixes up an oops with i8042 on the sh7785lcr board. Signed-off-by: Paul Mundt --- arch/sh/kernel/machvec.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/sh') diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index 548f660..cbce639 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -133,4 +134,6 @@ void __init sh_mv_setup(void) if (!sh_mv.mv_nr_irqs) sh_mv.mv_nr_irqs = NR_IRQS; + + __set_io_port_base(P2SEG); } -- cgit v1.1 From 421f7a5dbdd1720c718eed187c46cf202529cf7d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 26 Sep 2009 19:44:05 +0000 Subject: sh: Don't enable interrupts in the page fault path There's already code in do_page_fault() to conditionally enable interrupts, so we don't need to unconditionally enable them before calling it. This fixes a lockdep warning where we called trace_hardirqs_off() but with irqs still enabled. Signed-off-by: Matt Fleming Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh3/entry.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index 0151933..bb407ef 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -152,8 +152,6 @@ call_do_page_fault: mov.l 1f, r0 mov.l @r0, r6 - sti - mov.l 3f, r0 mov.l 4f, r1 mov r15, r4 -- cgit v1.1 From fe1dbfd3f992d5432d7463a5bd6c2fc96d3eccd8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 28 Sep 2009 15:22:10 +0900 Subject: sh: magicpanelr2 and dreamcast can use the generic I/O base. There is now no need for the magicpanelr2 and dreamcast platforms to set their own I/O port base values, given that the generic machvec code now sets this to P2SEG for everyone.
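[Editor's note: the removal is safe because the hard-coded 0xa0000000 both boards passed is precisely P2SEG, the value sh_mv_setup() now installs by default. A tiny sketch of that equivalence; set_io_port_base_sketch() is a stand-in for the real helper, whose exact definition is not shown in this series.]

#include <assert.h>

#define P2SEG 0xa0000000UL

static unsigned long generic_io_base;

/* Assumed behaviour of the setter the boards were calling by hand. */
static void set_io_port_base_sketch(unsigned long pbase)
{
	generic_io_base = pbase;
}

int main(void)
{
	/* What sh_mv_setup() now does for every board... */
	set_io_port_base_sketch(P2SEG);
	/* ...matches what magicpanelr2/dreamcast used to do themselves. */
	assert(generic_io_base == 0xa0000000UL);
	return 0;
}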
Signed-off-by: Paul Mundt --- arch/sh/boards/board-magicpanelr2.c | 2 -- arch/sh/boards/mach-dreamcast/setup.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch/sh') diff --git a/arch/sh/boards/board-magicpanelr2.c b/arch/sh/boards/board-magicpanelr2.c index 0a37c8b..99ffc5f 100644 --- a/arch/sh/boards/board-magicpanelr2.c +++ b/arch/sh/boards/board-magicpanelr2.c @@ -205,8 +205,6 @@ static void __init setup_port_multiplexing(void) static void __init mpr2_setup(char **cmdline_p) { - __set_io_port_base(0xa0000000); - /* set Pin Select Register A: * /PCC_CD1, /PCC_CD2, PCC_BVD1, PCC_BVD2, * /IOIS16, IRQ4, IRQ5, USB1d_SUSPEND diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c index ebe9922..a4b7402 100644 --- a/arch/sh/boards/mach-dreamcast/setup.c +++ b/arch/sh/boards/mach-dreamcast/setup.c @@ -42,8 +42,6 @@ static void __init dreamcast_setup(char **cmdline_p) /* Acknowledge any previous events */ /* XXX */ - __set_io_port_base(0xa0000000); - /* Assign all virtual IRQs to the System ASIC int. handler */ for (i = HW_EVENT_IRQ_BASE; i < HW_EVENT_IRQ_MAX; i++) set_irq_chip_and_handler(i, &systemasic_int, -- cgit v1.1 From 8810e0553fec6ff0a0db1431e388de39e2a2a512 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 28 Sep 2009 08:21:41 +0000 Subject: sh: mach-ecovec24: Add TouchScreen support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/sh/boards/mach-ecovec24/setup.c | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'arch/sh') diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index 5f9881e1..52912a6 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include