From 6656920b0b50beacb6cb64cf55273cbb686e436e Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Wed, 22 Aug 2007 10:14:51 -0700 Subject: [XTENSA] Add support for cache-aliasing Add support for processors that have cache-aliasing issues, such as the Stretch S5000 processor. Cache-aliasing means that the size of the cache (for one way) is larger than the page size, thus, a page can end up in several places in cache depending on the virtual to physical translation. The method used here is to map a user page temporarily through the auto-refill way 0 and of of the DTLB. We probably will want to revisit this issue and use a better approach with kmap/kunmap. Signed-off-by: Chris Zankel --- arch/xtensa/kernel/asm-offsets.c | 13 +- arch/xtensa/kernel/entry.S | 75 +++++++++- arch/xtensa/mm/Makefile | 6 +- arch/xtensa/mm/cache.c | 256 ++++++++++++++++++++++++++++++++ arch/xtensa/mm/fault.c | 6 +- arch/xtensa/mm/init.c | 252 ++------------------------------ arch/xtensa/mm/misc.S | 306 ++++++++++++++++++++++++++++++++++----- 7 files changed, 624 insertions(+), 290 deletions(-) create mode 100644 arch/xtensa/mm/cache.c (limited to 'arch/xtensa') diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index d0323cd..d5ffe7b 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -18,12 +18,13 @@ #include #include #include +#include + #include #include #include #define DEFINE(sym, val) asm volatile("\n->" #sym " %0 " #val : : "i" (val)) -#define BLANK() asm volatile("\n->" : : ) int main(void) { @@ -63,7 +64,6 @@ int main(void) DEFINE(PT_SIZE, sizeof(struct pt_regs)); DEFINE(PT_AREG_END, offsetof (struct pt_regs, areg[XCHAL_NUM_AREGS])); DEFINE(PT_USER_SIZE, offsetof(struct pt_regs, areg[XCHAL_NUM_AREGS])); - BLANK(); /* struct task_struct */ DEFINE(TASK_PTRACE, offsetof (struct task_struct, ptrace)); @@ -73,27 +73,26 @@ int main(void) DEFINE(TASK_THREAD, offsetof (struct task_struct, thread)); DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack)); DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct)); - BLANK(); /* struct thread_info (offset from start_struct) */ DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra)); DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CP_SAVE, offsetof (struct task_struct, thread.cp_save)); DEFINE(THREAD_CURRENT_DS, offsetof (struct task_struct, thread.current_ds)); - BLANK(); /* struct mm_struct */ DEFINE(MM_USERS, offsetof(struct mm_struct, mm_users)); DEFINE(MM_PGD, offsetof (struct mm_struct, pgd)); DEFINE(MM_CONTEXT, offsetof (struct mm_struct, context)); - BLANK(); - DEFINE(PT_SINGLESTEP_BIT, PT_SINGLESTEP_BIT); + + /* struct page */ + DEFINE(PAGE_FLAGS, offsetof(struct page, flags)); /* constants */ DEFINE(_CLONE_VM, CLONE_VM); DEFINE(_CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(PG_ARCH_1, PG_arch_1); return 0; } - diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 65741e3..91a689e 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -7,7 +7,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2004-2005 by Tensilica Inc. + * Copyright (C) 2004-2007 by Tensilica Inc. * * Chris Zankel * @@ -169,7 +169,7 @@ _user_exception: * We have to save all registers up to the first '1' from * the right, except the current frame (bit 0). * Assume a2 is: 001001000110001 - * All regiser frames starting from the top fiel to the marked '1' + * All register frames starting from the top field to the marked '1' * must be saved. */ @@ -1590,7 +1590,7 @@ ENTRY(fast_second_level_miss) * The messy computation for 'pteval' above really simplifies * into the following: * - * pteval = ((pmdval - PAGE_OFFSET) & PAGE_MASK) | PAGE_KERNEL + * pteval = ((pmdval - PAGE_OFFSET) & PAGE_MASK) | PAGE_DIRECTORY */ movi a1, -PAGE_OFFSET @@ -1602,7 +1602,7 @@ ENTRY(fast_second_level_miss) or a0, a0, a1 # ... | PAGE_DIRECTORY /* - * We utilize all three wired-ways (7-9( to hold pmd translations. + * We utilize all three wired-ways (7-9) to hold pmd translations. * Memory regions are mapped to the DTLBs according to bits 28 and 29. * This allows to map the three most common regions to three different * DTLBs: @@ -1652,6 +1652,73 @@ ENTRY(fast_second_level_miss) 9: l32i a0, a1, TASK_ACTIVE_MM # unlikely case mm == 0 j 8b +#if (DCACHE_WAY_SIZE > PAGE_SIZE) + +2: /* Special case for cache aliasing. + * We (should) only get here if a clear_user_page, copy_user_page + * or the aliased cache flush functions got preemptively interrupted + * by another task. Re-establish temporary mapping to the + * TLBTEMP_BASE areas. + */ + + /* We shouldn't be in a double exception */ + + l32i a0, a2, PT_DEPC + bgeui a0, VALID_DOUBLE_EXCEPTION_ADDRESS, 2f + + /* Make sure the exception originated in the special functions */ + + movi a0, __tlbtemp_mapping_start + rsr a3, EPC_1 + bltu a3, a0, 2f + movi a0, __tlbtemp_mapping_end + bgeu a3, a0, 2f + + /* Check if excvaddr was in one of the TLBTEMP_BASE areas. */ + + movi a3, TLBTEMP_BASE_1 + rsr a0, EXCVADDR + bltu a0, a3, 2f + + addi a1, a0, -(2 << (DCACHE_ALIAS_ORDER + PAGE_SHIFT)) + bgeu a1, a3, 2f + + /* Check if we have to restore an ITLB mapping. */ + + movi a1, __tlbtemp_mapping_itlb + rsr a3, EPC_1 + sub a3, a3, a1 + + /* Calculate VPN */ + + movi a1, PAGE_MASK + and a1, a1, a0 + + /* Jump for ITLB entry */ + + bgez a3, 1f + + /* We can use up to two TLBTEMP areas, one for src and one for dst. */ + + extui a3, a0, PAGE_SHIFT + DCACHE_ALIAS_ORDER, 1 + add a1, a3, a1 + + /* PPN is in a6 for the first TLBTEMP area and in a7 for the second. */ + + mov a0, a6 + movnez a0, a7, a3 + j 3b + + /* ITLB entry. We only use dst in a6. */ + +1: witlb a6, a1 + isync + j 4b + + +#endif // DCACHE_WAY_SIZE > PAGE_SIZE + + 2: /* Invalid PGD, default exception handling */ movi a3, exc_table diff --git a/arch/xtensa/mm/Makefile b/arch/xtensa/mm/Makefile index a5aed59..10aec22 100644 --- a/arch/xtensa/mm/Makefile +++ b/arch/xtensa/mm/Makefile @@ -5,9 +5,5 @@ # removes any old dependencies. DON'T put your own dependencies here # unless it's something special (ie not a .c file). # -# Note 2! The CFLAGS definition is now in the main makefile... -obj-y := init.o fault.o tlb.o misc.o -obj-m := -obj-n := -obj- := +obj-y := init.o fault.o tlb.o misc.o cache.o diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c new file mode 100644 index 0000000..9a1fa94 --- /dev/null +++ b/arch/xtensa/mm/cache.c @@ -0,0 +1,256 @@ +/* + * arch/xtensa/mm/cache.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001-2006 Tensilica Inc. + * + * Chris Zankel + * Joe Taylor + * Marc Gauthier + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +//#define printd(x...) printk(x) +#define printd(x...) do { } while(0) + +/* + * Note: + * The kernel provides one architecture bit PG_arch_1 in the page flags that + * can be used for cache coherency. + * + * I$-D$ coherency. + * + * The Xtensa architecture doesn't keep the instruction cache coherent with + * the data cache. We use the architecture bit to indicate if the caches + * are coherent. The kernel clears this bit whenever a page is added to the + * page cache. At that time, the caches might not be in sync. We, therefore, + * define this flag as 'clean' if set. + * + * D-cache aliasing. + * + * With cache aliasing, we have to always flush the cache when pages are + * unmapped (see tlb_start_vma(). So, we use this flag to indicate a dirty + * page. + * + * + * + */ + +#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK + +/* + * Any time the kernel writes to a user page cache page, or it is about to + * read from a page cache page this routine is called. + * + */ + +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping = page_mapping(page); + + /* + * If we have a mapping but the page is not mapped to user-space + * yet, we simply mark this page dirty and defer flushing the + * caches until update_mmu(). + */ + + if (mapping && !mapping_mapped(mapping)) { + if (!test_bit(PG_arch_1, &page->flags)) + set_bit(PG_arch_1, &page->flags); + return; + + } else { + + unsigned long phys = page_to_phys(page); + unsigned long temp = page->index << PAGE_SHIFT; + unsigned long alias = !(DCACHE_ALIAS_EQ(temp, phys)); + unsigned long virt; + + /* + * Flush the page in kernel space and user space. + * Note that we can omit that step if aliasing is not + * an issue, but we do have to synchronize I$ and D$ + * if we have a mapping. + */ + + if (!alias && !mapping) + return; + + __flush_invalidate_dcache_page((long)page_address(page)); + + virt = TLBTEMP_BASE_1 + (temp & DCACHE_ALIAS_MASK); + + if (alias) + __flush_invalidate_dcache_page_alias(virt, phys); + + if (mapping) + __invalidate_icache_page_alias(virt, phys); + } + + /* There shouldn't be an entry in the cache for this page anymore. */ +} + + +/* + * For now, flush the whole cache. FIXME?? + */ + +void flush_cache_range(struct vm_area_struct* vma, + unsigned long start, unsigned long end) +{ + __flush_invalidate_dcache_all(); + __invalidate_icache_all(); +} + +/* + * Remove any entry in the cache for this page. + * + * Note that this function is only called for user pages, so use the + * alias versions of the cache flush functions. + */ + +void flush_cache_page(struct vm_area_struct* vma, unsigned long address, + unsigned long pfn) +{ + /* Note that we have to use the 'alias' address to avoid multi-hit */ + + unsigned long phys = page_to_phys(pfn_to_page(pfn)); + unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + + __flush_invalidate_dcache_page_alias(virt, phys); + __invalidate_icache_page_alias(virt, phys); +} + +#endif + +void +update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t pte) +{ + unsigned long pfn = pte_pfn(pte); + struct page *page; + + if (!pfn_valid(pfn)) + return; + + page = pfn_to_page(pfn); + + /* Invalidate old entry in TLBs */ + + invalidate_itlb_mapping(addr); + invalidate_dtlb_mapping(addr); + +#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK + + if (!PageReserved(page) && test_bit(PG_arch_1, &page->flags)) { + + unsigned long vaddr = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); + unsigned long paddr = (unsigned long) page_address(page); + unsigned long phys = page_to_phys(page); + + __flush_invalidate_dcache_page(paddr); + + __flush_invalidate_dcache_page_alias(vaddr, phys); + __invalidate_icache_page_alias(vaddr, phys); + + clear_bit(PG_arch_1, &page->flags); + } +#else + if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags) + && (vma->vm_flags & VM_EXEC) != 0) { + unsigned long vaddr = addr & PAGE_MASK; + __flush_dcache_page(vaddr); + __invalidate_icache_page(vaddr); + set_bit(PG_arch_1, &page->flags); + } +#endif +} + +/* + * access_process_vm() has called get_user_pages(), which has done a + * flush_dcache_page() on the page. + */ + +#if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + unsigned long phys = page_to_phys(page); + unsigned long alias = !(DCACHE_ALIAS_EQ(vaddr, phys)); + + /* Flush and invalidate user page if aliased. */ + + if (alias) { + unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(temp, phys); + } + + /* Copy data */ + + memcpy(dst, src, len); + + /* + * Flush and invalidate kernel page if aliased and synchronize + * data and instruction caches for executable pages. + */ + + if (alias) { + unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + + __flush_invalidate_dcache_range((unsigned long) dst, len); + if ((vma->vm_flags & VM_EXEC) != 0) { + __invalidate_icache_page_alias(temp, phys); + } + + } else if ((vma->vm_flags & VM_EXEC) != 0) { + __flush_dcache_range((unsigned long)dst,len); + __invalidate_icache_range((unsigned long) dst, len); + } +} + +extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + unsigned long phys = page_to_phys(page); + unsigned long alias = !(DCACHE_ALIAS_EQ(vaddr, phys)); + + /* + * Flush user page if aliased. + * (Note: a simply flush would be sufficient) + */ + + if (alias) { + unsigned long temp = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + __flush_invalidate_dcache_page_alias(temp, phys); + } + + memcpy(dst, src, len); +} + +#endif diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 1600406..45d28f2 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -24,6 +24,8 @@ unsigned long asid_cache = ASID_USER_FIRST; void bad_page_fault(struct pt_regs*, unsigned long, int); +#undef DEBUG_PAGE_FAULT + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -64,7 +66,7 @@ void do_page_fault(struct pt_regs *regs) exccause == EXCCAUSE_ITLB_MISS || exccause == EXCCAUSE_FETCH_CACHE_ATTRIBUTE) ? 1 : 0; -#if 0 +#ifdef DEBUG_PAGE_FAULT printk("[%s:%d:%08x:%d:%08x:%s%s]\n", current->comm, current->pid, address, exccause, regs->pc, is_write? "w":"", is_exec? "x":""); #endif @@ -219,7 +221,7 @@ bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) /* Are we prepared to handle this kernel fault? */ if ((entry = search_exception_tables(regs->pc)) != NULL) { -#if 1 +#ifdef DEBUG_PAGE_FAULT printk(KERN_DEBUG "%s: Exception at pc=%#010lx (%lx)\n", current->comm, regs->pc, entry->fixup); #endif diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 8415c76..b3086f3 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -15,40 +15,24 @@ * Kevin Chea */ -#include -#include -#include #include #include -#include -#include -#include #include #include +#include +#include +#include +#include #include #include #include #include -#include #include #include -#include - -#define DEBUG 0 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -//static DEFINE_SPINLOCK(tlb_lock); - -/* - * This flag is used to indicate that the page was mapped and modified in - * kernel space, so the cache is probably dirty at that address. - * If cache aliasing is enabled and the page color mismatches, update_mmu_cache - * synchronizes the caches if this bit is set. - */ - -#define PG_cache_clean PG_arch_1 /* References to section boundaries */ @@ -323,228 +307,22 @@ void show_mem(void) printk("%d free pages\n", free); } -/* ------------------------------------------------------------------------- */ - -#if (DCACHE_WAY_SIZE > PAGE_SIZE) - -/* - * With cache aliasing, the page color of the page in kernel space and user - * space might mismatch. We temporarily map the page to a different virtual - * address with the same color and clear the page there. - */ - -void clear_user_page(void *kaddr, unsigned long vaddr, struct page* page) -{ - - /* There shouldn't be any entries for this page. */ - - __flush_invalidate_dcache_page_phys(__pa(page_address(page))); - - if (!PAGE_COLOR_EQ(vaddr, kaddr)) { - unsigned long v, p; - - /* Temporarily map page to DTLB_WAY_DCACHE_ALIAS0. */ - - spin_lock(&tlb_lock); - - p = (unsigned long)pte_val((mk_pte(page,PAGE_KERNEL))); - kaddr = (void*)PAGE_COLOR_MAP0(vaddr); - v = (unsigned long)kaddr | DTLB_WAY_DCACHE_ALIAS0; - __asm__ __volatile__("wdtlb %0,%1; dsync" : :"a" (p), "a" (v)); - - clear_page(kaddr); - - spin_unlock(&tlb_lock); - } else { - clear_page(kaddr); - } - - /* We need to make sure that i$ and d$ are coherent. */ - - clear_bit(PG_cache_clean, &page->flags); -} - -/* - * With cache aliasing, we have to make sure that the page color of the page - * in kernel space matches that of the virtual user address before we read - * the page. If the page color differ, we create a temporary DTLB entry with - * the corrent page color and use this 'temporary' address as the source. - * We then use the same approach as in clear_user_page and copy the data - * to the kernel space and clear the PG_cache_clean bit to synchronize caches - * later. - * - * Note: - * Instead of using another 'way' for the temporary DTLB entry, we could - * probably use the same entry that points to the kernel address (after - * saving the original value and restoring it when we are done). - */ +struct kmem_cache *pgtable_cache __read_mostly; -void copy_user_page(void* to, void* from, unsigned long vaddr, - struct page* to_page) +static void pgd_ctor(void *addr, struct kmem_cache *cache, unsigned long flags) { - /* There shouldn't be any entries for the new page. */ - - __flush_invalidate_dcache_page_phys(__pa(page_address(to_page))); - - spin_lock(&tlb_lock); - - if (!PAGE_COLOR_EQ(vaddr, from)) { - unsigned long v, p, t; - - __asm__ __volatile__ ("pdtlb %1,%2; rdtlb1 %0,%1" - : "=a"(p), "=a"(t) : "a"(from)); - from = (void*)PAGE_COLOR_MAP0(vaddr); - v = (unsigned long)from | DTLB_WAY_DCACHE_ALIAS0; - __asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v)); - } - - if (!PAGE_COLOR_EQ(vaddr, to)) { - unsigned long v, p; - - p = (unsigned long)pte_val((mk_pte(to_page,PAGE_KERNEL))); - to = (void*)PAGE_COLOR_MAP1(vaddr); - v = (unsigned long)to | DTLB_WAY_DCACHE_ALIAS1; - __asm__ __volatile__ ("wdtlb %0,%1; dsync" ::"a" (p), "a" (v)); - } - copy_page(to, from); - - spin_unlock(&tlb_lock); - - /* We need to make sure that i$ and d$ are coherent. */ - - clear_bit(PG_cache_clean, &to_page->flags); -} - - - -/* - * Any time the kernel writes to a user page cache page, or it is about to - * read from a page cache page this routine is called. - * - * Note: - * The kernel currently only provides one architecture bit in the page - * flags that we use for I$/D$ coherency. Maybe, in future, we can - * use a sepearte bit for deferred dcache aliasing: - * If the page is not mapped yet, we only need to set a flag, - * if mapped, we need to invalidate the page. - */ -// FIXME: we probably need this for WB caches not only for Page Coloring.. - -void flush_dcache_page(struct page *page) -{ - unsigned long addr = __pa(page_address(page)); - struct address_space *mapping = page_mapping(page); - - __flush_invalidate_dcache_page_phys(addr); - - if (!test_bit(PG_cache_clean, &page->flags)) - return; - - /* If this page hasn't been mapped, yet, handle I$/D$ coherency later.*/ -#if 0 - if (mapping && !mapping_mapped(mapping)) - clear_bit(PG_cache_clean, &page->flags); - else -#endif - __invalidate_icache_page_phys(addr); -} - -void flush_cache_range(struct vm_area_struct* vma, unsigned long s, - unsigned long e) -{ - __flush_invalidate_cache_all(); -} - -void flush_cache_page(struct vm_area_struct* vma, unsigned long address, - unsigned long pfn) -{ - struct page *page = pfn_to_page(pfn); - - /* Remove any entry for the old mapping. */ - - if (current->active_mm == vma->vm_mm) { - unsigned long addr = __pa(page_address(page)); - __flush_invalidate_dcache_page_phys(addr); - if ((vma->vm_flags & VM_EXEC) != 0) - __invalidate_icache_page_phys(addr); - } else { - BUG(); - } -} - -#endif /* (DCACHE_WAY_SIZE > PAGE_SIZE) */ - - -pte_t* pte_alloc_one_kernel (struct mm_struct* mm, unsigned long addr) -{ - pte_t* pte = (pte_t*)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, 0); - if (likely(pte)) { - pte_t* ptep = (pte_t*)(pte_val(*pte) + PAGE_OFFSET); - int i; - for (i = 0; i < 1024; i++, ptep++) - pte_clear(mm, addr, ptep); - } - return pte; -} - -struct page* pte_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - struct page *page; - - page = alloc_pages(GFP_KERNEL | __GFP_REPEAT, 0); - - if (likely(page)) { - pte_t* ptep = kmap_atomic(page, KM_USER0); - int i; + pte_t* ptep = (pte_t*)addr; + int i; - for (i = 0; i < 1024; i++, ptep++) - pte_clear(mm, addr, ptep); + for (i = 0; i < 1024; i++, ptep++) + pte_clear(NULL, 0, ptep); - kunmap_atomic(ptep, KM_USER0); - } - return page; } - -/* - * Handle D$/I$ coherency. - * - * Note: - * We only have one architecture bit for the page flags, so we cannot handle - * cache aliasing, yet. - */ - -void -update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t pte) +void __init pgtable_cache_init(void) { - unsigned long pfn = pte_pfn(pte); - struct page *page; - unsigned long vaddr = addr & PAGE_MASK; - - if (!pfn_valid(pfn)) - return; - - page = pfn_to_page(pfn); - - invalidate_itlb_mapping(addr); - invalidate_dtlb_mapping(addr); - - /* We have a new mapping. Use it. */ - - write_dtlb_entry(pte, dtlb_probe(addr)); - - /* If the processor can execute from this page, synchronize D$/I$. */ - - if ((vma->vm_flags & VM_EXEC) != 0) { - - write_itlb_entry(pte, itlb_probe(addr)); - - /* Synchronize caches, if not clean. */ - - if (!test_and_set_bit(PG_cache_clean, &page->flags)) { - __flush_dcache_page(vaddr); - __invalidate_icache_page(vaddr); - } - } + pgtable_cache = kmem_cache_create("pgd", + PAGE_SIZE, PAGE_SIZE, + SLAB_HWCACHE_ALIGN, + pgd_ctor); } - diff --git a/arch/xtensa/mm/misc.S b/arch/xtensa/mm/misc.S index ae08533..e1f8803 100644 --- a/arch/xtensa/mm/misc.S +++ b/arch/xtensa/mm/misc.S @@ -7,29 +7,33 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001 - 2005 Tensilica Inc. + * Copyright (C) 2001 - 2007 Tensilica Inc. * * Chris Zankel */ -/* Note: we might want to implement some of the loops as zero-overhead-loops, - * where applicable and if supported by the processor. - */ #include #include #include #include #include +#include + -/* clear_page (page) */ +/* + * clear_page and clear_user_page are the same for non-cache-aliased configs. + * + * clear_page (unsigned long page) + * a2 + */ ENTRY(clear_page) entry a1, 16 - addi a4, a2, PAGE_SIZE - movi a3, 0 -1: s32i a3, a2, 0 + movi a3, 0 + __loopi a2, a7, PAGE_SIZE, 32 + s32i a3, a2, 0 s32i a3, a2, 4 s32i a3, a2, 8 s32i a3, a2, 12 @@ -37,42 +41,277 @@ ENTRY(clear_page) s32i a3, a2, 20 s32i a3, a2, 24 s32i a3, a2, 28 - addi a2, a2, 32 - blt a2, a4, 1b + __endla a2, a7, 32 retw /* + * copy_page and copy_user_page are the same for non-cache-aliased configs. + * * copy_page (void *to, void *from) - * a2 a3 + * a2 a3 */ ENTRY(copy_page) entry a1, 16 - addi a4, a2, PAGE_SIZE - -1: l32i a5, a3, 0 - l32i a6, a3, 4 - l32i a7, a3, 8 - s32i a5, a2, 0 - s32i a6, a2, 4 - s32i a7, a2, 8 - l32i a5, a3, 12 - l32i a6, a3, 16 - l32i a7, a3, 20 - s32i a5, a2, 12 - s32i a6, a2, 16 - s32i a7, a2, 20 - l32i a5, a3, 24 - l32i a6, a3, 28 - s32i a5, a2, 24 - s32i a6, a2, 28 - addi a2, a2, 32 - addi a3, a3, 32 - blt a2, a4, 1b + __loopi a2, a4, PAGE_SIZE, 32 + + l32i a8, a3, 0 + l32i a9, a3, 4 + s32i a8, a2, 0 + s32i a9, a2, 4 + + l32i a8, a3, 8 + l32i a9, a3, 12 + s32i a8, a2, 8 + s32i a9, a2, 12 + + l32i a8, a3, 16 + l32i a9, a3, 20 + s32i a8, a2, 16 + s32i a9, a2, 20 + + l32i a8, a3, 24 + l32i a9, a3, 28 + s32i a8, a2, 24 + s32i a9, a2, 28 + + addi a2, a2, 32 + addi a3, a3, 32 + + __endl a2, a4 + + retw + +/* + * If we have to deal with cache aliasing, we use temporary memory mappings + * to ensure that the source and destination pages have the same color as + * the virtual address. We use way 0 and 1 for temporary mappings in such cases. + * + * The temporary DTLB entries shouldn't be flushed by interrupts, but are + * flushed by preemptive task switches. Special code in the + * fast_second_level_miss handler re-established the temporary mapping. + * It requires that the PPNs for the destination and source addresses are + * in a6, and a7, respectively. + */ + +/* TLB miss exceptions are treated special in the following region */ + +ENTRY(__tlbtemp_mapping_start) + +#if (DCACHE_WAY_SIZE > PAGE_SIZE) + +/* + * clear_user_page (void *addr, unsigned long vaddr, struct page *page) + * a2 a3 a4 + */ + +ENTRY(clear_user_page) + entry a1, 32 + + /* Mark page dirty and determine alias. */ + + movi a7, (1 << PG_ARCH_1) + l32i a5, a4, PAGE_FLAGS + xor a6, a2, a3 + extui a3, a3, PAGE_SHIFT, DCACHE_ALIAS_ORDER + extui a6, a6, PAGE_SHIFT, DCACHE_ALIAS_ORDER + or a5, a5, a7 + slli a3, a3, PAGE_SHIFT + s32i a5, a4, PAGE_FLAGS + + /* Skip setting up a temporary DTLB if not aliased. */ + + beqz a6, 1f + + /* Invalidate kernel page. */ + + mov a10, a2 + call8 __invalidate_dcache_page + + /* Setup a temporary DTLB with the color of the VPN */ + + movi a4, -PAGE_OFFSET + (PAGE_KERNEL | _PAGE_HW_WRITE) + movi a5, TLBTEMP_BASE_1 # virt + add a6, a2, a4 # ppn + add a2, a5, a3 # add 'color' + + wdtlb a6, a2 + dsync + +1: movi a3, 0 + __loopi a2, a7, PAGE_SIZE, 32 + s32i a3, a2, 0 + s32i a3, a2, 4 + s32i a3, a2, 8 + s32i a3, a2, 12 + s32i a3, a2, 16 + s32i a3, a2, 20 + s32i a3, a2, 24 + s32i a3, a2, 28 + __endla a2, a7, 32 + + bnez a6, 1f + retw + + /* We need to invalidate the temporary idtlb entry, if any. */ + +1: addi a2, a2, -PAGE_SIZE + idtlb a2 + dsync + + retw + +/* + * copy_page_user (void *to, void *from, unsigned long vaddr, struct page *page) + * a2 a3 a4 a5 + */ + +ENTRY(copy_user_page) + + entry a1, 32 + + /* Mark page dirty and determine alias for destination. */ + + movi a8, (1 << PG_ARCH_1) + l32i a9, a5, PAGE_FLAGS + xor a6, a2, a4 + xor a7, a3, a4 + extui a4, a4, PAGE_SHIFT, DCACHE_ALIAS_ORDER + extui a6, a6, PAGE_SHIFT, DCACHE_ALIAS_ORDER + extui a7, a7, PAGE_SHIFT, DCACHE_ALIAS_ORDER + or a9, a9, a8 + slli a4, a4, PAGE_SHIFT + s32i a9, a5, PAGE_FLAGS + movi a5, -PAGE_OFFSET + (PAGE_KERNEL | _PAGE_HW_WRITE) + + beqz a6, 1f + + /* Invalidate dcache */ + + mov a10, a2 + call8 __invalidate_dcache_page + + /* Setup a temporary DTLB with a matching color. */ + + movi a8, TLBTEMP_BASE_1 # base + add a6, a2, a5 # ppn + add a2, a8, a4 # add 'color' + + wdtlb a6, a2 + dsync + + /* Skip setting up a temporary DTLB for destination if not aliased. */ + +1: beqz a7, 1f + + /* Setup a temporary DTLB with a matching color. */ + + movi a8, TLBTEMP_BASE_2 # base + add a7, a3, a5 # ppn + add a3, a8, a4 + addi a8, a3, 1 # way1 + + wdtlb a7, a8 + dsync + +1: __loopi a2, a4, PAGE_SIZE, 32 + + l32i a8, a3, 0 + l32i a9, a3, 4 + s32i a8, a2, 0 + s32i a9, a2, 4 + + l32i a8, a3, 8 + l32i a9, a3, 12 + s32i a8, a2, 8 + s32i a9, a2, 12 + + l32i a8, a3, 16 + l32i a9, a3, 20 + s32i a8, a2, 16 + s32i a9, a2, 20 + + l32i a8, a3, 24 + l32i a9, a3, 28 + s32i a8, a2, 24 + s32i a9, a2, 28 + + addi a2, a2, 32 + addi a3, a3, 32 + + __endl a2, a4 + + /* We need to invalidate any temporary mapping! */ + + bnez a6, 1f + bnez a7, 2f + retw + +1: addi a2, a2, -PAGE_SIZE + idtlb a2 + dsync + bnez a7, 2f + retw + +2: addi a3, a3, -PAGE_SIZE+1 + idtlb a3 + dsync + + retw + +#endif + +#if (DCACHE_WAY_SIZE > PAGE_SIZE) + +/* + * void __flush_invalidate_dcache_page_alias (addr, phys) + * a2 a3 + */ + +ENTRY(__flush_invalidate_dcache_page_alias) + entry sp, 16 + + movi a7, 0 # required for exception handler + addi a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE) + mov a4, a2 + wdtlb a6, a2 + dsync + + ___flush_invalidate_dcache_page a2 a3 + + idtlb a4 + dsync + + retw + +#endif + +ENTRY(__tlbtemp_mapping_itlb) + +#if (ICACHE_WAY_SIZE > PAGE_SIZE) + +ENTRY(__invalidate_icache_page_alias) + entry sp, 16 + + addi a6, a3, (PAGE_KERNEL | _PAGE_HW_WRITE) + mov a4, a2 + witlb a6, a2 + isync + + ___invalidate_icache_page a2 a3 + + iitlb a4 + isync retw +#endif + +/* End of special treatment in tlb miss exception */ + +ENTRY(__tlbtemp_mapping_end) + /* * void __invalidate_icache_page(ulong start) */ @@ -121,8 +360,6 @@ ENTRY(__flush_dcache_page) dsync retw - - /* * void __invalidate_icache_range(ulong start, ulong size) */ @@ -168,7 +405,6 @@ ENTRY(__invalidate_dcache_range) ___invalidate_dcache_range a2 a3 a4 - retw /* -- cgit v1.1