diff options
-rw-r--r-- | arch/sparc64/kernel/tsb.S | 71 | ||||
-rw-r--r-- | arch/sparc64/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 7 | ||||
-rw-r--r-- | arch/sparc64/mm/tsb.c | 185 | ||||
-rw-r--r-- | include/asm-sparc64/mmu_context.h | 50 |
5 files changed, 203 insertions, 118 deletions
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index d738910..1b154c8 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -34,8 +34,9 @@ tsb_miss_itlb: ldxa [%g4] ASI_IMMU, %g4 /* At this point we have: - * %g4 -- missing virtual address * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g4 -- missing virtual address * %g6 -- TAG TARGET (vaddr >> 22) */ tsb_miss_page_table_walk: @@ -45,6 +46,12 @@ tsb_miss_page_table_walk: tsb_miss_page_table_walk_sun4v_fastpath: USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) + /* At this point we have: + * %g1 -- TSB entry address + * %g3 -- FAULT_CODE_{D,I}TLB + * %g5 -- physical address of PTE in Linux page tables + * %g6 -- TAG TARGET (vaddr >> 22) + */ tsb_reload: TSB_LOCK_TAG(%g1, %g2, %g7) @@ -199,6 +206,7 @@ __tsb_insert: wrpr %o5, %pstate retl nop + .size __tsb_insert, .-__tsb_insert /* Flush the given TSB entry if it has the matching * tag. @@ -208,6 +216,7 @@ __tsb_insert: */ .align 32 .globl tsb_flush + .type tsb_flush,#function tsb_flush: sethi %hi(TSB_TAG_LOCK_HIGH), %g2 1: TSB_LOAD_TAG(%o0, %g1) @@ -225,6 +234,7 @@ tsb_flush: nop 2: retl TSB_MEMBAR + .size tsb_flush, .-tsb_flush /* Reload MMU related context switch state at * schedule() time. @@ -241,6 +251,7 @@ tsb_flush: */ .align 32 .globl __tsb_context_switch + .type __tsb_context_switch,#function __tsb_context_switch: rdpr %pstate, %o5 wrpr %o5, PSTATE_IE, %pstate @@ -302,3 +313,61 @@ __tsb_context_switch: retl nop + .size __tsb_context_switch, .-__tsb_context_switch + +#define TSB_PASS_BITS ((1 << TSB_TAG_LOCK_BIT) | \ + (1 << TSB_TAG_INVALID_BIT)) + + .align 32 + .globl copy_tsb + .type copy_tsb,#function +copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size + * %o2=new_tsb_base, %o3=new_tsb_size + */ + sethi %uhi(TSB_PASS_BITS), %g7 + srlx %o3, 4, %o3 + add %o0, %o1, %g1 /* end of old tsb */ + sllx %g7, 32, %g7 + sub %o3, 1, %o3 /* %o3 == new tsb hash mask */ + +661: prefetcha [%o0] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o0] ASI_PHYS_USE_EC, #one_read + .previous + +90: andcc %o0, (64 - 1), %g0 + bne 1f + add %o0, 64, %o5 + +661: prefetcha [%o5] ASI_N, #one_read + .section .tsb_phys_patch, "ax" + .word 661b + prefetcha [%o5] ASI_PHYS_USE_EC, #one_read + .previous + +1: TSB_LOAD_QUAD(%o0, %g2) /* %g2/%g3 == TSB entry */ + andcc %g2, %g7, %g0 /* LOCK or INVALID set? */ + bne,pn %xcc, 80f /* Skip it */ + sllx %g2, 22, %o4 /* TAG --> VADDR */ + + /* This can definitely be computed faster... */ + srlx %o0, 4, %o5 /* Build index */ + and %o5, 511, %o5 /* Mask index */ + sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */ + or %o4, %o5, %o4 /* Full VADDR. */ + srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */ + and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */ + sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */ + TSB_STORE(%o2 + %o4, %g2) /* Store TAG */ + add %o4, 0x8, %o4 /* Advance to TTE */ + TSB_STORE(%o2 + %o4, %g3) /* Store TTE */ + +80: add %o0, 16, %o0 + cmp %o0, %g1 + bne,pt %xcc, 90b + nop + + retl + TSB_MEMBAR + .size copy_tsb, .-copy_tsb diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index b97bd05..63b6cc0 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -29,6 +29,7 @@ #include <asm/lsu.h> #include <asm/sections.h> #include <asm/kdebug.h> +#include <asm/mmu_context.h> /* * To debug kernel to catch accesses to certain virtual/physical addresses. @@ -258,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) struct vm_area_struct *vma; unsigned int insn = 0; int si_code, fault_code; - unsigned long address; + unsigned long address, mm_rss; fault_code = get_thread_fault_code(); @@ -407,6 +408,11 @@ good_area: } up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); + if (unlikely(mm_rss >= mm->context.tsb_rss_limit)) + tsb_grow(mm, mm_rss); + return; /* diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index b40f647..d703b67 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -279,7 +279,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p { struct mm_struct *mm; struct tsb *tsb; - unsigned long tag; + unsigned long tag, flags; if (tlb_type != hypervisor) { unsigned long pfn = pte_pfn(pte); @@ -308,10 +308,15 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p } mm = vma->vm_mm; + + spin_lock_irqsave(&mm->context.lock, flags); + tsb = &mm->context.tsb[(address >> PAGE_SHIFT) & (mm->context.tsb_nentries - 1UL)]; tag = (address >> 22UL); tsb_insert(tsb, tag, pte_val(pte)); + + spin_unlock_irqrestore(&mm->context.lock, flags); } void flush_dcache_page(struct page *page) diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index f36799b..7fbe1e0c 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c @@ -48,11 +48,15 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) void flush_tsb_user(struct mmu_gather *mp) { struct mm_struct *mm = mp->mm; - struct tsb *tsb = mm->context.tsb; - unsigned long nentries = mm->context.tsb_nentries; - unsigned long base; + unsigned long nentries, base, flags; + struct tsb *tsb; int i; + spin_lock_irqsave(&mm->context.lock, flags); + + tsb = mm->context.tsb; + nentries = mm->context.tsb_nentries; + if (tlb_type == cheetah_plus || tlb_type == hypervisor) base = __pa(tsb); else @@ -70,6 +74,8 @@ void flush_tsb_user(struct mmu_gather *mp) tsb_flush(ent, tag); } + + spin_unlock_irqrestore(&mm->context.lock, flags); } static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) @@ -201,86 +207,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes) } } -/* The page tables are locked against modifications while this - * runs. - * - * XXX do some prefetching... - */ -static void copy_tsb(struct tsb *old_tsb, unsigned long old_size, - struct tsb *new_tsb, unsigned long new_size) -{ - unsigned long old_nentries = old_size / sizeof(struct tsb); - unsigned long new_nentries = new_size / sizeof(struct tsb); - unsigned long i; - - for (i = 0; i < old_nentries; i++) { - register unsigned long tag asm("o4"); - register unsigned long pte asm("o5"); - unsigned long v, hash; - - if (tlb_type == hypervisor) { - __asm__ __volatile__( - "ldda [%2] %3, %0" - : "=r" (tag), "=r" (pte) - : "r" (__pa(&old_tsb[i])), - "i" (ASI_QUAD_LDD_PHYS_4V)); - } else if (tlb_type == cheetah_plus) { - __asm__ __volatile__( - "ldda [%2] %3, %0" - : "=r" (tag), "=r" (pte) - : "r" (__pa(&old_tsb[i])), - "i" (ASI_QUAD_LDD_PHYS)); - } else { - __asm__ __volatile__( - "ldda [%2] %3, %0" - : "=r" (tag), "=r" (pte) - : "r" (&old_tsb[i]), - "i" (ASI_NUCLEUS_QUAD_LDD)); - } - - if (tag & ((1UL << TSB_TAG_LOCK_BIT) | - (1UL << TSB_TAG_INVALID_BIT))) - continue; - - /* We only put base page size PTEs into the TSB, - * but that might change in the future. This code - * would need to be changed if we start putting larger - * page size PTEs into there. - */ - WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS); - - /* The tag holds bits 22 to 63 of the virtual address - * and the context. Clear out the context, and shift - * up to make a virtual address. - */ - v = (tag & ((1UL << 42UL) - 1UL)) << 22UL; - - /* The implied bits of the tag (bits 13 to 21) are - * determined by the TSB entry index, so fill that in. - */ - v |= (i & (512UL - 1UL)) << 13UL; - - hash = tsb_hash(v, new_nentries); - if (tlb_type == cheetah_plus || - tlb_type == hypervisor) { - __asm__ __volatile__( - "stxa %0, [%1] %2\n\t" - "stxa %3, [%4] %2" - : /* no outputs */ - : "r" (tag), - "r" (__pa(&new_tsb[hash].tag)), - "i" (ASI_PHYS_USE_EC), - "r" (pte), - "r" (__pa(&new_tsb[hash].pte))); - } else { - new_tsb[hash].tag = tag; - new_tsb[hash].pte = pte; - } - } -} - /* When the RSS of an address space exceeds mm->context.tsb_rss_limit, - * update_mmu_cache() invokes this routine to try and grow the TSB. + * do_sparc64_fault() invokes this routine to try and grow the TSB. + * * When we reach the maximum TSB size supported, we stick ~0UL into * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache() * will not trigger any longer. @@ -293,12 +222,12 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size, * the number of entries that the current TSB can hold at once. Currently, * we trigger when the RSS hits 3/4 of the TSB capacity. */ -void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags) +void tsb_grow(struct mm_struct *mm, unsigned long rss) { unsigned long max_tsb_size = 1 * 1024 * 1024; - unsigned long size, old_size; + unsigned long size, old_size, flags; struct page *page; - struct tsb *old_tsb; + struct tsb *old_tsb, *new_tsb; if (max_tsb_size > (PAGE_SIZE << MAX_ORDER)) max_tsb_size = (PAGE_SIZE << MAX_ORDER); @@ -311,12 +240,51 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags) break; } - page = alloc_pages(gfp_flags, get_order(size)); + page = alloc_pages(GFP_KERNEL, get_order(size)); if (unlikely(!page)) return; /* Mark all tags as invalid. */ - memset(page_address(page), 0x40, size); + new_tsb = page_address(page); + memset(new_tsb, 0x40, size); + + /* Ok, we are about to commit the changes. If we are + * growing an existing TSB the locking is very tricky, + * so WATCH OUT! + * + * We have to hold mm->context.lock while committing to the + * new TSB, this synchronizes us with processors in + * flush_tsb_user() and switch_mm() for this address space. + * + * But even with that lock held, processors run asynchronously + * accessing the old TSB via TLB miss handling. This is OK + * because those actions are just propagating state from the + * Linux page tables into the TSB, page table mappings are not + * being changed. If a real fault occurs, the processor will + * synchronize with us when it hits flush_tsb_user(), this is + * also true for the case where vmscan is modifying the page + * tables. The only thing we need to be careful with is to + * skip any locked TSB entries during copy_tsb(). + * + * When we finish committing to the new TSB, we have to drop + * the lock and ask all other cpus running this address space + * to run tsb_context_switch() to see the new TSB table. + */ + spin_lock_irqsave(&mm->context.lock, flags); + + old_tsb = mm->context.tsb; + old_size = mm->context.tsb_nentries * sizeof(struct tsb); + + /* Handle multiple threads trying to grow the TSB at the same time. + * One will get in here first, and bump the size and the RSS limit. + * The others will get in here next and hit this check. + */ + if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) { + spin_unlock_irqrestore(&mm->context.lock, flags); + + free_pages((unsigned long) new_tsb, get_order(size)); + return; + } if (size == max_tsb_size) mm->context.tsb_rss_limit = ~0UL; @@ -324,30 +292,37 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags) mm->context.tsb_rss_limit = ((size / sizeof(struct tsb)) * 3) / 4; - old_tsb = mm->context.tsb; - old_size = mm->context.tsb_nentries * sizeof(struct tsb); - - if (old_tsb) - copy_tsb(old_tsb, old_size, page_address(page), size); + if (old_tsb) { + extern void copy_tsb(unsigned long old_tsb_base, + unsigned long old_tsb_size, + unsigned long new_tsb_base, + unsigned long new_tsb_size); + unsigned long old_tsb_base = (unsigned long) old_tsb; + unsigned long new_tsb_base = (unsigned long) new_tsb; + + if (tlb_type == cheetah_plus || tlb_type == hypervisor) { + old_tsb_base = __pa(old_tsb_base); + new_tsb_base = __pa(new_tsb_base); + } + copy_tsb(old_tsb_base, old_size, new_tsb_base, size); + } - mm->context.tsb = page_address(page); + mm->context.tsb = new_tsb; setup_tsb_params(mm, size); + spin_unlock_irqrestore(&mm->context.lock, flags); + /* If old_tsb is NULL, we're being invoked for the first time * from init_new_context(). */ if (old_tsb) { - /* Now force all other processors to reload the new - * TSB state. - */ - smp_tsb_sync(mm); - - /* Finally reload it on the local cpu. No further - * references will remain to the old TSB and we can - * thus free it up. - */ + /* Reload it on the local cpu. */ tsb_context_switch(mm); + /* Now force other processors to do the same. */ + smp_tsb_sync(mm); + + /* Now it is safe to free the old tsb. */ free_pages((unsigned long) old_tsb, get_order(old_size)); } } @@ -363,7 +338,11 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) * will be confused and think there is an older TSB to free up. */ mm->context.tsb = NULL; - tsb_grow(mm, 0, GFP_KERNEL); + + /* If this is fork, inherit the parent's TSB size. We would + * grow it to that size on the first page fault anyways. + */ + tsb_grow(mm, get_mm_rss(mm)); if (unlikely(!mm->context.tsb)) return -ENOMEM; diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h index ca36ea9..e797432 100644 --- a/include/asm-sparc64/mmu_context.h +++ b/include/asm-sparc64/mmu_context.h @@ -42,7 +42,7 @@ static inline void tsb_context_switch(struct mm_struct *mm) __pa(&mm->context.tsb_descr)); } -extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss, gfp_t gfp_flags); +extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss); #ifdef CONFIG_SMP extern void smp_tsb_sync(struct mm_struct *mm); #else @@ -74,18 +74,43 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str ctx_valid = CTX_VALID(mm->context); if (!ctx_valid) get_new_mmu_context(mm); - spin_unlock_irqrestore(&mm->context.lock, flags); - if (!ctx_valid || (old_mm != mm)) { - load_secondary_context(mm); - tsb_context_switch(mm); - } + /* We have to be extremely careful here or else we will miss + * a TSB grow if we switch back and forth between a kernel + * thread and an address space which has it's TSB size increased + * on another processor. + * + * It is possible to play some games in order to optimize the + * switch, but the safest thing to do is to unconditionally + * perform the secondary context load and the TSB context switch. + * + * For reference the bad case is, for address space "A": + * + * CPU 0 CPU 1 + * run address space A + * set cpu0's bits in cpu_vm_mask + * switch to kernel thread, borrow + * address space A via entry_lazy_tlb + * run address space A + * set cpu1's bit in cpu_vm_mask + * flush_tlb_pending() + * reset cpu_vm_mask to just cpu1 + * TSB grow + * run address space A + * context was valid, so skip + * TSB context switch + * + * At that point cpu0 continues to use a stale TSB, the one from + * before the TSB grow performed on cpu1. cpu1 did not cross-call + * cpu0 to update it's TSB because at that point the cpu_vm_mask + * only had cpu1 set in it. + */ + load_secondary_context(mm); + tsb_context_switch(mm); - /* Even if (mm == old_mm) we _must_ check - * the cpu_vm_mask. If we do not we could - * corrupt the TLB state because of how - * smp_flush_tlb_{page,range,mm} on sparc64 - * and lazy tlb switches work. -DaveM + /* Any time a processor runs a context on an address space + * for the first time, we must flush that context out of the + * local TLB. */ cpu = smp_processor_id(); if (!ctx_valid || !cpu_isset(cpu, mm->cpu_vm_mask)) { @@ -93,6 +118,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); } + spin_unlock_irqrestore(&mm->context.lock, flags); } #define deactivate_mm(tsk,mm) do { } while (0) @@ -109,11 +135,11 @@ static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm cpu = smp_processor_id(); if (!cpu_isset(cpu, mm->cpu_vm_mask)) cpu_set(cpu, mm->cpu_vm_mask); - spin_unlock_irqrestore(&mm->context.lock, flags); load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); tsb_context_switch(mm); + spin_unlock_irqrestore(&mm->context.lock, flags); } #endif /* !(__ASSEMBLY__) */ |