Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/44x_mmu.c        |  29
-rw-r--r--  arch/powerpc/mm/fault.c          |   3
-rw-r--r--  arch/powerpc/mm/hash_low_64.S    |  17
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c  |  42
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c    |  13
-rw-r--r--  arch/powerpc/mm/init_32.c        |   7
-rw-r--r--  arch/powerpc/mm/init_64.c        |   4
-rw-r--r--  arch/powerpc/mm/mem.c            |   9
-rw-r--r--  arch/powerpc/mm/mmu_decl.h       |   6
-rw-r--r--  arch/powerpc/mm/numa.c           | 310
-rw-r--r--  arch/powerpc/mm/pgtable_32.c     |   6
-rw-r--r--  arch/powerpc/mm/ppc_mmu_32.c     |  27
-rw-r--r--  arch/powerpc/mm/slice.c          | 177
-rw-r--r--  arch/powerpc/mm/stab.c           |   4
-rw-r--r--  arch/powerpc/mm/tlb_64.c         |   7
15 files changed, 494 insertions, 167 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 953fb91..98052ac 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -27,6 +27,7 @@
 #include <asm/mmu.h>
 #include <asm/system.h>
 #include <asm/page.h>
+#include <asm/cacheflush.h>
 
 #include "mmu_decl.h"
 
@@ -37,11 +38,35 @@ unsigned int tlb_44x_index; /* = 0 */
 unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
 int icache_44x_need_flush;
 
+static void __init ppc44x_update_tlb_hwater(void)
+{
+	extern unsigned int tlb_44x_patch_hwater_D[];
+	extern unsigned int tlb_44x_patch_hwater_I[];
+
+	/* The TLB miss handlers hard codes the watermark in a cmpli
+	 * instruction to improve performances rather than loading it
+	 * from the global variable. Thus, we patch the instructions
+	 * in the 2 TLB miss handlers when updating the value
+	 */
+	tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
+		tlb_44x_hwater;
+	flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
+			   (unsigned long)&tlb_44x_patch_hwater_D[1]);
+	tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
+		tlb_44x_hwater;
+	flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
+			   (unsigned long)&tlb_44x_patch_hwater_I[1]);
+}
+
 /*
  * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
  */
 static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 {
+	unsigned int entry = tlb_44x_hwater--;
+
+	ppc44x_update_tlb_hwater();
+
 	__asm__ __volatile__(
 		"tlbwe	%2,%3,%4\n"
 		"tlbwe	%1,%3,%5\n"
@@ -50,7 +75,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
 	  "r" (phys),
 	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
-	  "r" (tlb_44x_hwater--), /* slot for this TLB entry */
+	  "r" (entry),
 	  "i" (PPC44x_TLB_PAGEID),
 	  "i" (PPC44x_TLB_XLAT),
 	  "i" (PPC44x_TLB_ATTRIB));
@@ -58,6 +83,8 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 
 void __init MMU_init_hw(void)
 {
+	ppc44x_update_tlb_hwater();
+
 	flush_instruction_cache();
 }
 
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7b25107..1707d00 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -306,7 +306,8 @@ good_area:
 			flush_dcache_icache_page(page);
 			set_bit(PG_arch_1, &page->flags);
 		}
-		pte_update(ptep, 0, _PAGE_HWEXEC);
+		pte_update(ptep, 0, _PAGE_HWEXEC |
+			   _PAGE_ACCESSED);
 		_tlbie(address, mm->context.id);
 		pte_unmap_unlock(ptep, ptl);
 		up_read(&mm->mmap_sem);
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 70f4c83..a719f53 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -388,7 +388,7 @@ _GLOBAL(__hash_page_4K)
 	 */
 	rlwinm	r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
 	or	r30,r30,r31
-	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
 	oris	r30,r30,_PAGE_COMBO@h
 	/* Write the linux PTE atomically (setting busy) */
 	stdcx.	r30,0,r6
@@ -468,7 +468,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 	 * go to out-of-line code to try to modify the HPTE. We look for
 	 * the bit at (1 >> (index + 32))
 	 */
-	andi.	r0,r31,_PAGE_HASHPTE
+	rldicl.	r0,r31,64-12,48
 	li	r26,0			/* Default hidx */
 	beq	htab_insert_pte
@@ -726,11 +726,11 @@ BEGIN_FTR_SECTION
 	bne-	ht64_bail_ok
 END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE)
 
 	/* Prepare new PTE value (turn access RW into DIRTY, then
-	 * add BUSY,HASHPTE and ACCESSED)
+	 * add BUSY and ACCESSED)
 	 */
 	rlwinm	r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
 	or	r30,r30,r31
-	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
+	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
 	/* Write the linux PTE atomically (setting busy) */
 	stdcx.	r30,0,r6
 	bne-	1b
@@ -798,18 +798,21 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
 	/* Check if we may already be in the hashtable, in this case, we
 	 * go to out-of-line code to try to modify the HPTE
 	 */
-	andi.	r0,r31,_PAGE_HASHPTE
+	rldicl.	r0,r31,64-12,48
 	bne	ht64_modify_pte
 
 ht64_insert_pte:
 	/* Clear hpte bits in new pte (we also clear BUSY btw) and
-	 * add _PAGE_HASHPTE
+	 * add _PAGE_HPTE_SUB0
 	 */
 	lis	r0,_PAGE_HPTEFLAGS@h
 	ori	r0,r0,_PAGE_HPTEFLAGS@l
 	andc	r30,r30,r0
+#ifdef CONFIG_PPC_64K_PAGES
+	oris	r30,r30,_PAGE_HPTE_SUB0@h
+#else
 	ori	r30,r30,_PAGE_HASHPTE
-
+#endif
 	/* Phyical address in r5 */
 	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
 	sldi	r5,r5,PAGE_SHIFT
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0f2d239..8d3b58e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(linear_map_hash_lock);
 
 /* Pre-POWER4 CPUs (4k pages only)
  */
-struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults_old[] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
 		.sllp	= 0,
@@ -134,7 +134,7 @@ struct mmu_psize_def mmu_psize_defaults_old[] = {
  *
  * Support for 16Mb large pages
  */
-struct mmu_psize_def mmu_psize_defaults_gp[] = {
+static struct mmu_psize_def mmu_psize_defaults_gp[] = {
 	[MMU_PAGE_4K] = {
 		.shift	= 12,
 		.sllp	= 0,
@@ -533,8 +533,6 @@ void __init htab_initialize(void)
 	unsigned long base = 0, size = 0, limit;
 	int i;
 
-	extern unsigned long tce_alloc_start, tce_alloc_end;
-
 	DBG(" -> htab_initialize()\n");
 
 	/* Initialize segment sizes */
@@ -697,6 +695,28 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 	return pp;
 }
 
+#ifdef CONFIG_PPC_MM_SLICES
+unsigned int get_paca_psize(unsigned long addr)
+{
+	unsigned long index, slices;
+
+	if (addr < SLICE_LOW_TOP) {
+		slices = get_paca()->context.low_slices_psize;
+		index = GET_LOW_SLICE_INDEX(addr);
+	} else {
+		slices = get_paca()->context.high_slices_psize;
+		index = GET_HIGH_SLICE_INDEX(addr);
+	}
+	return (slices >> (index * 4)) & 0xF;
+}
+
+#else
+unsigned int get_paca_psize(unsigned long addr)
+{
+	return get_paca()->context.user_psize;
+}
+#endif
+
 /*
  * Demote a segment to using 4k pages.
  * For now this makes the whole process use 4k pages.
@@ -704,13 +724,13 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_64K_PAGES
 void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 {
-	if (mm->context.user_psize == MMU_PAGE_4K)
+	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
 		return;
-	slice_set_user_psize(mm, MMU_PAGE_4K);
+	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
 #ifdef CONFIG_SPU_BASE
 	spu_flush_all_slbs(mm);
 #endif
-	if (get_paca()->context.user_psize != MMU_PAGE_4K) {
+	if (get_paca_psize(addr) != MMU_PAGE_4K) {
 		get_paca()->context = mm->context;
 		slb_flush_and_rebolt();
 	}
@@ -794,11 +814,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 			DBG_LOW(" user region with no mm !\n");
 			return 1;
 		}
-#ifdef CONFIG_PPC_MM_SLICES
 		psize = get_slice_psize(mm, ea);
-#else
-		psize = mm->context.user_psize;
-#endif
 		ssize = user_segment_size(ea);
 		vsid = get_vsid(mm->context.id, ea, ssize);
 		break;
@@ -870,7 +886,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 	/* Do actual hashing */
 #ifdef CONFIG_PPC_64K_PAGES
 	/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
-	if (pte_val(*ptep) & _PAGE_4K_PFN) {
+	if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
 		demote_segment_4k(mm, ea);
 		psize = MMU_PAGE_4K;
 	}
@@ -899,7 +915,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 		}
 	}
 	if (user_region) {
-		if (psize != get_paca()->context.user_psize) {
+		if (psize != get_paca_psize(ea)) {
 			get_paca()->context = mm->context;
 			slb_flush_and_rebolt();
 		}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a02266d..0d12fba 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -458,8 +458,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		old_pte = pte_val(*ptep);
 		if (old_pte & _PAGE_BUSY)
 			goto out;
-		new_pte = old_pte | _PAGE_BUSY |
-			_PAGE_ACCESSED | _PAGE_HASHPTE;
+		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
 	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
 					 old_pte, new_pte));
 
@@ -499,12 +498,14 @@ repeat:
 					  HPTES_PER_GROUP) & ~0x7UL;
 
 		/* clear HPTE slot informations in new PTE */
+#ifdef CONFIG_PPC_64K_PAGES
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
+#else
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-
+#endif
 		/* Add in WIMG bits */
-		/* XXX We should store these in the pte */
-		/* --BenH: I think they are ... */
-		rflags |= _PAGE_COHERENT;
+		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+				      _PAGE_COHERENT | _PAGE_GUARDED));
 
 		/* Insert into the hash table, primary slot */
 		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 1952b4d..388ceda 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -43,6 +43,7 @@
 #include <asm/btext.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
+#include <asm/system.h>
 
 #include "mmu_decl.h"
 
@@ -56,8 +57,8 @@
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-unsigned long total_memory;
-unsigned long total_lowmem;
+phys_addr_t total_memory;
+phys_addr_t total_lowmem;
 
 phys_addr_t memstart_addr = (phys_addr_t)~0ull;
 EXPORT_SYMBOL(memstart_addr);
@@ -76,8 +77,6 @@ void MMU_init(void);
 /* XXX should be in current.h -- paulus */
 extern struct task_struct *current_set[NR_CPUS];
 
-extern int init_bootmem_done;
-
 /*
  * this tells the system to map all of ram with the segregs
  * (i.e. page tables) instead of the bats.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6aa6537..6ef63ca 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -185,7 +185,7 @@ void pgtable_cache_init(void)
  * do this by hand as the proffered address may not be correctly aligned.
  * Subtraction of non-aligned pointers produces undefined results.
  */
-unsigned long __meminit vmemmap_section_start(unsigned long page)
+static unsigned long __meminit vmemmap_section_start(unsigned long page)
 {
 	unsigned long offset = page - ((unsigned long)(vmemmap));
 
@@ -198,7 +198,7 @@ unsigned long __meminit vmemmap_section_start(unsigned long page)
  * which overlaps this vmemmap page is initialised then this page is
  * initialised already.
  */
-int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long start, int page_size)
 {
 	unsigned long end = start + page_size;
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 51f82d8..1ca2235 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -44,6 +44,7 @@
 #include <asm/btext.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
+#include <asm/sparsemem.h>
 #include <asm/vdso.h>
 #include <asm/fixmap.h>
 
@@ -329,7 +330,7 @@ static int __init mark_nonram_nosave(void)
 void __init paging_init(void)
 {
 	unsigned long total_ram = lmb_phys_mem_size();
-	unsigned long top_of_ram = lmb_end_of_DRAM();
+	phys_addr_t top_of_ram = lmb_end_of_DRAM();
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
 #ifdef CONFIG_PPC32
@@ -348,10 +349,10 @@ void __init paging_init(void)
 	kmap_prot = PAGE_KERNEL;
 #endif /* CONFIG_HIGHMEM */
 
-	printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
-	       top_of_ram, total_ram);
+	printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n",
+	       (u64)top_of_ram, total_ram);
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
-	       (top_of_ram - total_ram) >> 20);
+	       (long int)((top_of_ram - total_ram) >> 20));
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 #ifdef CONFIG_HIGHMEM
 	max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 0480225..fab3cfa 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -29,7 +29,7 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea,
 #ifdef CONFIG_PPC32
 extern void mapin_ram(void);
 extern int map_page(unsigned long va, phys_addr_t pa, int flags);
-extern void setbat(int index, unsigned long virt, unsigned long phys,
+extern void setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, int flags);
 extern void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 		      unsigned int size, int flags, unsigned int pid);
@@ -49,8 +49,8 @@ extern unsigned int num_tlbcam_entries;
 extern unsigned long ioremap_bot;
 extern unsigned long __max_low_memory;
 extern phys_addr_t __initial_memory_limit_addr;
-extern unsigned long total_memory;
-extern unsigned long total_lowmem;
+extern phys_addr_t total_memory;
+extern phys_addr_t total_lowmem;
 extern phys_addr_t memstart_addr;
 extern phys_addr_t lowmem_end_addr;
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index dc704da..cf4bffb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -268,6 +268,144 @@ static unsigned long __devinit read_n_cells(int n, const unsigned int **buf)
 	return result;
 }
 
+struct of_drconf_cell {
+	u64	base_addr;
+	u32	drc_index;
+	u32	reserved;
+	u32	aa_index;
+	u32	flags;
+};
+
+#define DRCONF_MEM_ASSIGNED	0x00000008
+#define DRCONF_MEM_AI_INVALID	0x00000040
+#define DRCONF_MEM_RESERVED	0x00000080
+
+/*
+ * Read the next lmb list entry from the ibm,dynamic-memory property
+ * and return the information in the provided of_drconf_cell structure.
+ */
+static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp)
+{
+	const u32 *cp;
+
+	drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
+
+	cp = *cellp;
+	drmem->drc_index = cp[0];
+	drmem->reserved = cp[1];
+	drmem->aa_index = cp[2];
+	drmem->flags = cp[3];
+
+	*cellp = cp + 4;
+}
+
+/*
+ * Retreive and validate the ibm,dynamic-memory property of the device tree.
+ *
+ * The layout of the ibm,dynamic-memory property is a number N of lmb
+ * list entries followed by N lmb list entries.  Each lmb list entry
+ * contains information as layed out in the of_drconf_cell struct above.
+ */
+static int of_get_drconf_memory(struct device_node *memory, const u32 **dm)
+{
+	const u32 *prop;
+	u32 len, entries;
+
+	prop = of_get_property(memory, "ibm,dynamic-memory", &len);
+	if (!prop || len < sizeof(unsigned int))
+		return 0;
+
+	entries = *prop++;
+
+	/* Now that we know the number of entries, revalidate the size
+	 * of the property read in to ensure we have everything
+	 */
+	if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
+		return 0;
+
+	*dm = prop;
+	return entries;
+}
+
+/*
+ * Retreive and validate the ibm,lmb-size property for drconf memory
+ * from the device tree.
+ */
+static u64 of_get_lmb_size(struct device_node *memory)
+{
+	const u32 *prop;
+	u32 len;
+
+	prop = of_get_property(memory, "ibm,lmb-size", &len);
+	if (!prop || len < sizeof(unsigned int))
+		return 0;
+
+	return read_n_cells(n_mem_size_cells, &prop);
+}
+
+struct assoc_arrays {
+	u32	n_arrays;
+	u32	array_sz;
+	const u32 *arrays;
+};
+
+/*
+ * Retreive and validate the list of associativity arrays for drconf
+ * memory from the ibm,associativity-lookup-arrays property of the
+ * device tree..
+ *
+ * The layout of the ibm,associativity-lookup-arrays property is a number N
+ * indicating the number of associativity arrays, followed by a number M
+ * indicating the size of each associativity array, followed by a list
+ * of N associativity arrays.
+ */
+static int of_get_assoc_arrays(struct device_node *memory,
+			       struct assoc_arrays *aa)
+{
+	const u32 *prop;
+	u32 len;
+
+	prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
+	if (!prop || len < 2 * sizeof(unsigned int))
+		return -1;
+
+	aa->n_arrays = *prop++;
+	aa->array_sz = *prop++;
+
+	/* Now that we know the number of arrrays and size of each array,
+	 * revalidate the size of the property read in.
+	 */
+	if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int))
+		return -1;
+
+	aa->arrays = prop;
+	return 0;
+}
+
+/*
+ * This is like of_node_to_nid_single() for memory represented in the
+ * ibm,dynamic-reconfiguration-memory node.
+ */
+static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
+				   struct assoc_arrays *aa)
+{
+	int default_nid = 0;
+	int nid = default_nid;
+	int index;
+
+	if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
+	    !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
+	    drmem->aa_index < aa->n_arrays) {
+		index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
+		nid = aa->arrays[index];
+
+		if (nid == 0xffff || nid >= MAX_NUMNODES)
+			nid = default_nid;
+	}
+
+	return nid;
+}
+
 /*
  * Figure out to which domain a cpu belongs and stick it there.
  * Return the id of the domain used.
@@ -355,57 +493,50 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start,
  */
 static void __init parse_drconf_memory(struct device_node *memory)
 {
-	const unsigned int *lm, *dm, *aa;
-	unsigned int ls, ld, la;
-	unsigned int n, aam, aalen;
-	unsigned long lmb_size, size, start;
-	int nid, default_nid = 0;
-	unsigned int ai, flags;
-
-	lm = of_get_property(memory, "ibm,lmb-size", &ls);
-	dm = of_get_property(memory, "ibm,dynamic-memory", &ld);
-	aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la);
-	if (!lm || !dm || !aa ||
-	    ls < sizeof(unsigned int) || ld < sizeof(unsigned int) ||
-	    la < 2 * sizeof(unsigned int))
+	const u32 *dm;
+	unsigned int n, rc;
+	unsigned long lmb_size, size;
+	int nid;
+	struct assoc_arrays aa;
+
+	n = of_get_drconf_memory(memory, &dm);
+	if (!n)
+		return;
+
+	lmb_size = of_get_lmb_size(memory);
+	if (!lmb_size)
 		return;
 
-	lmb_size = read_n_cells(n_mem_size_cells, &lm);
-	n = *dm++;		/* number of LMBs */
-	aam = *aa++;		/* number of associativity lists */
-	aalen = *aa++;		/* length of each associativity list */
-	if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) ||
-	    la < (aam * aalen + 2) * sizeof(unsigned int))
+	rc = of_get_assoc_arrays(memory, &aa);
+	if (rc)
 		return;
 
 	for (; n != 0; --n) {
-		start = read_n_cells(n_mem_addr_cells, &dm);
-		ai = dm[2];
-		flags = dm[3];
-		dm += 4;
-		/* 0x80 == reserved, 0x8 = assigned to us */
-		if ((flags & 0x80) || !(flags & 0x8))
+		struct of_drconf_cell drmem;
+
+		read_drconf_cell(&drmem, &dm);
+
+		/* skip this block if the reserved bit is set in flags (0x80)
+		   or if the block is not assigned to this partition (0x8) */
+		if ((drmem.flags & DRCONF_MEM_RESERVED)
+		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
 			continue;
 
-		nid = default_nid;
-		/* flags & 0x40 means associativity index is invalid */
-		if (min_common_depth > 0 && min_common_depth <= aalen &&
-		    (flags & 0x40) == 0 && ai < aam) {
-			/* this is like of_node_to_nid_single */
-			nid = aa[ai * aalen + min_common_depth - 1];
-			if (nid == 0xffff || nid >= MAX_NUMNODES)
-				nid = default_nid;
-		}
-		fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT),
-						&nid);
+		nid = of_drconf_to_nid_single(&drmem, &aa);
+
+		fake_numa_create_new_node(
+			((drmem.base_addr + lmb_size) >> PAGE_SHIFT),
+			&nid);
+
 		node_set_online(nid);
 
-		size = numa_enforce_memory_limit(start, lmb_size);
+		size = numa_enforce_memory_limit(drmem.base_addr, lmb_size);
 		if (!size)
 			continue;
 
-		add_active_range(nid, start >> PAGE_SHIFT,
-				 (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
+		add_active_range(nid, drmem.base_addr >> PAGE_SHIFT,
+				 (drmem.base_addr >> PAGE_SHIFT)
+				 + (size >> PAGE_SHIFT));
 	}
 }
@@ -770,6 +901,79 @@ early_param("numa", early_numa);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
+ * Validate the node associated with the memory section we are
+ * trying to add.
+ */
+int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size,
+		      unsigned long scn_addr)
+{
+	nodemask_t nodes;
+
+	if (*nid < 0 || !node_online(*nid))
+		*nid = any_online_node(NODE_MASK_ALL);
+
+	if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) {
+		nodes_setall(nodes);
+		while (NODE_DATA(*nid)->node_spanned_pages == 0) {
+			node_clear(*nid, nodes);
+			*nid = any_online_node(nodes);
+		}
+
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Find the node associated with a hot added memory section represented
+ * by the ibm,dynamic-reconfiguration-memory node.
+ */
+static int hot_add_drconf_scn_to_nid(struct device_node *memory,
+				     unsigned long scn_addr)
+{
+	const u32 *dm;
+	unsigned int n, rc;
+	unsigned long lmb_size;
+	int default_nid = any_online_node(NODE_MASK_ALL);
+	int nid;
+	struct assoc_arrays aa;
+
+	n = of_get_drconf_memory(memory, &dm);
+	if (!n)
+		return default_nid;;
+
+	lmb_size = of_get_lmb_size(memory);
+	if (!lmb_size)
+		return default_nid;
+
+	rc = of_get_assoc_arrays(memory, &aa);
+	if (rc)
+		return default_nid;
+
+	for (; n != 0; --n) {
+		struct of_drconf_cell drmem;
+
+		read_drconf_cell(&drmem, &dm);
+
+		/* skip this block if it is reserved or not assigned to
+		 * this partition */
+		if ((drmem.flags & DRCONF_MEM_RESERVED)
+		    || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+			continue;
+
+		nid = of_drconf_to_nid_single(&drmem, &aa);
+
+		if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size,
+				      scn_addr))
+			return nid;
+	}
+
+	BUG();	/* section address should be found above */
+	return 0;
+}
+
+/*
  * Find the node associated with a hot added memory section.  Section
  * corresponds to a SPARSEMEM section, not an LMB.  It is assumed that
  * sections are fully contained within a single LMB.
@@ -777,12 +981,17 @@ early_param("numa", early_numa);
 int hot_add_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory = NULL;
-	nodemask_t nodes;
-	int default_nid = any_online_node(NODE_MASK_ALL);
 	int nid;
 
 	if (!numa_enabled || (min_common_depth < 0))
-		return default_nid;
+		return any_online_node(NODE_MASK_ALL);
+
+	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (memory) {
+		nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+		of_node_put(memory);
+		return nid;
+	}
 
 	while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
 		unsigned long start, size;
@@ -801,13 +1010,9 @@ ha_new_range:
 		size = read_n_cells(n_mem_size_cells, &memcell_buf);
 		nid = of_node_to_nid_single(memory);
 
-		/* Domains not present at boot default to 0 */
-		if (nid < 0 || !node_online(nid))
-			nid = default_nid;
-
-		if ((scn_addr >= start) && (scn_addr < (start + size))) {
+		if (valid_hot_add_scn(&nid, start, size, scn_addr)) {
 			of_node_put(memory);
-			goto got_nid;
+			return nid;
 		}
 
 		if (--ranges)		/* process all ranges in cell */
@@ -815,14 +1020,5 @@ ha_new_range:
 	}
 	BUG();	/* section address should be found above */
 	return 0;
-
-	/* Temporary code to ensure that returned node is not empty */
-got_nid:
-	nodes_setall(nodes);
-	while (NODE_DATA(nid)->node_spanned_pages == 0) {
-		node_clear(nid, nodes);
-		nid = any_online_node(nodes);
-	}
-	return nid;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index e0ff59f..c758407 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -53,9 +53,9 @@ extern void hash_page_sync(void);
 #endif
 
 #ifdef HAVE_BATS
-extern unsigned long v_mapped_by_bats(unsigned long va);
-extern unsigned long p_mapped_by_bats(unsigned long pa);
-void setbat(int index, unsigned long virt, unsigned long phys,
+extern phys_addr_t v_mapped_by_bats(unsigned long va);
+extern unsigned long p_mapped_by_bats(phys_addr_t pa);
+void setbat(int index, unsigned long virt, phys_addr_t phys,
 	    unsigned int size, int flags);
 
 #else /* !HAVE_BATS */
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index cef9f15..c53145f 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -38,21 +38,18 @@ struct hash_pte *Hash, *Hash_end;
 unsigned long Hash_size, Hash_mask;
 unsigned long _SDR1;
 
-union ubat {			/* BAT register values to be loaded */
-	struct ppc_bat bat;
-	u32	word[2];
-} BATS[8][2];			/* 8 pairs of IBAT, DBAT */
+struct ppc_bat BATS[8][2];	/* 8 pairs of IBAT, DBAT */
 
 struct batrange {		/* stores address ranges mapped by BATs */
 	unsigned long start;
 	unsigned long limit;
-	unsigned long phys;
+	phys_addr_t phys;
 } bat_addrs[8];
 
 /*
  * Return PA for this VA if it is mapped by a BAT, or 0
  */
-unsigned long v_mapped_by_bats(unsigned long va)
+phys_addr_t v_mapped_by_bats(unsigned long va)
 {
 	int b;
 	for (b = 0; b < 4; ++b)
@@ -64,7 +61,7 @@ unsigned long v_mapped_by_bats(unsigned long va)
 /*
  * Return VA for a given PA or 0 if not mapped
  */
-unsigned long p_mapped_by_bats(unsigned long pa)
+unsigned long p_mapped_by_bats(phys_addr_t pa)
 {
 	int b;
 	for (b = 0; b < 4; ++b)
@@ -119,12 +116,12 @@ unsigned long __init mmu_mapin_ram(void)
  * The parameters are not checked; in particular size must be a power
  * of 2 between 128k and 256M.
  */
-void __init setbat(int index, unsigned long virt, unsigned long phys,
+void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 		   unsigned int size, int flags)
 {
 	unsigned int bl;
 	int wimgxpp;
-	union ubat *bat = BATS[index];
+	struct ppc_bat *bat = BATS[index];
 
 	if (((flags & _PAGE_NO_CACHE) == 0) &&
 	    cpu_has_feature(CPU_FTR_NEED_COHERENT))
@@ -137,15 +134,15 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
 		wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
 				   | _PAGE_COHERENT | _PAGE_GUARDED);
 		wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
-		bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
-		bat[1].word[1] = phys | wimgxpp;
+		bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+		bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
 #ifndef CONFIG_KGDB /* want user access for breakpoints */
 		if (flags & _PAGE_USER)
 #endif
-			bat[1].bat.batu.vp = 1;
+			bat[1].batu |= 1;	/* Vp = 1 */
 		if (flags & _PAGE_GUARDED) {
 			/* G bit must be zero in IBATs */
-			bat[0].word[0] = bat[0].word[1] = 0;
+			bat[0].batu = bat[0].batl = 0;
 		} else {
 			/* make IBAT same as DBAT */
 			bat[0] = bat[1];
@@ -158,8 +155,8 @@ void __init setbat(int index, unsigned long virt, unsigned long phys,
 				   | _PAGE_COHERENT);
 		wimgxpp |= (flags & _PAGE_RW)?
 			((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
-		bat->word[0] = virt | wimgxpp | 4;	/* Ks=0, Ku=1 */
-		bat->word[1] = phys | bl | 0x40;	/* V=1 */
+		bat->batu = virt | wimgxpp | 4;		/* Ks=0, Ku=1 */
+		bat->batl = phys | bl | 0x40;		/* V=1 */
 	}
 
 	bat_addrs[index].start = virt;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 2bd12d9..db44e02 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -215,10 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 		  mm->context.high_slices_psize);
 
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
-	mb();
 
-	/* XXX this is sub-optimal but will do for now */
-	on_each_cpu(slice_flush_segments, mm, 1);
 #ifdef CONFIG_SPU_BASE
 	spu_flush_all_slbs(mm);
 #endif
@@ -384,17 +381,34 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 	return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
 }
 
+#define or_mask(dst, src)	do {			\
+	(dst).low_slices |= (src).low_slices;		\
+	(dst).high_slices |= (src).high_slices;		\
+} while (0)
+
+#define andnot_mask(dst, src)	do {			\
+	(dst).low_slices &= ~(src).low_slices;		\
+	(dst).high_slices &= ~(src).high_slices;	\
+} while (0)
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define MMU_PAGE_BASE	MMU_PAGE_64K
+#else
+#define MMU_PAGE_BASE	MMU_PAGE_4K
+#endif
+
 unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 				      unsigned long flags, unsigned int psize,
 				      int topdown, int use_cache)
 {
-	struct slice_mask mask;
+	struct slice_mask mask = {0, 0};
 	struct slice_mask good_mask;
 	struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
-	int pmask_set = 0;
+	struct slice_mask compat_mask = {0, 0};
 	int fixed = (flags & MAP_FIXED);
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
 	struct mm_struct *mm = current->mm;
+	unsigned long newaddr;
 
 	/* Sanity checks */
 	BUG_ON(mm->task_size == 0);
@@ -416,21 +430,48 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (!fixed && addr) {
 		addr = _ALIGN_UP(addr, 1ul << pshift);
 		slice_dbg(" aligned addr=%lx\n", addr);
+		/* Ignore hint if it's too large or overlaps a VMA */
+		if (addr > mm->task_size - len ||
+		    !slice_area_is_free(mm, addr, len))
+			addr = 0;
 	}
 
-	/* First makeup a "good" mask of slices that have the right size
+	/* First make up a "good" mask of slices that have the right size
 	 * already
 	 */
 	good_mask = slice_mask_for_size(mm, psize);
 	slice_print_mask(" good_mask", good_mask);
 
-	/* First check hint if it's valid or if we have MAP_FIXED */
-	if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {
+	/*
+	 * Here "good" means slices that are already the right page size,
+	 * "compat" means slices that have a compatible page size (i.e.
+	 * 4k in a 64k pagesize kernel), and "free" means slices without
+	 * any VMAs.
+	 *
+	 * If MAP_FIXED:
+	 *	check if fits in good | compat => OK
+	 *	check if fits in good | compat | free => convert free
+	 *	else bad
+	 * If have hint:
+	 *	check if hint fits in good => OK
+	 *	check if hint fits in good | free => convert free
+	 * Otherwise:
+	 *	search in good, found => OK
+	 *	search in good | free, found => convert free
+	 *	search in good | compat | free, found => convert free.
+	 */
-		/* Don't bother with hint if it overlaps a VMA */
-		if (!fixed && !slice_area_is_free(mm, addr, len))
-			goto search;
+#ifdef CONFIG_PPC_64K_PAGES
+	/* If we support combo pages, we can allow 64k pages in 4k slices */
+	if (psize == MMU_PAGE_64K) {
+		compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
+		if (fixed)
+			or_mask(good_mask, compat_mask);
+	}
+#endif
 
+	/* First check hint if it's valid or if we have MAP_FIXED */
+	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
 		mask = slice_range_to_mask(addr, len);
 		slice_print_mask(" mask", mask);
@@ -442,54 +483,66 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 			slice_dbg(" fits good !\n");
 			return addr;
 		}
-
-		/* We don't fit in the good mask, check what other slices are
-		 * empty and thus can be converted
+	} else {
+		/* Now let's see if we can find something in the existing
+		 * slices for that size
 		 */
-		potential_mask = slice_mask_for_free(mm);
-		potential_mask.low_slices |= good_mask.low_slices;
-		potential_mask.high_slices |= good_mask.high_slices;
-		pmask_set = 1;
-		slice_print_mask(" potential", potential_mask);
-		if (slice_check_fit(mask, potential_mask)) {
-			slice_dbg(" fits potential !\n");
-			goto convert;
+		newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
+					  use_cache);
+		if (newaddr != -ENOMEM) {
+			/* Found within the good mask, we don't have to setup,
+			 * we thus return directly
+			 */
+			slice_dbg(" found area at 0x%lx\n", newaddr);
+			return newaddr;
 		}
 	}
 
-	/* If we have MAP_FIXED and failed the above step, then error out */
+	/* We don't fit in the good mask, check what other slices are
+	 * empty and thus can be converted
+	 */
+	potential_mask = slice_mask_for_free(mm);
+	or_mask(potential_mask, good_mask);
+	slice_print_mask(" potential", potential_mask);
+
+	if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
+		slice_dbg(" fits potential !\n");
+		goto convert;
+	}
+
+	/* If we have MAP_FIXED and failed the above steps, then error out */
 	if (fixed)
 		return -EBUSY;
 
- search:
 	slice_dbg(" search...\n");
 
-	/* Now let's see if we can find something in the existing slices
-	 * for that size
+	/* If we had a hint that didn't work out, see if we can fit
+	 * anywhere in the good area.
 	 */
-	addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
-	if (addr != -ENOMEM) {
-		/* Found within the good mask, we don't have to setup,
-		 * we thus return directly
-		 */
-		slice_dbg(" found area at 0x%lx\n", addr);
-		return addr;
-	}
-
-	/* Won't fit, check what can be converted */
-	if (!pmask_set) {
-		potential_mask = slice_mask_for_free(mm);
-		potential_mask.low_slices |= good_mask.low_slices;
-		potential_mask.high_slices |= good_mask.high_slices;
-		pmask_set = 1;
-		slice_print_mask(" potential", potential_mask);
+	if (addr) {
+		addr = slice_find_area(mm, len, good_mask, psize, topdown,
+				       use_cache);
+		if (addr != -ENOMEM) {
+			slice_dbg(" found area at 0x%lx\n", addr);
+			return addr;
+		}
 	}
 
 	/* Now let's see if we can find something in the existing slices
-	 * for that size
+	 * for that size plus free slices
 	 */
 	addr = slice_find_area(mm, len, potential_mask, psize,
 			       topdown, use_cache);
+
+#ifdef CONFIG_PPC_64K_PAGES
+	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+		/* retry the search with 4k-page slices included */
+		or_mask(potential_mask, compat_mask);
+		addr = slice_find_area(mm, len, potential_mask, psize,
+				       topdown, use_cache);
+	}
+#endif
+
 	if (addr == -ENOMEM)
 		return -ENOMEM;
 
@@ -498,7 +551,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	slice_print_mask(" mask", mask);
 
  convert:
-	slice_convert(mm, mask, psize);
+	andnot_mask(mask, good_mask);
+	andnot_mask(mask, compat_mask);
+	if (mask.low_slices || mask.high_slices) {
+		slice_convert(mm, mask, psize);
+		if (psize > MMU_PAGE_BASE)
+			on_each_cpu(slice_flush_segments, mm, 1);
+	}
 	return addr;
 
 }
@@ -598,6 +657,36 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 }
 
+void slice_set_psize(struct mm_struct *mm, unsigned long address,
+		     unsigned int psize)
+{
+	unsigned long i, flags;
+	u64 *p;
+
+	spin_lock_irqsave(&slice_convert_lock, flags);
+	if (address < SLICE_LOW_TOP) {
+		i = GET_LOW_SLICE_INDEX(address);
+		p = &mm->context.low_slices_psize;
+	} else {
+		i = GET_HIGH_SLICE_INDEX(address);
+		p = &mm->context.high_slices_psize;
+	}
+	*p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4));
+	spin_unlock_irqrestore(&slice_convert_lock, flags);
+
+#ifdef CONFIG_SPU_BASE
+	spu_flush_all_slbs(mm);
+#endif
+}
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+			   unsigned long len, unsigned int psize)
+{
+	struct slice_mask mask = slice_range_to_mask(start, len);
+
+	slice_convert(mm, mask, psize);
+}
+
 /*
  * is_hugepage_only_range() is used by generic code to verify wether
  * a normal mmap mapping (non hugetlbfs) is valid on a given area.
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index efbbd13..60e6032 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -30,8 +30,8 @@ struct stab_entry {
 };
 
 #define NR_STAB_CACHE_ENTRIES 8
-DEFINE_PER_CPU(long, stab_cache_ptr);
-DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
+static DEFINE_PER_CPU(long, stab_cache_ptr);
+static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
 
 /*
  * Create a segment table entry for the given esid/vsid pair.
diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c
index 69ad829..a01b5c6 100644
--- a/arch/powerpc/mm/tlb_64.c
+++ b/arch/powerpc/mm/tlb_64.c
@@ -37,8 +37,8 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
  * include/asm-powerpc/tlb.h file -- tgall
  */
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
-unsigned long pte_freelist_forced_free;
+static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
+static unsigned long pte_freelist_forced_free;
 
 struct pte_freelist_batch
 {
@@ -47,9 +47,6 @@ struct pte_freelist_batch
 	pgtable_free_t	tables[0];
 };
 
-DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
-unsigned long pte_freelist_forced_free;
-
 #define PTE_FREELIST_SIZE \
 	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
 	  / sizeof(pgtable_free_t))