diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 18:54:28 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 18:54:28 -0800 |
commit | 818099574b04c5301eacbbcd441022b353a65466 (patch) | |
tree | 77b3645b375105cb0389df2b4ea5ffa90329f7f8 /arch/powerpc | |
parent | 802ea9d8645d33d24b7b4cd4537c14f3e698bde0 (diff) | |
parent | 6016daed58ee482a2f7684e93342e89139cf4419 (diff) | |
download | op-kernel-dev-818099574b04c5301eacbbcd441022b353a65466.zip op-kernel-dev-818099574b04c5301eacbbcd441022b353a65466.tar.gz |
Merge branch 'akpm' (patches from Andrew)
Merge third set of updates from Andrew Morton:
- the rest of MM
[ This includes getting rid of the numa hinting bits, in favor of
just generic protnone logic. Yay. - Linus ]
- core kernel
- procfs
- some of lib/ (lots of lib/ material this time)
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (104 commits)
lib/lcm.c: replace include
lib/percpu_ida.c: remove redundant includes
lib/strncpy_from_user.c: replace module.h include
lib/stmp_device.c: replace module.h include
lib/sort.c: move include inside #if 0
lib/show_mem.c: remove redundant include
lib/radix-tree.c: change to simpler include
lib/plist.c: remove redundant include
lib/nlattr.c: remove redundant include
lib/kobject_uevent.c: remove redundant include
lib/llist.c: remove redundant include
lib/md5.c: simplify include
lib/list_sort.c: rearrange includes
lib/genalloc.c: remove redundant include
lib/idr.c: remove redundant include
lib/halfmd4.c: simplify includes
lib/dynamic_queue_limits.c: simplify includes
lib/sort.c: use simpler includes
lib/interval_tree.c: simplify includes
hexdump: make it return number of bytes placed in buffer
...
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/include/asm/pgtable.h | 54 | ||||
-rw-r--r-- | arch/powerpc/include/asm/pte-common.h | 5 | ||||
-rw-r--r-- | arch/powerpc/include/asm/pte-hash64.h | 6 | ||||
-rw-r--r-- | arch/powerpc/include/asm/thread_info.h | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/signal_32.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kernel/signal_64.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/time.c | 32 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 8 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 25 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 3 |
12 files changed, 70 insertions, 86 deletions
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 7e77f2c..79fee2e 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -40,63 +40,27 @@ static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); } #ifdef CONFIG_NUMA_BALANCING -static inline int pte_present(pte_t pte) -{ - return pte_val(pte) & _PAGE_NUMA_MASK; -} - -#define pte_present_nonuma pte_present_nonuma -static inline int pte_present_nonuma(pte_t pte) -{ - return pte_val(pte) & (_PAGE_PRESENT); -} - -#define ptep_set_numa ptep_set_numa -static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - if ((pte_val(*ptep) & _PAGE_PRESENT) == 0) - VM_BUG_ON(1); - - pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0); - return; -} - -#define pmdp_set_numa pmdp_set_numa -static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) -{ - if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0) - VM_BUG_ON(1); - - pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA); - return; -} - /* - * Generic NUMA pte helpers expect pteval_t and pmdval_t types to exist - * which was inherited from x86. For the purposes of powerpc pte_basic_t and - * pmd_t are equivalent + * These work without NUMA balancing but the kernel does not care. See the + * comment in include/asm-generic/pgtable.h . On powerpc, this will only + * work for user pages and always return true for kernel pages. */ -#define pteval_t pte_basic_t -#define pmdval_t pmd_t -static inline pteval_t ptenuma_flags(pte_t pte) +static inline int pte_protnone(pte_t pte) { - return pte_val(pte) & _PAGE_NUMA_MASK; + return (pte_val(pte) & + (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; } -static inline pmdval_t pmdnuma_flags(pmd_t pmd) +static inline int pmd_protnone(pmd_t pmd) { - return pmd_val(pmd) & _PAGE_NUMA_MASK; + return pte_protnone(pmd_pte(pmd)); } - -# else +#endif /* CONFIG_NUMA_BALANCING */ static inline int pte_present(pte_t pte) { return pte_val(pte) & _PAGE_PRESENT; } -#endif /* CONFIG_NUMA_BALANCING */ /* Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 2aef9b7..c5a755e 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -104,11 +104,6 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) -#ifdef CONFIG_NUMA_BALANCING -/* Mask of bits that distinguish present and numa ptes */ -#define _PAGE_NUMA_MASK (_PAGE_NUMA|_PAGE_PRESENT) -#endif - /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h index 2505d8e..55aea0c 100644 --- a/arch/powerpc/include/asm/pte-hash64.h +++ b/arch/powerpc/include/asm/pte-hash64.h @@ -27,12 +27,6 @@ #define _PAGE_RW 0x0200 /* software: user write access allowed */ #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ -/* - * Used for tracking numa faults - */ -#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */ - - /* No separate kernel read-only */ #define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index e8abc83..7248979 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -43,7 +43,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ - struct restart_block restart_block; unsigned long local_flags; /* private flags for thread */ /* low level flags - has atomic operations done on it */ @@ -59,9 +58,6 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ .flags = 0, \ } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index b171001..d3a831a 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1231,7 +1231,7 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, int tm_restore = 0; #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; rt_sf = (struct rt_sigframe __user *) (regs->gpr[1] + __SIGNAL_FRAMESIZE + 16); @@ -1504,7 +1504,7 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8, #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE); sc = &sf->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 2cb0c94..c7c24d2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -666,7 +666,7 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, #endif /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; if (!access_ok(VERIFY_READ, uc, sizeof(*uc))) goto badframe; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fa7c4f1..7316dd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -621,6 +621,38 @@ unsigned long long sched_clock(void) return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; } + +#ifdef CONFIG_PPC_PSERIES + +/* + * Running clock - attempts to give a view of time passing for a virtualised + * kernels. + * Uses the VTB register if available otherwise a next best guess. + */ +unsigned long long running_clock(void) +{ + /* + * Don't read the VTB as a host since KVM does not switch in host + * timebase into the VTB when it takes a guest off the CPU, reading the + * VTB would result in reading 'last switched out' guest VTB. + * + * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it + * would be unsafe to rely only on the #ifdef above. + */ + if (firmware_has_feature(FW_FEATURE_LPAR) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift; + + /* + * This is a next best approximation without a VTB. + * On a host which is running bare metal there should never be any stolen + * time and on a host which doesn't do any virtualisation TB *should* equal + * VTB so it makes no difference anyway. + */ + return local_clock() - cputime_to_nsecs(kcpustat_this_cpu->cpustat[CPUTIME_STEAL]); +} +#endif + static int __init get_freq(char *name, int cells, unsigned long *val) { struct device_node *cpu; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 510bdfb..625407e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -212,7 +212,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Look up the Linux PTE for the backing page */ pte_size = psize; pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); - if (pte_present(pte) && !pte_numa(pte)) { + if (pte_present(pte) && !pte_protnone(pte)) { if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 1b5305d..f031a47 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -64,10 +64,14 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock; } else { - if (dsisr & DSISR_PROTFAULT) - goto out_unlock; if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto out_unlock; + /* + * protfault should only happen due to us + * mapping a region readonly temporarily. PROT_NONE + * is also covered by the VMA check above. + */ + WARN_ON_ONCE(dsisr & DSISR_PROTFAULT); } ret = 0; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6154b0a..b396868 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -389,19 +389,6 @@ good_area: #endif /* CONFIG_8xx */ if (is_exec) { -#ifdef CONFIG_PPC_STD_MMU - /* Protection fault on exec go straight to failure on - * Hash based MMUs as they either don't support per-page - * execute permission, or if they do, it's handled already - * at the hash level. This test would probably have to - * be removed if we change the way this works to make hash - * processors use the same I/D cache coherency mechanism - * as embedded. - */ - if (error_code & DSISR_PROTFAULT) - goto bad_area; -#endif /* CONFIG_PPC_STD_MMU */ - /* * Allow execution from readable areas if the MMU does not * provide separate controls over reading and executing. @@ -416,6 +403,14 @@ good_area: (cpu_has_feature(CPU_FTR_NOEXECUTE) || !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; +#ifdef CONFIG_PPC_STD_MMU + /* + * protfault should only happen due to us + * mapping a region readonly temporarily. PROT_NONE + * is also covered by the VMA check above. + */ + WARN_ON_ONCE(error_code & DSISR_PROTFAULT); +#endif /* CONFIG_PPC_STD_MMU */ /* a write */ } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) @@ -423,11 +418,9 @@ good_area: flags |= FAULT_FLAG_WRITE; /* a read */ } else { - /* protection fault */ - if (error_code & 0x08000000) - goto bad_area; if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) goto bad_area; + WARN_ON_ONCE(error_code & DSISR_PROTFAULT); } /* diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c90e602..83dfcb5 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -172,9 +172,14 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { -#ifdef CONFIG_DEBUG_VM - WARN_ON(pte_val(*ptep) & _PAGE_PRESENT); -#endif + /* + * When handling numa faults, we already have the pte marked + * _PAGE_PRESENT, but we can be sure that it is not in hpte. + * Hence we can use set_pte_at for them. + */ + VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER)); + /* Note: mm->context.id might not yet have been assigned as * this context might not have been activated yet when this * is called. diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 4fe5f64..91bb883 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -718,7 +718,8 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { #ifdef CONFIG_DEBUG_VM - WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT); + WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER)); assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif |