From 5877231f646bbd6d1d545e7af83aaa6e6b746013 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 6 Dec 2013 00:08:22 +0530 Subject: mm: Move change_prot_numa outside CONFIG_ARCH_USES_NUMA_PROT_NONE change_prot_numa should work even if _PAGE_NUMA != _PAGE_PROTNONE. On archs like ppc64 that don't use _PAGE_PROTNONE and also have a separate page table outside linux pagetable, we just need to make sure that when calling change_prot_numa we flush the hardware page table entry so that next page access result in a numa fault. We still need to make sure we use the numa faulting logic only when CONFIG_NUMA_BALANCING is set. This implies the migrate-on-fault (Lazy migration) via mbind will only work if CONFIG_NUMA_BALANCING is set. Signed-off-by: Aneesh Kumar K.V Reviewed-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Benjamin Herrenschmidt --- mm/mempolicy.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index eca4a31..9f73b29 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -613,7 +613,7 @@ static inline int queue_pages_pgd_range(struct vm_area_struct *vma, return 0; } -#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +#ifdef CONFIG_NUMA_BALANCING /* * This is used to mark a range of virtual addresses to be inaccessible. * These are later cleared by a NUMA hinting fault. Depending on these @@ -627,7 +627,6 @@ unsigned long change_prot_numa(struct vm_area_struct *vma, unsigned long addr, unsigned long end) { int nr_updated; - BUILD_BUG_ON(_PAGE_NUMA != _PAGE_PROTNONE); nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1); if (nr_updated) @@ -641,7 +640,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, { return 0; } -#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ +#endif /* CONFIG_NUMA_BALANCING */ /* * Walk through page tables and collect pages to be migrated. -- cgit v1.1 From 54a43d54988a3731d644fdeb7a1d6f46b4ac64c7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 23 Jan 2014 15:53:13 -0800 Subject: numa: add a sysctl for numa_balancing Add a working sysctl to enable/disable automatic numa memory balancing at runtime. This allows us to track down performance problems with this feature and is generally a good idea. This was possible earlier through debugfs, but only with special debugging options set. Also fix the boot message. [akpm@linux-foundation.org: s/sched_numa_balancing/sysctl_numa_balancing/] Signed-off-by: Andi Kleen Acked-by: Mel Gorman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0cd2c4d..947293e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2668,7 +2668,7 @@ static void __init check_numabalancing_enable(void) if (nr_node_ids > 1 && !numabalancing_override) { printk(KERN_INFO "Enabling automatic NUMA balancing. " - "Configure with numa_balancing= or sysctl"); + "Configure with numa_balancing= or the kernel.numa_balancing sysctl"); set_numabalancing_state(numabalancing_default); } } -- cgit v1.1 From cc81717ed3bc6d4f3738d13a1e097437caada0e9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 23 Jan 2014 15:53:15 -0800 Subject: mm: new_vma_page() cannot see NULL vma for hugetlb pages Commit 11c731e81bb0 ("mm/mempolicy: fix !vma in new_vma_page()") has removed BUG_ON(!vma) from new_vma_page which is partially correct because page_address_in_vma will return EFAULT for non-linear mappings and at least shared shmem might be mapped this way. The patch also tried to prevent NULL ptr for hugetlb pages which is not correct AFAICS because hugetlb pages cannot be mapped as VM_NONLINEAR and other conditions in page_address_in_vma seem to be legit and catch real bugs. This patch restores BUG_ON for PageHuge to catch potential issues when the to-be-migrated page is not setup properly. Signed-off-by: Michal Hocko Reviewed-by: Bob Liu Cc: Sasha Levin Cc: Wanpeng Li Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 947293e..463b7fb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1199,10 +1199,8 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int * } if (PageHuge(page)) { - if (vma) - return alloc_huge_page_noerr(vma, address, 1); - else - return NULL; + BUG_ON(!vma); + return alloc_huge_page_noerr(vma, address, 1); } /* * if !vma, alloc_page_vma() will use task or system default policy -- cgit v1.1 From c297663c0b3930491a3cb2aba4b6e5a7159c3503 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 29 Jan 2014 14:05:42 -0800 Subject: mm: numa: initialise numa balancing after jump label initialisation The command line parsing takes place before jump labels are initialised which generates a warning if numa_balancing= is specified and CONFIG_JUMP_LABEL is set. On older kernels before commit c4b2c0c5f647 ("static_key: WARN on usage before jump_label_init was called") the kernel would have crashed. This patch enables automatic numa balancing later in the initialisation process if numa_balancing= is specified. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Cc: stable Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36cb46c..79cea01 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2654,7 +2654,7 @@ void mpol_free_shared_policy(struct shared_policy *p) } #ifdef CONFIG_NUMA_BALANCING -static bool __initdata numabalancing_override; +static int __initdata numabalancing_override; static void __init check_numabalancing_enable(void) { @@ -2663,9 +2663,15 @@ static void __init check_numabalancing_enable(void) if (IS_ENABLED(CONFIG_NUMA_BALANCING_DEFAULT_ENABLED)) numabalancing_default = true; + /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ + if (numabalancing_override) + set_numabalancing_state(numabalancing_override == 1); + if (nr_node_ids > 1 && !numabalancing_override) { - printk(KERN_INFO "Enabling automatic NUMA balancing. " - "Configure with numa_balancing= or the kernel.numa_balancing sysctl"); + printk(KERN_INFO "%s automatic NUMA balancing. " + "Configure with numa_balancing= or the " + "kernel.numa_balancing sysctl", + numabalancing_default ? "Enabling" : "Disabling"); set_numabalancing_state(numabalancing_default); } } @@ -2675,13 +2681,12 @@ static int __init setup_numabalancing(char *str) int ret = 0; if (!str) goto out; - numabalancing_override = true; if (!strcmp(str, "enable")) { - set_numabalancing_state(true); + numabalancing_override = 1; ret = 1; } else if (!strcmp(str, "disable")) { - set_numabalancing_state(false); + numabalancing_override = -1; ret = 1; } out: -- cgit v1.1 From 4a404bea941ac3c62e11b88c9d16197334eee2f1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 29 Jan 2014 14:05:43 -0800 Subject: mm/mempolicy.c: convert to pr_foo() A few printk(KERN_*'s have snuck in there. Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 79cea01..873de7e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2668,7 +2668,7 @@ static void __init check_numabalancing_enable(void) set_numabalancing_state(numabalancing_override == 1); if (nr_node_ids > 1 && !numabalancing_override) { - printk(KERN_INFO "%s automatic NUMA balancing. " + pr_info("%s automatic NUMA balancing. " "Configure with numa_balancing= or the " "kernel.numa_balancing sysctl", numabalancing_default ? "Enabling" : "Disabling"); @@ -2691,7 +2691,7 @@ static int __init setup_numabalancing(char *str) } out: if (!ret) - printk(KERN_WARNING "Unable to parse numa_balancing=\n"); + pr_warn("Unable to parse numa_balancing=\n"); return ret; } -- cgit v1.1 From 8790c71a18e5d2d93532ae250bcf5eddbba729cd Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 30 Jan 2014 15:46:08 -0800 Subject: mm/mempolicy.c: fix mempolicy printing in numa_maps As a result of commit 5606e3877ad8 ("mm: numa: Migrate on reference policy"), /proc//numa_maps prints the mempolicy for any as "prefer:N" for the local node, N, of the process reading the file. This should only be printed when the mempolicy of is MPOL_PREFERRED for node N. If the process is actually only using the default mempolicy for local node allocation, make sure "default" is printed as expected. Signed-off-by: David Rientjes Reported-by: Robert Lippert Cc: Peter Zijlstra Acked-by: Mel Gorman Cc: Ingo Molnar Cc: [3.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/mempolicy.c') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 873de7e..ae3c8f3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2930,7 +2930,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) unsigned short mode = MPOL_DEFAULT; unsigned short flags = 0; - if (pol && pol != &default_policy) { + if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) { mode = pol->mode; flags = pol->flags; } -- cgit v1.1