diff options
author | Mel Gorman <mgorman@suse.de> | 2012-11-15 01:24:32 +0000 |
---|---|---|
committer | Mel Gorman <mgorman@suse.de> | 2012-12-11 14:42:49 +0000 |
commit | 9532fec118d485ea37ab6e3ea372d68cd8b4cd0d (patch) | |
tree | 5076f3da1ff244df554e99b8701749423a6b92ad | |
parent | 5606e3877ad8baea42f3a71ebde0a03622bbb551 (diff) | |
download | op-kernel-dev-9532fec118d485ea37ab6e3ea372d68cd8b4cd0d.zip op-kernel-dev-9532fec118d485ea37ab6e3ea372d68cd8b4cd0d.tar.gz |
mm: numa: Migrate pages handled during a pmd_numa hinting fault
To say that the PMD handling code was incorrectly transferred from autonuma
is an understatement. The intention was to handle a PMD's worth of pages
in the same fault and effectively batch the taking of the PTL and page
migration. The copied version instead has the effect of clearing a number
of pte_numa PTE entries, and whether any page migration takes place depends
on racing. This just happens to work in some cases.
This patch handles pte_numa faults in batch when a pmd_numa fault is
handled. The pages are migrated if they are currently misplaced.
Essentially this assumes that NUMA locality is on a PMD boundary.
If necessary, that assumption could be addressed by only setting
pmd_numa when all the pages within that PMD are on the same
node.
Signed-off-by: Mel Gorman <mgorman@suse.de>
-rw-r--r-- | mm/memory.c | 51 | ||||
-rw-r--r-- | mm/mprotect.c | 25 |
2 files changed, 54 insertions, 22 deletions
diff --git a/mm/memory.c b/mm/memory.c index 8a7b4cc..84c6d9e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3449,6 +3449,18 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); } +int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, + unsigned long addr, int current_nid) +{ + get_page(page); + + count_vm_numa_event(NUMA_HINT_FAULTS); + if (current_nid == numa_node_id()) + count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); + + return mpol_misplaced(page, vma, addr); +} + int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd) { @@ -3477,18 +3489,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); update_mmu_cache(vma, addr, ptep); - count_vm_numa_event(NUMA_HINT_FAULTS); page = vm_normal_page(vma, addr, pte); if (!page) { pte_unmap_unlock(ptep, ptl); return 0; } - get_page(page); current_nid = page_to_nid(page); - if (current_nid == numa_node_id()) - count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); - target_nid = mpol_misplaced(page, vma, addr); + target_nid = numa_migrate_prep(page, vma, addr, current_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { /* @@ -3505,7 +3513,8 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, current_nid = target_nid; out: - task_numa_fault(current_nid, 1); + if (current_nid != -1) + task_numa_fault(current_nid, 1); return 0; } @@ -3521,8 +3530,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, spinlock_t *ptl; bool numa = false; int local_nid = numa_node_id(); - unsigned long nr_faults = 0; - unsigned long nr_faults_local = 0; spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3545,7 +3552,8 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t 
pteval = *pte; struct page *page; - int curr_nid; + int curr_nid = local_nid; + int target_nid; if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3566,21 +3574,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* only check non-shared pages */ if (unlikely(page_mapcount(page) != 1)) continue; - pte_unmap_unlock(pte, ptl); - curr_nid = page_to_nid(page); - task_numa_fault(curr_nid, 1); + /* + * Note that the NUMA fault is later accounted to either + * the node that is currently running or where the page is + * migrated to. + */ + curr_nid = local_nid; + target_nid = numa_migrate_prep(page, vma, addr, + page_to_nid(page)); + if (target_nid == -1) { + put_page(page); + continue; + } - nr_faults++; - if (curr_nid == local_nid) - nr_faults_local++; + /* Migrate to the requested node */ + pte_unmap_unlock(pte, ptl); + if (migrate_misplaced_page(page, target_nid)) + curr_nid = target_nid; + task_numa_fault(curr_nid, 1); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } pte_unmap_unlock(orig_pte, ptl); - count_vm_numa_events(NUMA_HINT_FAULTS, nr_faults); - count_vm_numa_events(NUMA_HINT_FAULTS_LOCAL, nr_faults_local); return 0; } #else diff --git a/mm/mprotect.c b/mm/mprotect.c index 7ef6ae9..dce6fb4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -37,12 +37,14 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, - int dirty_accountable, int prot_numa) + int dirty_accountable, int prot_numa, bool *ret_all_same_node) { struct mm_struct *mm = vma->vm_mm; pte_t *pte, oldpte; spinlock_t *ptl; unsigned long pages = 0; + bool all_same_node = true; + int last_nid = -1; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); arch_enter_lazy_mmu_mode(); @@ -61,6 +63,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, page = vm_normal_page(vma, addr, 
oldpte); if (page) { + int this_nid = page_to_nid(page); + if (last_nid == -1) + last_nid = this_nid; + if (last_nid != this_nid) + all_same_node = false; + /* only check non-shared pages */ if (!pte_numa(oldpte) && page_mapcount(page) == 1) { @@ -81,7 +89,6 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (updated) pages++; - ptep_modify_prot_commit(mm, addr, pte, ptent); } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) { swp_entry_t entry = pte_to_swp_entry(oldpte); @@ -101,6 +108,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, arch_leave_lazy_mmu_mode(); pte_unmap_unlock(pte - 1, ptl); + *ret_all_same_node = all_same_node; return pages; } @@ -127,6 +135,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * pmd_t *pmd; unsigned long next; unsigned long pages = 0; + bool all_same_node; pmd = pmd_offset(pud, addr); do { @@ -143,9 +152,15 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t * if (pmd_none_or_clear_bad(pmd)) continue; pages += change_pte_range(vma, pmd, addr, next, newprot, - dirty_accountable, prot_numa); - - if (prot_numa) + dirty_accountable, prot_numa, &all_same_node); + + /* + * If we are changing protections for NUMA hinting faults then + * set pmd_numa if the examined pages were all on the same + * node. This allows a regular PMD to be handled as one fault + * and effectively batches the taking of the PTL + */ + if (prot_numa && all_same_node) change_pmd_protnuma(vma->vm_mm, addr, pmd); } while (pmd++, addr = next, addr != end); |