From 148deab223b23734069abcacb5c7118b0e7deadc Mon Sep 17 00:00:00 2001
From: Matthew Wilcox
Date: Wed, 14 Dec 2016 15:08:49 -0800
Subject: radix-tree: improve multiorder iterators

This fixes several interlinked problems with the iterators in the presence of multiorder entries.

1. radix_tree_iter_next() would only advance by one slot, which would result in the iterators returning the same entry more than once if there were sibling entries.

2. radix_tree_next_slot() could return an internal pointer instead of a user pointer if a tagged multiorder entry was immediately followed by an entry of lower order.

3. radix_tree_next_slot() expanded to a lot more code than it used to when multiorder support was compiled in. And I wasn't comfortable with entry_to_node() being in a header file.

Fixing radix_tree_iter_next() for the presence of sibling entries necessarily involves examining the contents of the radix tree, so we now need to pass 'slot' to radix_tree_iter_next(), and we need to change the calling convention so it is called *before* dropping the lock which protects the tree. Also rename it to radix_tree_iter_resume(), as some people thought it was necessary to call radix_tree_iter_next() each time around the loop.

radix_tree_next_slot() becomes closer to how it looked before multiorder support was introduced. It only checks to see if the next entry in the chunk is a sibling entry or a pointer to a node; this should be rare enough that handling this case out of line is not a performance impact (and such impact is amortised by the fact that the entry we just processed was a multiorder entry). Also, radix_tree_next_slot() used to force a new chunk lookup for untagged entries, which is more expensive than the out of line sibling entry skipping.

Link: http://lkml.kernel.org/r/1480369871-5271-55-git-send-email-mawilcox@linuxonhyperv.com
Signed-off-by: Matthew Wilcox
Tested-by: Kirill A. 
Shutemov Cc: Konstantin Khlebnikov Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 7 +++---- mm/shmem.c | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 7434a63..e32389a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1446,7 +1446,7 @@ static void collapse_shmem(struct mm_struct *mm, radix_tree_replace_slot(&mapping->page_tree, slot, new_page + (index % HPAGE_PMD_NR)); - slot = radix_tree_iter_next(&iter); + slot = radix_tree_iter_resume(slot, &iter); index++; continue; out_lru: @@ -1546,7 +1546,6 @@ tree_unlocked: /* Put holes back where they were */ radix_tree_delete(&mapping->page_tree, iter.index); - slot = radix_tree_iter_next(&iter); continue; } @@ -1557,11 +1556,11 @@ tree_unlocked: page_ref_unfreeze(page, 2); radix_tree_replace_slot(&mapping->page_tree, slot, page); + slot = radix_tree_iter_resume(slot, &iter); spin_unlock_irq(&mapping->tree_lock); putback_lru_page(page); unlock_page(page); spin_lock_irq(&mapping->tree_lock); - slot = radix_tree_iter_next(&iter); } VM_BUG_ON(nr_none); spin_unlock_irq(&mapping->tree_lock); @@ -1641,8 +1640,8 @@ static void khugepaged_scan_shmem(struct mm_struct *mm, present++; if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); diff --git a/mm/shmem.c b/mm/shmem.c index abd7403..be11c6d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -661,8 +661,8 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping, swapped++; if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } @@ -2447,8 +2447,8 @@ static void shmem_tag_pins(struct address_space *mapping) } if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } 
rcu_read_unlock(); @@ -2517,8 +2517,8 @@ static int shmem_wait_for_pins(struct address_space *mapping) spin_unlock_irq(&mapping->tree_lock); continue_resched: if (need_resched()) { + slot = radix_tree_iter_resume(slot, &iter); cond_resched_rcu(); - slot = radix_tree_iter_next(&iter); } } rcu_read_unlock(); -- cgit v1.1