summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorVlastimil Babka <vbabka@suse.cz>2014-10-09 15:27:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-09 22:25:54 -0400
commit1f9efdef4f3f1d2a073e524113fd0038af636f2b (patch)
tree0de1b4fcc02a95df56e7c6847d1b1592d1b09a13 /mm/page_alloc.c
parent7d49d8868336bbf4f68714d8282ca5fd65e387ed (diff)
downloadop-kernel-dev-1f9efdef4f3f1d2a073e524113fd0038af636f2b.zip
op-kernel-dev-1f9efdef4f3f1d2a073e524113fd0038af636f2b.tar.gz
mm, compaction: khugepaged should not give up due to need_resched()
Async compaction aborts when it detects zone lock contention or need_resched() is true. David Rientjes has reported that in practice, most direct async compactions for THP allocation abort due to need_resched(). This means that a second direct compaction is never attempted, which might be OK for a page fault, but khugepaged is intended to attempt a sync compaction in such a case and in these cases it won't. This patch replaces "bool contended" in compact_control with an int that distinguishes between aborting due to need_resched() and aborting due to lock contention. This allows propagating the abort through all compaction functions as before, but passing the abort reason up to __alloc_pages_slowpath() which decides when to continue with direct reclaim and another compaction attempt. Another problem is that try_to_compact_pages() did not act upon the reported contention (either need_resched() or lock contention) immediately and would proceed with another zone from the zonelist. When need_resched() is true, that means initializing another zone compaction, only to check need_resched() again in isolate_migratepages() and abort. For zone lock contention, the unintended consequence is that the lock contended status reported back to the allocator is determined from the last zone where compaction was attempted, which is rather arbitrary. This patch fixes the problem in the following way: - async compaction of a zone aborting due to need_resched() or fatal signal pending means that further zones should not be tried. We report COMPACT_CONTENDED_SCHED to the allocator. - aborting zone compaction due to lock contention means we can still try another zone, since it has a different set of locks. We report back COMPACT_CONTENDED_LOCK only if, in *all* zones where compaction was attempted, it was aborted due to lock contention. As a result of these fixes, khugepaged will proceed with a second sync compaction as intended, when the preceding async compaction aborted due to need_resched(). 
Page fault compactions aborting due to need_resched() will spare some cycles previously wasted by initializing another zone compaction only to abort again. Lock contention will be reported only when compaction in all zones aborted due to lock contention, and therefore it's not a good idea to try again after reclaim. In stress-highalloc from mmtests configured to use __GFP_NO_KSWAPD, this has improved the number of THP collapse allocations by 10%, which shows a positive effect on khugepaged. The benchmark's success rates are unchanged as it is not recognized as khugepaged. Numbers of compact_stall and compact_fail events have however decreased by 20%, with compact_success still a bit improved, which is good. With the benchmark configured not to use __GFP_NO_KSWAPD, there is a 6% improvement in THP collapse allocations, and only a slight improvement in stalls and failures. [akpm@linux-foundation.org: fix warnings] Reported-by: David Rientjes <rientjes@google.com> Signed-off-by: Vlastimil Babka <vbabka@suse.cz> Cc: Minchan Kim <minchan@kernel.org> Acked-by: Mel Gorman <mgorman@suse.de> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Michal Nazarewicz <mina86@mina86.com> Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Christoph Lameter <cl@linux.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c45
1 files changed, 33 insertions, 12 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfbf54b..313338d7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2297,7 +2297,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int classzone_idx, int migratetype, enum migrate_mode mode,
- bool *contended_compaction, bool *deferred_compaction)
+ int *contended_compaction, bool *deferred_compaction)
{
struct zone *last_compact_zone = NULL;
unsigned long compact_result;
@@ -2371,7 +2371,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int classzone_idx, int migratetype, enum migrate_mode mode,
- bool *contended_compaction, bool *deferred_compaction)
+ int *contended_compaction, bool *deferred_compaction)
{
return NULL;
}
@@ -2547,7 +2547,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress;
enum migrate_mode migration_mode = MIGRATE_ASYNC;
bool deferred_compaction = false;
- bool contended_compaction = false;
+ int contended_compaction = COMPACT_CONTENDED_NONE;
/*
* In the slowpath, we sanity check order to avoid ever trying to
@@ -2651,15 +2651,36 @@ rebalance:
if (page)
goto got_pg;
- /*
- * If compaction is deferred for high-order allocations, it is because
- * sync compaction recently failed. In this is the case and the caller
- * requested a movable allocation that does not heavily disrupt the
- * system then fail the allocation instead of entering direct reclaim.
- */
- if ((deferred_compaction || contended_compaction) &&
- (gfp_mask & __GFP_NO_KSWAPD))
- goto nopage;
+ /* Checks for THP-specific high-order allocations */
+ if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+ /*
+ * If compaction is deferred for high-order allocations, it is
+ * because sync compaction recently failed. If this is the case
+ * and the caller requested a THP allocation, we do not want
+ * to heavily disrupt the system, so we fail the allocation
+ * instead of entering direct reclaim.
+ */
+ if (deferred_compaction)
+ goto nopage;
+
+ /*
+ * In all zones where compaction was attempted (and not
+ * deferred or skipped), lock contention has been detected.
+ * For THP allocation we do not want to disrupt the others
+ * so we fallback to base pages instead.
+ */
+ if (contended_compaction == COMPACT_CONTENDED_LOCK)
+ goto nopage;
+
+ /*
+ * If compaction was aborted due to need_resched(), we do not
+ * want to further increase allocation latency, unless it is
+ * khugepaged trying to collapse.
+ */
+ if (contended_compaction == COMPACT_CONTENDED_SCHED
+ && !(current->flags & PF_KTHREAD))
+ goto nopage;
+ }
/*
* It can become very expensive to allocate transparent hugepages at
OpenPOWER on IntegriCloud