summaryrefslogtreecommitdiffstats
path: root/mm/vmscan.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--mm/vmscan.c62
1 files changed, 42 insertions, 20 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd9a72b..224dd29 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -91,6 +91,9 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
+ /* Can cgroups be reclaimed below their normal consumption range? */
+ unsigned int may_thrash:1;
+
unsigned int hibernation_mode:1;
/* One of the zones is ready for compaction */
@@ -497,7 +500,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
}
if (mapping->a_ops->writepage == NULL)
return PAGE_ACTIVATE;
- if (!may_write_to_queue(mapping->backing_dev_info, sc))
+ if (!may_write_to_queue(inode_to_bdi(mapping->host), sc))
return PAGE_KEEP;
if (clear_page_dirty_for_io(page)) {
@@ -876,7 +879,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*/
mapping = page_mapping(page);
if (((dirty || writeback) && mapping &&
- bdi_write_congested(mapping->backing_dev_info)) ||
+ bdi_write_congested(inode_to_bdi(mapping->host))) ||
(writeback && PageReclaim(page)))
nr_congested++;
@@ -1903,8 +1906,12 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness,
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (current_is_kswapd() && !zone_reclaimable(zone))
- force_scan = true;
+ if (current_is_kswapd()) {
+ if (!zone_reclaimable(zone))
+ force_scan = true;
+ if (!mem_cgroup_lruvec_online(lruvec))
+ force_scan = true;
+ }
if (!global_reclaim(sc))
force_scan = true;
@@ -2290,6 +2297,12 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
struct lruvec *lruvec;
int swappiness;
+ if (mem_cgroup_low(root, memcg)) {
+ if (!sc->may_thrash)
+ continue;
+ mem_cgroup_events(memcg, MEMCG_LOW, 1);
+ }
+
lruvec = mem_cgroup_zone_lruvec(zone, memcg);
swappiness = mem_cgroup_swappiness(memcg);
@@ -2311,8 +2324,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
mem_cgroup_iter_break(root, memcg);
break;
}
- memcg = mem_cgroup_iter(root, memcg, &reclaim);
- } while (memcg);
+ } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
/*
* Shrink the slab caches in the same proportion that
@@ -2515,10 +2527,11 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
struct scan_control *sc)
{
+ int initial_priority = sc->priority;
unsigned long total_scanned = 0;
unsigned long writeback_threshold;
bool zones_reclaimable;
-
+retry:
delayacct_freepages_start();
if (global_reclaim(sc))
@@ -2568,6 +2581,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
if (sc->compaction_ready)
return 1;
+ /* Untapped cgroup reserves? Don't OOM, retry. */
+ if (!sc->may_thrash) {
+ sc->priority = initial_priority;
+ sc->may_thrash = 1;
+ goto retry;
+ }
+
/* Any of the zones still reclaimable? Don't OOM. */
if (zones_reclaimable)
return 1;
@@ -2656,7 +2676,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
* should make reasonable progress.
*/
for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_mask, nodemask) {
+ gfp_zone(gfp_mask), nodemask) {
if (zone_idx(zone) > ZONE_NORMAL)
continue;
@@ -2921,18 +2941,20 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
return false;
/*
- * There is a potential race between when kswapd checks its watermarks
- * and a process gets throttled. There is also a potential race if
- * processes get throttled, kswapd wakes, a large process exits therby
- * balancing the zones that causes kswapd to miss a wakeup. If kswapd
- * is going to sleep, no process should be sleeping on pfmemalloc_wait
- * so wake them now if necessary. If necessary, processes will wake
- * kswapd and get throttled again
+ * The throttled processes are normally woken up in balance_pgdat() as
+ * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+ * race between when kswapd checks the watermarks and a process gets
+ * throttled. There is also a potential race if processes get
+ * throttled, kswapd wakes, a large process exits thereby balancing the
+ * zones, which causes kswapd to exit balance_pgdat() before reaching
+ * the wake up checks. If kswapd is going to sleep, no process should
+ * be sleeping on pfmemalloc_wait, so wake them now if necessary. If
+ * the wake up is premature, processes will wake kswapd and get
+ * throttled again. The difference from wake ups in balance_pgdat() is
+ * that here we are under prepare_to_wait().
*/
- if (waitqueue_active(&pgdat->pfmemalloc_wait)) {
- wake_up(&pgdat->pfmemalloc_wait);
- return false;
- }
+ if (waitqueue_active(&pgdat->pfmemalloc_wait))
+ wake_up_all(&pgdat->pfmemalloc_wait);
return pgdat_balanced(pgdat, order, classzone_idx);
}
@@ -3173,7 +3195,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
pfmemalloc_watermark_ok(pgdat))
- wake_up(&pgdat->pfmemalloc_wait);
+ wake_up_all(&pgdat->pfmemalloc_wait);
/*
* Fragmentation may mean that the system cannot be rebalanced
OpenPOWER on IntegriCloud