diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/page_alloc.c | 17 | ||||
-rw-r--r-- | mm/vmscan.c | 68 |
2 files changed, 82 insertions, 3 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c2e2974..df54e2f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -878,7 +878,9 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, mark = (*z)->pages_high; if (!zone_watermark_ok(*z, order, mark, classzone_idx, alloc_flags)) - continue; + if (!zone_reclaim_mode || + !zone_reclaim(*z, gfp_mask, order)) + continue; } page = buffered_rmqueue(zonelist, *z, order, gfp_mask); @@ -1595,13 +1597,22 @@ static void __init build_zonelists(pg_data_t *pgdat) prev_node = local_node; nodes_clear(used_mask); while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { + int distance = node_distance(local_node, node); + + /* + * If another node is sufficiently far away then it is better + * to reclaim pages in a zone before going off node. + */ + if (distance > RECLAIM_DISTANCE) + zone_reclaim_mode = 1; + /* * We don't want to pressure a particular node. * So adding penalty to the first node in same * distance group to make it round-robin. */ - if (node_distance(local_node, node) != - node_distance(local_node, prev_node)) + + if (distance != node_distance(local_node, prev_node)) node_load[node] += load; prev_node = node; load--; diff --git a/mm/vmscan.c b/mm/vmscan.c index e5117b6..2e34b61 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1572,3 +1572,71 @@ static int __init kswapd_init(void) } module_init(kswapd_init) + +#ifdef CONFIG_NUMA +/* + * Zone reclaim mode + * + * If non-zero call zone_reclaim when the number of free pages falls below + * the watermarks. + * + * In the future we may add flags to the mode. However, the page allocator + * should only have to check that zone_reclaim_mode != 0 before calling + * zone_reclaim(). + */ +int zone_reclaim_mode __read_mostly; + +/* + * Mininum time between zone reclaim scans + */ +#define ZONE_RECLAIM_INTERVAL HZ/2 +/* + * Try to free up some pages from this zone through reclaim. + */ +int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) +{ + int nr_pages = 1 << order; + struct task_struct *p = current; + struct reclaim_state reclaim_state; + struct scan_control sc = { + .gfp_mask = gfp_mask, + .may_writepage = 0, + .may_swap = 0, + .nr_mapped = read_page_state(nr_mapped), + .nr_scanned = 0, + .nr_reclaimed = 0, + .priority = 0 + }; + + if (!(gfp_mask & __GFP_WAIT) || + zone->zone_pgdat->node_id != numa_node_id() || + zone->all_unreclaimable || + atomic_read(&zone->reclaim_in_progress) > 0) + return 0; + + if (time_before(jiffies, + zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL)) + return 0; + + disable_swap_token(); + + if (nr_pages > SWAP_CLUSTER_MAX) + sc.swap_cluster_max = nr_pages; + else + sc.swap_cluster_max = SWAP_CLUSTER_MAX; + + cond_resched(); + p->flags |= PF_MEMALLOC; + reclaim_state.reclaimed_slab = 0; + p->reclaim_state = &reclaim_state; + shrink_zone(zone, &sc); + p->reclaim_state = NULL; + current->flags &= ~PF_MEMALLOC; + + if (sc.nr_reclaimed == 0) + zone->last_unsuccessful_zone_reclaim = jiffies; + + return sc.nr_reclaimed > nr_pages; +} +#endif + |