diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 15:01:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-12 15:01:38 -0700 |
commit | 26935fb06ee88f1188789807687c03041f3c70d9 (patch) | |
tree | 381c487716540b52348d78bee6555f8fa61d77ef /mm/vmscan.c | |
parent | 3cc69b638e11bfda5d013c2b75b60934aa0e88a1 (diff) | |
parent | bf2ba3bc185269eca274b458aac46ba1ad7c1121 (diff) | |
download | op-kernel-dev-26935fb06ee88f1188789807687c03041f3c70d9.zip op-kernel-dev-26935fb06ee88f1188789807687c03041f3c70d9.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs pile 4 from Al Viro:
"list_lru pile, mostly"
This came out of Andrew's pile, Al ended up doing the merge work so that
Andrew didn't have to.
Additionally, a few fixes.
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (42 commits)
super: fix for destroy lrus
list_lru: dynamically adjust node arrays
shrinker: Kill old ->shrink API.
shrinker: convert remaining shrinkers to count/scan API
staging/lustre/libcfs: cleanup linux-mem.h
staging/lustre/ptlrpc: convert to new shrinker API
staging/lustre/obdclass: convert lu_object shrinker to count/scan API
staging/lustre/ldlm: convert to shrinkers to count/scan API
hugepage: convert huge zero page shrinker to new shrinker API
i915: bail out earlier when shrinker cannot acquire mutex
drivers: convert shrinkers to new count/scan API
fs: convert fs shrinkers to new scan/count API
xfs: fix dquot isolation hang
xfs-convert-dquot-cache-lru-to-list_lru-fix
xfs: convert dquot cache lru to list_lru
xfs: rework buffer dispose list tracking
xfs-convert-buftarg-lru-to-generic-code-fix
xfs: convert buftarg LRU to generic code
fs: convert inode and dentry shrinking to be node aware
vmscan: per-node deferred work
...
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 241 |
1 files changed, 140 insertions, 101 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index fe715da..beb3577 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -174,14 +174,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) } /* - * Add a shrinker callback to be called from the vm + * Add a shrinker callback to be called from the vm. */ -void register_shrinker(struct shrinker *shrinker) +int register_shrinker(struct shrinker *shrinker) { - atomic_long_set(&shrinker->nr_in_batch, 0); + size_t size = sizeof(*shrinker->nr_deferred); + + /* + * If we only have one possible node in the system anyway, save + * ourselves the trouble and disable NUMA aware behavior. This way we + * will save memory and some small loop time later. + */ + if (nr_node_ids == 1) + shrinker->flags &= ~SHRINKER_NUMA_AWARE; + + if (shrinker->flags & SHRINKER_NUMA_AWARE) + size *= nr_node_ids; + + shrinker->nr_deferred = kzalloc(size, GFP_KERNEL); + if (!shrinker->nr_deferred) + return -ENOMEM; + down_write(&shrinker_rwsem); list_add_tail(&shrinker->list, &shrinker_list); up_write(&shrinker_rwsem); + return 0; } EXPORT_SYMBOL(register_shrinker); @@ -196,15 +213,102 @@ void unregister_shrinker(struct shrinker *shrinker) } EXPORT_SYMBOL(unregister_shrinker); -static inline int do_shrinker_shrink(struct shrinker *shrinker, - struct shrink_control *sc, - unsigned long nr_to_scan) -{ - sc->nr_to_scan = nr_to_scan; - return (*shrinker->shrink)(shrinker, sc); +#define SHRINK_BATCH 128 + +static unsigned long +shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker, + unsigned long nr_pages_scanned, unsigned long lru_pages) +{ + unsigned long freed = 0; + unsigned long long delta; + long total_scan; + long max_pass; + long nr; + long new_nr; + int nid = shrinkctl->nid; + long batch_size = shrinker->batch ? shrinker->batch + : SHRINK_BATCH; + + max_pass = shrinker->count_objects(shrinker, shrinkctl); + if (max_pass == 0) + return 0; + + /* + * copy the current shrinker scan count into a local variable + * and zero it so that other concurrent shrinker invocations + * don't also do this scanning work. + */ + nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0); + + total_scan = nr; + delta = (4 * nr_pages_scanned) / shrinker->seeks; + delta *= max_pass; + do_div(delta, lru_pages + 1); + total_scan += delta; + if (total_scan < 0) { + printk(KERN_ERR + "shrink_slab: %pF negative objects to delete nr=%ld\n", + shrinker->scan_objects, total_scan); + total_scan = max_pass; + } + + /* + * We need to avoid excessive windup on filesystem shrinkers + * due to large numbers of GFP_NOFS allocations causing the + * shrinkers to return -1 all the time. This results in a large + * nr being built up so when a shrink that can do some work + * comes along it empties the entire cache due to nr >>> + * max_pass. This is bad for sustaining a working set in + * memory. + * + * Hence only allow the shrinker to scan the entire cache when + * a large delta change is calculated directly. + */ + if (delta < max_pass / 4) + total_scan = min(total_scan, max_pass / 2); + + /* + * Avoid risking looping forever due to too large nr value: + * never try to free more than twice the estimate number of + * freeable entries. + */ + if (total_scan > max_pass * 2) + total_scan = max_pass * 2; + + trace_mm_shrink_slab_start(shrinker, shrinkctl, nr, + nr_pages_scanned, lru_pages, + max_pass, delta, total_scan); + + while (total_scan >= batch_size) { + unsigned long ret; + + shrinkctl->nr_to_scan = batch_size; + ret = shrinker->scan_objects(shrinker, shrinkctl); + if (ret == SHRINK_STOP) + break; + freed += ret; + + count_vm_events(SLABS_SCANNED, batch_size); + total_scan -= batch_size; + + cond_resched(); + } + + /* + * move the unused scan count back into the shrinker in a + * manner that handles concurrent updates. If we exhausted the + * scan, there is no need to do an update. + */ + if (total_scan > 0) + new_nr = atomic_long_add_return(total_scan, + &shrinker->nr_deferred[nid]); + else + new_nr = atomic_long_read(&shrinker->nr_deferred[nid]); + + trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr); + return freed; } -#define SHRINK_BATCH 128 /* * Call the shrink functions to age shrinkable caches * @@ -224,115 +328,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker, * * Returns the number of slab objects which we shrunk. */ -unsigned long shrink_slab(struct shrink_control *shrink, +unsigned long shrink_slab(struct shrink_control *shrinkctl, unsigned long nr_pages_scanned, unsigned long lru_pages) { struct shrinker *shrinker; - unsigned long ret = 0; + unsigned long freed = 0; if (nr_pages_scanned == 0) nr_pages_scanned = SWAP_CLUSTER_MAX; if (!down_read_trylock(&shrinker_rwsem)) { - /* Assume we'll be able to shrink next time */ - ret = 1; + /* + * If we would return 0, our callers would understand that we + * have nothing else to shrink and give up trying. By returning + * 1 we keep it going and assume we'll be able to shrink next + * time. + */ + freed = 1; goto out; } list_for_each_entry(shrinker, &shrinker_list, list) { - unsigned long long delta; - long total_scan; - long max_pass; - int shrink_ret = 0; - long nr; - long new_nr; - long batch_size = shrinker->batch ? shrinker->batch - : SHRINK_BATCH; - - max_pass = do_shrinker_shrink(shrinker, shrink, 0); - if (max_pass <= 0) - continue; - - /* - * copy the current shrinker scan count into a local variable - * and zero it so that other concurrent shrinker invocations - * don't also do this scanning work. - */ - nr = atomic_long_xchg(&shrinker->nr_in_batch, 0); - - total_scan = nr; - delta = (4 * nr_pages_scanned) / shrinker->seeks; - delta *= max_pass; - do_div(delta, lru_pages + 1); - total_scan += delta; - if (total_scan < 0) { - printk(KERN_ERR "shrink_slab: %pF negative objects to " - "delete nr=%ld\n", - shrinker->shrink, total_scan); - total_scan = max_pass; - } - - /* - * We need to avoid excessive windup on filesystem shrinkers - * due to large numbers of GFP_NOFS allocations causing the - * shrinkers to return -1 all the time. This results in a large - * nr being built up so when a shrink that can do some work - * comes along it empties the entire cache due to nr >>> - * max_pass. This is bad for sustaining a working set in - * memory. - * - * Hence only allow the shrinker to scan the entire cache when - * a large delta change is calculated directly. - */ - if (delta < max_pass / 4) - total_scan = min(total_scan, max_pass / 2); - - /* - * Avoid risking looping forever due to too large nr value: - * never try to free more than twice the estimate number of - * freeable entries. - */ - if (total_scan > max_pass * 2) - total_scan = max_pass * 2; - - trace_mm_shrink_slab_start(shrinker, shrink, nr, - nr_pages_scanned, lru_pages, - max_pass, delta, total_scan); - - while (total_scan >= batch_size) { - int nr_before; + for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) { + if (!node_online(shrinkctl->nid)) + continue; - nr_before = do_shrinker_shrink(shrinker, shrink, 0); - shrink_ret = do_shrinker_shrink(shrinker, shrink, - batch_size); - if (shrink_ret == -1) + if (!(shrinker->flags & SHRINKER_NUMA_AWARE) && + (shrinkctl->nid != 0)) break; - if (shrink_ret < nr_before) - ret += nr_before - shrink_ret; - count_vm_events(SLABS_SCANNED, batch_size); - total_scan -= batch_size; - cond_resched(); - } + freed += shrink_slab_node(shrinkctl, shrinker, + nr_pages_scanned, lru_pages); - /* - * move the unused scan count back into the shrinker in a - * manner that handles concurrent updates. If we exhausted the - * scan, there is no need to do an update. - */ - if (total_scan > 0) - new_nr = atomic_long_add_return(total_scan, - &shrinker->nr_in_batch); - else - new_nr = atomic_long_read(&shrinker->nr_in_batch); - - trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr); + } } up_read(&shrinker_rwsem); out: cond_resched(); - return ret; + return freed; } static inline int is_page_cache_freeable(struct page *page) @@ -2368,12 +2402,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, */ if (global_reclaim(sc)) { unsigned long lru_pages = 0; + + nodes_clear(shrink->nodes_to_scan); for_each_zone_zonelist(zone, z, zonelist, gfp_zone(sc->gfp_mask)) { if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) continue; lru_pages += zone_reclaimable_pages(zone); + node_set(zone_to_nid(zone), + shrink->nodes_to_scan); } shrink_slab(shrink, sc->nr_scanned, lru_pages); @@ -2829,6 +2867,8 @@ static bool kswapd_shrink_zone(struct zone *zone, return true; shrink_zone(zone, sc); + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); reclaim_state->reclaimed_slab = 0; shrink_slab(&shrink, sc->nr_scanned, lru_pages); @@ -3520,10 +3560,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) * number of slab pages and shake the slab until it is reduced * by the same nr_pages that we used for reclaiming unmapped * pages. - * - * Note that shrink_slab will free memory on all zones and may - * take a long time. */ + nodes_clear(shrink.nodes_to_scan); + node_set(zone_to_nid(zone), shrink.nodes_to_scan); for (;;) { unsigned long lru_pages = zone_reclaimable_pages(zone); |