summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2008-04-28 02:12:17 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 08:58:18 -0700
commitdd1a239f6f2d4d3eedd318583ec319aa145b324c (patch)
treeaff4224c96b5e2e67588c3946858a724863eeaf9 /mm
parent54a6eb5c4765aa573a030ceeba2c14e3d2ea5706 (diff)
downloadop-kernel-dev-dd1a239f6f2d4d3eedd318583ec319aa145b324c.zip
op-kernel-dev-dd1a239f6f2d4d3eedd318583ec319aa145b324c.tar.gz
mm: have zonelist contains structs with both a zone pointer and zone_idx
Filtering zonelists requires very frequent use of zone_idx(). This is costly as it involves a lookup of another structure and a substraction operation. As the zone_idx is often required, it should be quickly accessible. The node idx could also be stored here if it was found that accessing zone->node is significant which may be the case on workloads where nodemasks are heavily used. This patch introduces a struct zoneref to store a zone pointer and a zone index. The zonelist then consists of an array of these struct zonerefs which are looked up as necessary. Helpers are given for accessing the zone index as well as the node index. [kamezawa.hiroyu@jp.fujitsu.com: Suggested struct zoneref instead of embedding information in pointers] [hugh@veritas.com: mm-have-zonelist: fix memcg ooms] [hugh@veritas.com: just return do_try_to_free_pages] [hugh@veritas.com: do_try_to_free_pages gfp_mask redundant] Signed-off-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Christoph Lameter <clameter@sgi.com> Acked-by: David Rientjes <rientjes@google.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Christoph Lameter <clameter@sgi.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/mempolicy.c36
-rw-r--r--mm/oom_kill.c45
-rw-r--r--mm/page_alloc.c68
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slub.c2
-rw-r--r--mm/vmscan.c22
7 files changed, 96 insertions, 82 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ddd141c..4bced0d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -97,7 +97,8 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address,
htlb_alloc_mask, &mpol);
- struct zone *zone, **z;
+ struct zone *zone;
+ struct zoneref *z;
for_each_zone_zonelist(zone, z, zonelist, MAX_NR_ZONES - 1) {
nid = zone_to_nid(zone);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5d20bf4..90193a2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -186,7 +186,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
for_each_node_mask(nd, *nodes) {
struct zone *z = &NODE_DATA(nd)->node_zones[k];
if (z->present_pages > 0)
- zl->zones[num++] = z;
+ zoneref_set_zone(z, &zl->_zonerefs[num++]);
}
if (k == 0)
break;
@@ -196,7 +196,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
kfree(zl);
return ERR_PTR(-EINVAL);
}
- zl->zones[num] = NULL;
+ zl->_zonerefs[num].zone = NULL;
+ zl->_zonerefs[num].zone_idx = 0;
return zl;
}
@@ -504,9 +505,11 @@ static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
nodes_clear(*nodes);
switch (p->policy) {
case MPOL_BIND:
- for (i = 0; p->v.zonelist->zones[i]; i++)
- node_set(zone_to_nid(p->v.zonelist->zones[i]),
- *nodes);
+ for (i = 0; p->v.zonelist->_zonerefs[i].zone; i++) {
+ struct zoneref *zref;
+ zref = &p->v.zonelist->_zonerefs[i];
+ node_set(zonelist_node_idx(zref), *nodes);
+ }
break;
case MPOL_DEFAULT:
break;
@@ -1212,12 +1215,13 @@ unsigned slab_node(struct mempolicy *policy)
case MPOL_INTERLEAVE:
return interleave_nodes(policy);
- case MPOL_BIND:
+ case MPOL_BIND: {
/*
* Follow bind policy behavior and start allocation at the
* first node.
*/
- return zone_to_nid(policy->v.zonelist->zones[0]);
+ return zonelist_node_idx(policy->v.zonelist->_zonerefs);
+ }
case MPOL_PREFERRED:
if (policy->v.preferred_node >= 0)
@@ -1323,7 +1327,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
zl = node_zonelist(nid, gfp);
page = __alloc_pages(gfp, order, zl);
- if (page && page_zone(page) == zl->zones[0])
+ if (page && page_zone(page) == zonelist_zone(&zl->_zonerefs[0]))
inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
return page;
}
@@ -1463,10 +1467,14 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
return a->v.preferred_node == b->v.preferred_node;
case MPOL_BIND: {
int i;
- for (i = 0; a->v.zonelist->zones[i]; i++)
- if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
+ for (i = 0; a->v.zonelist->_zonerefs[i].zone; i++) {
+ struct zone *za, *zb;
+ za = zonelist_zone(&a->v.zonelist->_zonerefs[i]);
+ zb = zonelist_zone(&b->v.zonelist->_zonerefs[i]);
+ if (za != zb)
return 0;
- return b->v.zonelist->zones[i] == NULL;
+ }
+ return b->v.zonelist->_zonerefs[i].zone == NULL;
}
default:
BUG();
@@ -1785,12 +1793,12 @@ static void mpol_rebind_policy(struct mempolicy *pol,
break;
case MPOL_BIND: {
nodemask_t nodes;
- struct zone **z;
+ struct zoneref *z;
struct zonelist *zonelist;
nodes_clear(nodes);
- for (z = pol->v.zonelist->zones; *z; z++)
- node_set(zone_to_nid(*z), nodes);
+ for (z = pol->v.zonelist->_zonerefs; z->zone; z++)
+ node_set(zonelist_node_idx(z), nodes);
nodes_remap(tmp, nodes, *mpolmask, *newmask);
nodes = tmp;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 2c93502..e41504a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
{
#ifdef CONFIG_NUMA
struct zone *zone;
- struct zone **z;
+ struct zoneref *z;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
nodemask_t nodes = node_states[N_HIGH_MEMORY];
@@ -462,29 +462,29 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier);
* if a parallel OOM killing is already taking place that includes a zone in
* the zonelist. Otherwise, locks all zones in the zonelist and returns 1.
*/
-int try_set_zone_oom(struct zonelist *zonelist)
+int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
- struct zone **z;
+ struct zoneref *z;
+ struct zone *zone;
int ret = 1;
- z = zonelist->zones;
-
spin_lock(&zone_scan_mutex);
- do {
- if (zone_is_oom_locked(*z)) {
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+ if (zone_is_oom_locked(zone)) {
ret = 0;
goto out;
}
- } while (*(++z) != NULL);
+ }
+
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+ /*
+ * Lock each zone in the zonelist under zone_scan_mutex so a
+ * parallel invocation of try_set_zone_oom() doesn't succeed
+ * when it shouldn't.
+ */
+ zone_set_flag(zone, ZONE_OOM_LOCKED);
+ }
- /*
- * Lock each zone in the zonelist under zone_scan_mutex so a parallel
- * invocation of try_set_zone_oom() doesn't succeed when it shouldn't.
- */
- z = zonelist->zones;
- do {
- zone_set_flag(*z, ZONE_OOM_LOCKED);
- } while (*(++z) != NULL);
out:
spin_unlock(&zone_scan_mutex);
return ret;
@@ -495,16 +495,15 @@ out:
* allocation attempts with zonelists containing them may now recall the OOM
* killer, if necessary.
*/
-void clear_zonelist_oom(struct zonelist *zonelist)
+void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
- struct zone **z;
-
- z = zonelist->zones;
+ struct zoneref *z;
+ struct zone *zone;
spin_lock(&zone_scan_mutex);
- do {
- zone_clear_flag(*z, ZONE_OOM_LOCKED);
- } while (*(++z) != NULL);
+ for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
+ zone_clear_flag(zone, ZONE_OOM_LOCKED);
+ }
spin_unlock(&zone_scan_mutex);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ccb865..6d94d04 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1317,7 +1317,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
* We are low on memory in the second scan, and should leave no stone
* unturned looking for a free page.
*/
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
nodemask_t *allowednodes)
{
struct zonelist_cache *zlc; /* cached zonelist speedup info */
@@ -1328,7 +1328,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
if (!zlc)
return 1;
- i = z - zonelist->zones;
+ i = z - zonelist->_zonerefs;
n = zlc->z_to_n[i];
/* This zone is worth trying if it is allowed but not full */
@@ -1340,7 +1340,7 @@ static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
* zlc->fullzones, so that subsequent attempts to allocate a page
* from that zone don't waste time re-examining it.
*/
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
struct zonelist_cache *zlc; /* cached zonelist speedup info */
int i; /* index of *z in zonelist zones */
@@ -1349,7 +1349,7 @@ static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
if (!zlc)
return;
- i = z - zonelist->zones;
+ i = z - zonelist->_zonerefs;
set_bit(i, zlc->fullzones);
}
@@ -1361,13 +1361,13 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
return NULL;
}
-static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zone **z,
+static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
nodemask_t *allowednodes)
{
return 1;
}
-static void zlc_mark_zone_full(struct zonelist *zonelist, struct zone **z)
+static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
{
}
#endif /* CONFIG_NUMA */
@@ -1380,7 +1380,7 @@ static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, int high_zoneidx, int alloc_flags)
{
- struct zone **z;
+ struct zoneref *z;
struct page *page = NULL;
int classzone_idx;
struct zone *zone, *preferred_zone;
@@ -1389,8 +1389,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
int did_zlc_setup = 0; /* just call zlc_setup() one time */
z = first_zones_zonelist(zonelist, high_zoneidx);
- classzone_idx = zone_idx(*z);
- preferred_zone = *z;
+ classzone_idx = zonelist_zone_idx(z);
+ preferred_zone = zonelist_zone(z);
zonelist_scan:
/*
@@ -1453,7 +1453,8 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
{
const gfp_t wait = gfp_mask & __GFP_WAIT;
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
- struct zone **z;
+ struct zoneref *z;
+ struct zone *zone;
struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
@@ -1467,9 +1468,9 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
return NULL;
restart:
- z = zonelist->zones; /* the list of zones suitable for gfp_mask */
+ z = zonelist->_zonerefs; /* the list of zones suitable for gfp_mask */
- if (unlikely(*z == NULL)) {
+ if (unlikely(!z->zone)) {
/*
* Happens if we have an empty zonelist as a result of
* GFP_THISNODE being used on a memoryless node
@@ -1493,8 +1494,8 @@ restart:
if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
goto nopage;
- for (z = zonelist->zones; *z; z++)
- wakeup_kswapd(*z, order);
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+ wakeup_kswapd(zone, order);
/*
* OK, we're below the kswapd watermark and have kicked background
@@ -1575,7 +1576,7 @@ nofail_alloc:
if (page)
goto got_pg;
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
- if (!try_set_zone_oom(zonelist)) {
+ if (!try_set_zone_oom(zonelist, gfp_mask)) {
schedule_timeout_uninterruptible(1);
goto restart;
}
@@ -1589,18 +1590,18 @@ nofail_alloc:
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
zonelist, high_zoneidx, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
if (page) {
- clear_zonelist_oom(zonelist);
+ clear_zonelist_oom(zonelist, gfp_mask);
goto got_pg;
}
/* The OOM killer will not help higher order allocs so fail */
if (order > PAGE_ALLOC_COSTLY_ORDER) {
- clear_zonelist_oom(zonelist);
+ clear_zonelist_oom(zonelist, gfp_mask);
goto nopage;
}
out_of_memory(zonelist, gfp_mask, order);
- clear_zonelist_oom(zonelist);
+ clear_zonelist_oom(zonelist, gfp_mask);
goto restart;
}
@@ -1702,7 +1703,7 @@ EXPORT_SYMBOL(free_pages);
static unsigned int nr_free_zone_pages(int offset)
{
- struct zone **z;
+ struct zoneref *z;
struct zone *zone;
/* Just pick one node, since fallback list is circular */
@@ -1896,7 +1897,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
zone_type--;
zone = pgdat->node_zones + zone_type;
if (populated_zone(zone)) {
- zonelist->zones[nr_zones++] = zone;
+ zoneref_set_zone(zone,
+ &zonelist->_zonerefs[nr_zones++]);
check_highest_zone(zone_type);
}
@@ -2072,11 +2074,12 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
struct zonelist *zonelist;
zonelist = &pgdat->node_zonelists[0];
- for (j = 0; zonelist->zones[j] != NULL; j++)
+ for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
;
j = build_zonelists_node(NODE_DATA(node), zonelist, j,
MAX_NR_ZONES - 1);
- zonelist->zones[j] = NULL;
+ zonelist->_zonerefs[j].zone = NULL;
+ zonelist->_zonerefs[j].zone_idx = 0;
}
/*
@@ -2089,7 +2092,8 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
zonelist = &pgdat->node_zonelists[1];
j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
- zonelist->zones[j] = NULL;
+ zonelist->_zonerefs[j].zone = NULL;
+ zonelist->_zonerefs[j].zone_idx = 0;
}
/*
@@ -2114,12 +2118,14 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
node = node_order[j];
z = &NODE_DATA(node)->node_zones[zone_type];
if (populated_zone(z)) {
- zonelist->zones[pos++] = z;
+ zoneref_set_zone(z,
+ &zonelist->_zonerefs[pos++]);
check_highest_zone(zone_type);
}
}
}
- zonelist->zones[pos] = NULL;
+ zonelist->_zonerefs[pos].zone = NULL;
+ zonelist->_zonerefs[pos].zone_idx = 0;
}
static int default_zonelist_order(void)
@@ -2196,7 +2202,8 @@ static void build_zonelists(pg_data_t *pgdat)
/* initialize zonelists */
for (i = 0; i < MAX_ZONELISTS; i++) {
zonelist = pgdat->node_zonelists + i;
- zonelist->zones[0] = NULL;
+ zonelist->_zonerefs[0].zone = NULL;
+ zonelist->_zonerefs[0].zone_idx = 0;
}
/* NUMA-aware ordering of nodes */
@@ -2248,13 +2255,13 @@ static void build_zonelist_cache(pg_data_t *pgdat)
{
struct zonelist *zonelist;
struct zonelist_cache *zlc;
- struct zone **z;
+ struct zoneref *z;
zonelist = &pgdat->node_zonelists[0];
zonelist->zlcache_ptr = zlc = &zonelist->zlcache;
bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
- for (z = zonelist->zones; *z; z++)
- zlc->z_to_n[z - zonelist->zones] = zone_to_nid(*z);
+ for (z = zonelist->_zonerefs; z->zone; z++)
+ zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
}
@@ -2297,7 +2304,8 @@ static void build_zonelists(pg_data_t *pgdat)
MAX_NR_ZONES - 1);
}
- zonelist->zones[j] = NULL;
+ zonelist->_zonerefs[j].zone = NULL;
+ zonelist->_zonerefs[j].zone_idx = 0;
}
/* non-NUMA variant of zonelist performance cache - just NULL zlcache_ptr */
diff --git a/mm/slab.c b/mm/slab.c
index 2985184..7bc4a13 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3242,7 +3242,7 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
{
struct zonelist *zonelist;
gfp_t local_flags;
- struct zone **z;
+ struct zoneref *z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
void *obj = NULL;
diff --git a/mm/slub.c b/mm/slub.c
index 80d20cc..48fff83 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1284,7 +1284,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
- struct zone **z;
+ struct zoneref *z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
struct page *page;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0515b8f..eceac9f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1251,7 +1251,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
{
enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
unsigned long nr_reclaimed = 0;
- struct zone **z;
+ struct zoneref *z;
struct zone *zone;
sc->all_unreclaimable = 1;
@@ -1301,7 +1301,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist,
* allocation attempt will fail.
*/
static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
- gfp_t gfp_mask, struct scan_control *sc)
+ struct scan_control *sc)
{
int priority;
int ret = 0;
@@ -1309,9 +1309,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
unsigned long nr_reclaimed = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long lru_pages = 0;
- struct zone **z;
+ struct zoneref *z;
struct zone *zone;
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
+ enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
if (scan_global_lru(sc))
count_vm_event(ALLOCSTALL);
@@ -1339,7 +1339,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
* over limit cgroups
*/
if (scan_global_lru(sc)) {
- shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
+ shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
if (reclaim_state) {
nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
@@ -1410,7 +1410,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
.isolate_pages = isolate_pages_global,
};
- return do_try_to_free_pages(zonelist, gfp_mask, &sc);
+ return do_try_to_free_pages(zonelist, &sc);
}
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1419,7 +1419,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
gfp_t gfp_mask)
{
struct scan_control sc = {
- .gfp_mask = gfp_mask,
.may_writepage = !laptop_mode,
.may_swap = 1,
.swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1429,12 +1428,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
.isolate_pages = mem_cgroup_isolate_pages,
};
struct zonelist *zonelist;
- int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
- zonelist = &NODE_DATA(numa_node_id())->node_zonelists[target_zone];
- if (do_try_to_free_pages(zonelist, sc.gfp_mask, &sc))
- return 1;
- return 0;
+ sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
+ (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
+ zonelist = NODE_DATA(numa_node_id())->node_zonelists;
+ return do_try_to_free_pages(zonelist, &sc);
}
#endif
OpenPOWER on IntegriCloud