summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/memory.c13
-rw-r--r--mm/mempolicy.c130
-rw-r--r--mm/page_alloc.c27
-rw-r--r--mm/page_isolation.c26
-rw-r--r--mm/shmem.c4
-rw-r--r--mm/vmscan.c4
6 files changed, 91 insertions, 113 deletions
diff --git a/mm/memory.c b/mm/memory.c
index e0a9b0c..bb1369f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -184,10 +184,14 @@ static int tlb_next_batch(struct mmu_gather *tlb)
return 1;
}
+ if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
+ return 0;
+
batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
if (!batch)
return 0;
+ tlb->batch_count++;
batch->next = NULL;
batch->nr = 0;
batch->max = MAX_GATHER_BATCH;
@@ -216,6 +220,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
tlb->local.nr = 0;
tlb->local.max = ARRAY_SIZE(tlb->__pages);
tlb->active = &tlb->local;
+ tlb->batch_count = 0;
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
tlb->batch = NULL;
@@ -3706,6 +3711,14 @@ retry:
if (pmd_trans_huge(orig_pmd)) {
unsigned int dirty = flags & FAULT_FLAG_WRITE;
+ /*
+ * If the pmd is splitting, return and retry the
+ * the fault. Alternative: wait until the split
+ * is done, and goto retry.
+ */
+ if (pmd_trans_splitting(orig_pmd))
+ return 0;
+
if (pmd_numa(orig_pmd))
return do_huge_pmd_numa_page(mm, vma, address,
orig_pmd, pmd);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d1b315e..e2df1c1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2132,7 +2132,7 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
*/
/* lookup first element intersecting start-end */
-/* Caller holds sp->mutex */
+/* Caller holds sp->lock */
static struct sp_node *
sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
{
@@ -2196,13 +2196,13 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
if (!sp->root.rb_node)
return NULL;
- mutex_lock(&sp->mutex);
+ spin_lock(&sp->lock);
sn = sp_lookup(sp, idx, idx+1);
if (sn) {
mpol_get(sn->policy);
pol = sn->policy;
}
- mutex_unlock(&sp->mutex);
+ spin_unlock(&sp->lock);
return pol;
}
@@ -2328,6 +2328,14 @@ static void sp_delete(struct shared_policy *sp, struct sp_node *n)
sp_free(n);
}
+static void sp_node_init(struct sp_node *node, unsigned long start,
+ unsigned long end, struct mempolicy *pol)
+{
+ node->start = start;
+ node->end = end;
+ node->policy = pol;
+}
+
static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
struct mempolicy *pol)
{
@@ -2344,10 +2352,7 @@ static struct sp_node *sp_alloc(unsigned long start, unsigned long end,
return NULL;
}
newpol->flags |= MPOL_F_SHARED;
-
- n->start = start;
- n->end = end;
- n->policy = newpol;
+ sp_node_init(n, start, end, newpol);
return n;
}
@@ -2357,9 +2362,12 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
unsigned long end, struct sp_node *new)
{
struct sp_node *n;
+ struct sp_node *n_new = NULL;
+ struct mempolicy *mpol_new = NULL;
int ret = 0;
- mutex_lock(&sp->mutex);
+restart:
+ spin_lock(&sp->lock);
n = sp_lookup(sp, start, end);
/* Take care of old policies in the same range. */
while (n && n->start < end) {
@@ -2372,14 +2380,16 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
} else {
/* Old policy spanning whole new range. */
if (n->end > end) {
- struct sp_node *new2;
- new2 = sp_alloc(end, n->end, n->policy);
- if (!new2) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!n_new)
+ goto alloc_new;
+
+ *mpol_new = *n->policy;
+ atomic_set(&mpol_new->refcnt, 1);
+ sp_node_init(n_new, n->end, end, mpol_new);
+ sp_insert(sp, n_new);
n->end = start;
- sp_insert(sp, new2);
+ n_new = NULL;
+ mpol_new = NULL;
break;
} else
n->end = start;
@@ -2390,9 +2400,27 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
}
if (new)
sp_insert(sp, new);
-out:
- mutex_unlock(&sp->mutex);
+ spin_unlock(&sp->lock);
+ ret = 0;
+
+err_out:
+ if (mpol_new)
+ mpol_put(mpol_new);
+ if (n_new)
+ kmem_cache_free(sn_cache, n_new);
+
return ret;
+
+alloc_new:
+ spin_unlock(&sp->lock);
+ ret = -ENOMEM;
+ n_new = kmem_cache_alloc(sn_cache, GFP_KERNEL);
+ if (!n_new)
+ goto err_out;
+ mpol_new = kmem_cache_alloc(policy_cache, GFP_KERNEL);
+ if (!mpol_new)
+ goto err_out;
+ goto restart;
}
/**
@@ -2410,7 +2438,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
int ret;
sp->root = RB_ROOT; /* empty tree == default mempolicy */
- mutex_init(&sp->mutex);
+ spin_lock_init(&sp->lock);
if (mpol) {
struct vm_area_struct pvma;
@@ -2476,14 +2504,14 @@ void mpol_free_shared_policy(struct shared_policy *p)
if (!p->root.rb_node)
return;
- mutex_lock(&p->mutex);
+ spin_lock(&p->lock);
next = rb_first(&p->root);
while (next) {
n = rb_entry(next, struct sp_node, nd);
next = rb_next(&n->nd);
sp_delete(p, n);
}
- mutex_unlock(&p->mutex);
+ spin_unlock(&p->lock);
}
#ifdef CONFIG_NUMA_BALANCING
@@ -2595,8 +2623,7 @@ void numa_default_policy(void)
*/
/*
- * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag
- * Used only for mpol_parse_str() and mpol_to_str()
+ * "local" is implemented internally by MPOL_PREFERRED with MPOL_F_LOCAL flag.
*/
static const char * const policy_modes[] =
{
@@ -2610,28 +2637,20 @@ static const char * const policy_modes[] =
#ifdef CONFIG_TMPFS
/**
- * mpol_parse_str - parse string to mempolicy
+ * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
* @str: string containing mempolicy to parse
* @mpol: pointer to struct mempolicy pointer, returned on success.
- * @no_context: flag whether to "contextualize" the mempolicy
*
* Format of input:
* <mode>[=<flags>][:<nodelist>]
*
- * if @no_context is true, save the input nodemask in w.user_nodemask in
- * the returned mempolicy. This will be used to "clone" the mempolicy in
- * a specific context [cpuset] at a later time. Used to parse tmpfs mpol
- * mount option. Note that if 'static' or 'relative' mode flags were
- * specified, the input nodemask will already have been saved. Saving
- * it again is redundant, but safe.
- *
* On success, returns 0, else 1
*/
-int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
+int mpol_parse_str(char *str, struct mempolicy **mpol)
{
struct mempolicy *new = NULL;
unsigned short mode;
- unsigned short uninitialized_var(mode_flags);
+ unsigned short mode_flags;
nodemask_t nodes;
char *nodelist = strchr(str, ':');
char *flags = strchr(str, '=');
@@ -2719,24 +2738,23 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
if (IS_ERR(new))
goto out;
- if (no_context) {
- /* save for contextualization */
- new->w.user_nodemask = nodes;
- } else {
- int ret;
- NODEMASK_SCRATCH(scratch);
- if (scratch) {
- task_lock(current);
- ret = mpol_set_nodemask(new, &nodes, scratch);
- task_unlock(current);
- } else
- ret = -ENOMEM;
- NODEMASK_SCRATCH_FREE(scratch);
- if (ret) {
- mpol_put(new);
- goto out;
- }
- }
+ /*
+ * Save nodes for mpol_to_str() to show the tmpfs mount options
+ * for /proc/mounts, /proc/pid/mounts and /proc/pid/mountinfo.
+ */
+ if (mode != MPOL_PREFERRED)
+ new->v.nodes = nodes;
+ else if (nodelist)
+ new->v.preferred_node = first_node(nodes);
+ else
+ new->flags |= MPOL_F_LOCAL;
+
+ /*
+ * Save nodes for contextualization: this will be used to "clone"
+ * the mempolicy in a specific context [cpuset] at a later time.
+ */
+ new->w.user_nodemask = nodes;
+
err = 0;
out:
@@ -2756,13 +2774,12 @@ out:
* @buffer: to contain formatted mempolicy string
* @maxlen: length of @buffer
* @pol: pointer to mempolicy to be formatted
- * @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
*
* Convert a mempolicy into a string.
* Returns the number of characters in buffer (if positive)
* or an error (negative)
*/
-int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
+int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
{
char *p = buffer;
int l;
@@ -2788,7 +2805,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
case MPOL_PREFERRED:
nodes_clear(nodes);
if (flags & MPOL_F_LOCAL)
- mode = MPOL_LOCAL; /* pseudo-policy */
+ mode = MPOL_LOCAL;
else
node_set(pol->v.preferred_node, nodes);
break;
@@ -2796,10 +2813,7 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
case MPOL_BIND:
/* Fall through */
case MPOL_INTERLEAVE:
- if (no_context)
- nodes = pol->w.user_nodemask;
- else
- nodes = pol->v.nodes;
+ nodes = pol->v.nodes;
break;
default:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4ba5e37..bc6cc0e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -221,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly;
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
void set_pageblock_migratetype(struct page *page, int migratetype)
{
@@ -1655,20 +1650,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
return true;
}
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
- if (unlikely(zone->nr_pageblock_isolate))
- return zone->nr_pageblock_isolate * pageblock_nr_pages;
- return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
- return 0;
-}
-#endif
-
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
@@ -1684,14 +1665,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
- /*
- * If the zone has MIGRATE_ISOLATE type free pages, we should consider
- * it. nr_zone_isolate_freepages is never accurate so kswapd might not
- * sleep although it could do so. But this is more desirable for memory
- * hotplug than sleeping which can cause a livelock in the direct
- * reclaim path.
- */
- free_pages -= nr_zone_isolate_freepages(z);
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
free_pages);
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 9d2264e..383bdbb 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -8,28 +8,6 @@
#include <linux/memory.h>
#include "internal.h"
-/* called while holding zone->lock */
-static void set_pageblock_isolate(struct page *page)
-{
- if (get_pageblock_migratetype(page) == MIGRATE_ISOLATE)
- return;
-
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
- page_zone(page)->nr_pageblock_isolate++;
-}
-
-/* called while holding zone->lock */
-static void restore_pageblock_isolate(struct page *page, int migratetype)
-{
- struct zone *zone = page_zone(page);
- if (WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE))
- return;
-
- BUG_ON(zone->nr_pageblock_isolate <= 0);
- set_pageblock_migratetype(page, migratetype);
- zone->nr_pageblock_isolate--;
-}
-
int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
{
struct zone *zone;
@@ -80,7 +58,7 @@ out:
unsigned long nr_pages;
int migratetype = get_pageblock_migratetype(page);
- set_pageblock_isolate(page);
+ set_pageblock_migratetype(page, MIGRATE_ISOLATE);
nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
__mod_zone_freepage_state(zone, -nr_pages, migratetype);
@@ -103,7 +81,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
goto out;
nr_pages = move_freepages_block(zone, page, migratetype);
__mod_zone_freepage_state(zone, nr_pages, migratetype);
- restore_pageblock_isolate(page, migratetype);
+ set_pageblock_migratetype(page, migratetype);
out:
spin_unlock_irqrestore(&zone->lock, flags);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 5c90d84..5dd56f6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -889,7 +889,7 @@ static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
if (!mpol || mpol->mode == MPOL_DEFAULT)
return; /* show nothing */
- mpol_to_str(buffer, sizeof(buffer), mpol, 1);
+ mpol_to_str(buffer, sizeof(buffer), mpol);
seq_printf(seq, ",mpol=%s", buffer);
}
@@ -2463,7 +2463,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
if (!gid_valid(sbinfo->gid))
goto bad_val;
} else if (!strcmp(this_char,"mpol")) {
- if (mpol_parse_str(value, &sbinfo->mpol, 1))
+ if (mpol_parse_str(value, &sbinfo->mpol))
goto bad_val;
} else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 16b42af..196709f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3122,8 +3122,8 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
not required for correctness. So if the last cpu in a node goes
away, we get changed to run anywhere: as the first one comes back,
restore their cpu bindings. */
-static int __devinit cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cpu_callback(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
int nid;
OpenPOWER on IntegriCloud