Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            1
-rw-r--r--  mm/filemap.c          1
-rw-r--r--  mm/fremap.c           2
-rw-r--r--  mm/hugetlb.c          6
-rw-r--r--  mm/memory.c          23
-rw-r--r--  mm/mempolicy.c       84
-rw-r--r--  mm/migrate.c         11
-rw-r--r--  mm/page-writeback.c   4
-rw-r--r--  mm/page_alloc.c       2
-rw-r--r--  mm/slub.c            46
10 files changed, 137 insertions(+), 43 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index e24d348..a7609cb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,6 +137,7 @@ config SPLIT_PTLOCK_CPUS
int
default "4096" if ARM && !CPU_CACHE_VIPT
default "4096" if PARISC && !PA20
+ default "4096" if XEN
default "4"
#
diff --git a/mm/filemap.c b/mm/filemap.c
index 90b657b..15c8413 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1388,6 +1388,7 @@ retry_find:
size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
if (unlikely(vmf->pgoff >= size)) {
unlock_page(page);
+ page_cache_release(page);
goto outside_data_content;
}
diff --git a/mm/fremap.c b/mm/fremap.c
index c395b1a..95bcb56 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -160,7 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
goto out;
- if (!vma->vm_flags & VM_CAN_NONLINEAR)
+ if (!(vma->vm_flags & VM_CAN_NONLINEAR))
goto out;
if (end <= start || start < vma->vm_start || end > vma->vm_end)
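The fremap.c hunk above is a pure operator-precedence fix: `!` binds tighter than `&`, so the old test negated vm_flags before masking and could never catch a missing VM_CAN_NONLINEAR. A minimal user-space sketch of the difference (the flag value is made up for illustration, not taken from the kernel headers):

    #include <stdio.h>

    #define VM_CAN_NONLINEAR 0x1000         /* illustrative bit, not the real kernel value */

    int main(void)
    {
            unsigned long vm_flags = 0x0071;        /* VM_CAN_NONLINEAR is NOT set here */

            /* old test: !vm_flags evaluates first, so this is (0 & 0x1000) == 0 */
            if (!vm_flags & VM_CAN_NONLINEAR)
                    printf("old test rejects the mapping\n");
            else
                    printf("old test lets the mapping through (the bug)\n");

            /* fixed test: mask first, then negate */
            if (!(vm_flags & VM_CAN_NONLINEAR))
                    printf("fixed test correctly rejects the mapping\n");

            return 0;
    }

With any non-zero vm_flags the old expression reduces to 0 & VM_CAN_NONLINEAR, which is always false, so sys_remap_file_pages() fell through for drivers that never set VM_CAN_NONLINEAR.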
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index de4cf45..eab8c42 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -42,7 +42,7 @@ static void clear_huge_page(struct page *page, unsigned long addr)
might_sleep();
for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
cond_resched();
- clear_user_highpage(page + i, addr);
+ clear_user_highpage(page + i, addr + i * PAGE_SIZE);
}
}
@@ -71,8 +71,9 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
{
int nid;
struct page *page = NULL;
+ struct mempolicy *mpol;
struct zonelist *zonelist = huge_zonelist(vma, address,
- htlb_alloc_mask);
+ htlb_alloc_mask, &mpol);
struct zone **z;
for (z = zonelist->zones; *z; z++) {
@@ -87,6 +88,7 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
break;
}
}
+ mpol_free(mpol); /* unref if mpol !NULL */
return page;
}
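The hugetlb.c hunks adopt the new huge_zonelist() contract introduced in the mempolicy.c changes below: the helper may hand back a reference-counted 'BIND policy through the extra @mpol argument, and dequeue_huge_page() drops that reference once the zonelist walk is done (the diff's own comment notes that mpol_free() only unrefs a non-NULL pointer). A self-contained toy sketch of the same hand-off pattern, using invented names rather than kernel APIs:

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy model of the contract: the lookup may hand the caller an extra
     * reference via *out, which must be dropped after the allocation attempt;
     * NULL means there is nothing to release. */
    struct toy_policy { int refcount; };

    static void toy_policy_put(struct toy_policy *p)
    {
            if (!p)
                    return;                 /* like mpol_free(): NULL means no unref needed */
            if (--p->refcount == 0)
                    free(p);
    }

    static const char *toy_zonelist(int bind_policy, struct toy_policy **out)
    {
            *out = NULL;                    /* common case: nothing for the caller to release */
            if (!bind_policy)
                    return "default zonelist";

            *out = malloc(sizeof(**out));
            if (!*out)
                    return "default zonelist";      /* fall back if the toy allocation fails */
            (*out)->refcount = 1;                   /* caller now owns this reference */
            return "policy zonelist";
    }

    int main(void)
    {
            struct toy_policy *pol;
            const char *zl = toy_zonelist(1, &pol);

            printf("allocating huge page from %s\n", zl);   /* ...walk the zones... */
            toy_policy_put(pol);            /* drop the reference once the walk is done */
            return 0;
    }

Here toy_zonelist() plays the role of huge_zonelist(): it either returns a plain zonelist with *out left NULL, or returns a policy zonelist together with a reference the caller must put.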
diff --git a/mm/memory.c b/mm/memory.c
index ca8cac1..f82b359b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1639,6 +1639,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *old_page, *new_page;
pte_t entry;
int reuse = 0, ret = 0;
+ int page_mkwrite = 0;
struct page *dirty_page = NULL;
old_page = vm_normal_page(vma, address, orig_pte);
@@ -1687,6 +1688,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_release(old_page);
if (!pte_same(*page_table, orig_pte))
goto unlock;
+
+ page_mkwrite = 1;
}
dirty_page = old_page;
get_page(dirty_page);
@@ -1774,7 +1777,7 @@ unlock:
* do_no_page is protected similarly.
*/
wait_on_page_locked(dirty_page);
- set_page_dirty_balance(dirty_page);
+ set_page_dirty_balance(dirty_page, page_mkwrite);
put_page(dirty_page);
}
return ret;
@@ -2307,13 +2310,14 @@ oom:
* do not need to flush old virtual caches or the TLB.
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
+ * but allow concurrent faults), and pte neither mapped nor locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd,
+ unsigned long address, pmd_t *pmd,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
+ pte_t *page_table;

spinlock_t *ptl;
struct page *page;
pte_t entry;
@@ -2321,13 +2325,13 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *dirty_page = NULL;
struct vm_fault vmf;
int ret;
+ int page_mkwrite = 0;
vmf.virtual_address = (void __user *)(address & PAGE_MASK);
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.page = NULL;
- pte_unmap(page_table);
BUG_ON(vma->vm_flags & VM_PFNMAP);
if (likely(vma->vm_ops->fault)) {
@@ -2398,6 +2402,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
anon = 1; /* no anon but release vmf.page */
goto out;
}
+ page_mkwrite = 1;
}
}
@@ -2453,7 +2458,7 @@ out_unlocked:
if (anon)
page_cache_release(vmf.page);
else if (dirty_page) {
- set_page_dirty_balance(dirty_page);
+ set_page_dirty_balance(dirty_page, page_mkwrite);
put_page(dirty_page);
}
@@ -2468,8 +2473,8 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
- return __do_fault(mm, vma, address, page_table, pmd, pgoff,
- flags, orig_pte);
+ pte_unmap(page_table);
+ return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
@@ -2552,9 +2557,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
pgoff = pte_to_pgoff(orig_pte);
-
- return __do_fault(mm, vma, address, page_table, pmd, pgoff,
- flags, orig_pte);
+ return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
}
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 172abff..3d6ac95 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -955,6 +955,11 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
goto out;
}
+ if (!nodes_subset(new, node_online_map)) {
+ err = -EINVAL;
+ goto out;
+ }
+
err = security_task_movememory(task);
if (err)
goto out;
@@ -1072,21 +1077,37 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
#endif
-/* Return effective policy for a VMA */
+/*
+ * get_vma_policy(@task, @vma, @addr)
+ * @task - task for fallback if vma policy == default
+ * @vma - virtual memory area whose policy is sought
+ * @addr - address in @vma for shared policy lookup
+ *
+ * Returns effective policy for a VMA at specified address.
+ * Falls back to @task or system default policy, as necessary.
+ * Returned policy has extra reference count if shared, vma,
+ * or some other task's policy [show_numa_maps() can pass
+ * @task != current]. It is the caller's responsibility to
+ * free the reference in these cases.
+ */
static struct mempolicy * get_vma_policy(struct task_struct *task,
struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = task->mempolicy;
+ int shared_pol = 0;
if (vma) {
- if (vma->vm_ops && vma->vm_ops->get_policy)
+ if (vma->vm_ops && vma->vm_ops->get_policy) {
pol = vma->vm_ops->get_policy(vma, addr);
- else if (vma->vm_policy &&
+ shared_pol = 1; /* if pol non-NULL, add ref below */
+ } else if (vma->vm_policy &&
vma->vm_policy->policy != MPOL_DEFAULT)
pol = vma->vm_policy;
}
if (!pol)
pol = &default_policy;
+ else if (!shared_pol && pol != current->mempolicy)
+ mpol_get(pol); /* vma or other task's policy */
return pol;
}
@@ -1202,19 +1223,45 @@ static inline unsigned interleave_nid(struct mempolicy *pol,
}
#ifdef CONFIG_HUGETLBFS
-/* Return a zonelist suitable for a huge page allocation. */
+/*
+ * huge_zonelist(@vma, @addr, @gfp_flags, @mpol)
+ * @vma = virtual memory area whose policy is sought
+ * @addr = address in @vma for shared policy lookup and interleave policy
+ * @gfp_flags = for requested zone
+ * @mpol = pointer to mempolicy pointer for reference counted 'BIND policy
+ *
+ * Returns a zonelist suitable for a huge page allocation.
+ * If the effective policy is 'BIND, returns pointer to policy's zonelist.
+ * If it is also a policy for which get_vma_policy() returns an extra
+ * reference, we must hold that reference until after allocation.
+ * In that case, return policy via @mpol so hugetlb allocation can drop
+ * the reference. For non-'BIND referenced policies, we can/do drop the
+ * reference here, so the caller doesn't need to know about the special case
+ * for default and current task policy.
+ */
struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
- gfp_t gfp_flags)
+ gfp_t gfp_flags, struct mempolicy **mpol)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
+ struct zonelist *zl;
+ *mpol = NULL; /* probably no unref needed */
if (pol->policy == MPOL_INTERLEAVE) {
unsigned nid;
nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+ __mpol_free(pol); /* finished with pol */
return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
}
- return zonelist_policy(GFP_HIGHUSER, pol);
+
+ zl = zonelist_policy(GFP_HIGHUSER, pol);
+ if (unlikely(pol != &default_policy && pol != current->mempolicy)) {
+ if (pol->policy != MPOL_BIND)
+ __mpol_free(pol); /* finished with pol */
+ else
+ *mpol = pol; /* unref needed after allocation */
+ }
+ return zl;
}
#endif
@@ -1259,6 +1306,7 @@ struct page *
alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
struct mempolicy *pol = get_vma_policy(current, vma, addr);
+ struct zonelist *zl;
cpuset_update_task_memory_state();
@@ -1268,7 +1316,19 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
return alloc_page_interleave(gfp, 0, nid);
}
- return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+ zl = zonelist_policy(gfp, pol);
+ if (pol != &default_policy && pol != current->mempolicy) {
+ /*
+ * slow path: ref counted policy -- shared or vma
+ */
+ struct page *page = __alloc_pages(gfp, 0, zl);
+ __mpol_free(pol);
+ return page;
+ }
+ /*
+ * fast path: default or task policy
+ */
+ return __alloc_pages(gfp, 0, zl);
}
/**
@@ -1867,6 +1927,7 @@ int show_numa_map(struct seq_file *m, void *v)
struct numa_maps *md;
struct file *file = vma->vm_file;
struct mm_struct *mm = vma->vm_mm;
+ struct mempolicy *pol;
int n;
char buffer[50];
@@ -1877,8 +1938,13 @@ int show_numa_map(struct seq_file *m, void *v)
if (!md)
return 0;
- mpol_to_str(buffer, sizeof(buffer),
- get_vma_policy(priv->task, vma, vma->vm_start));
+ pol = get_vma_policy(priv->task, vma, vma->vm_start);
+ mpol_to_str(buffer, sizeof(buffer), pol);
+ /*
+ * unref shared or other task's mempolicy
+ */
+ if (pol != &default_policy && pol != current->mempolicy)
+ __mpol_free(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer);
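The mempolicy.c changes establish one unref rule that alloc_page_vma() and show_numa_map() both repeat: a policy returned by get_vma_policy() carries an extra reference unless it is the static default_policy or the current task's own policy, and only in the former case may the caller call __mpol_free(). A small user-space analogue of that conditional-ownership test (all names invented for the sketch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Toy illustration of the unref rule: only release the returned object when
     * it is neither the static default nor the caller's own long-lived policy. */
    struct toy_pol { char name[16]; };

    static struct toy_pol default_pol = { "default" };
    static struct toy_pol task_pol = { "task" };

    static struct toy_pol *toy_get_policy(int kind)
    {
            struct toy_pol *p;

            if (kind == 0)
                    return &default_pol;    /* no extra reference taken */
            if (kind == 1)
                    return &task_pol;       /* no extra reference taken */

            p = malloc(sizeof(*p));         /* "shared or other task" case: caller owns it */
            if (!p)
                    exit(EXIT_FAILURE);
            strcpy(p->name, "shared");
            return p;
    }

    int main(void)
    {
            int kind;

            for (kind = 0; kind < 3; kind++) {
                    struct toy_pol *pol = toy_get_policy(kind);

                    printf("using policy %s\n", pol->name);
                    if (pol != &default_pol && pol != &task_pol)
                            free(pol);      /* mirrors the __mpol_free() condition above */
            }
            return 0;
    }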
diff --git a/mm/migrate.c b/mm/migrate.c
index 37c73b9..e2fdbce 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -611,6 +611,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
int rc = 0;
int *result = NULL;
struct page *newpage = get_new_page(page, private, &result);
+ int rcu_locked = 0;
if (!newpage)
return -ENOMEM;
@@ -636,8 +637,13 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
* we cannot notice that anon_vma is freed while we migrates a page.
* This rcu_read_lock() delays freeing anon_vma pointer until the end
* of migration. File cache pages are no problem because of page_lock()
+ * File Caches may use write_page() or lock_page() in migration, then,
+ * just care Anon page here.
*/
- rcu_read_lock();
+ if (PageAnon(page)) {
+ rcu_read_lock();
+ rcu_locked = 1;
+ }
/*
* This is a corner case handling.
* When a new swap-cache is read into, it is linked to LRU
@@ -656,7 +662,8 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
if (rc)
remove_migration_ptes(page, page);
rcu_unlock:
- rcu_read_unlock();
+ if (rcu_locked)
+ rcu_read_unlock();
unlock:
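The migrate.c change takes rcu_read_lock() only for anonymous pages and records that decision in rcu_locked so the common exit path stays balanced. The same "remember whether the lock was taken" pattern, sketched as a runnable user-space example with a pthread mutex standing in for the RCU read lock:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-in for unmap_and_move(): the lock is needed only for one kind of
     * page, so a flag records whether it was taken and the shared exit path
     * unlocks conditionally. */
    static void toy_migrate(int page_is_anon)
    {
            int locked = 0;

            if (page_is_anon) {
                    pthread_mutex_lock(&lock);      /* like rcu_read_lock() for PageAnon() */
                    locked = 1;
            }

            printf("migrating %s page\n", page_is_anon ? "anon" : "file-backed");

            if (locked)
                    pthread_mutex_unlock(&lock);    /* balanced only when it was taken */
    }

    int main(void)
    {
            toy_migrate(1);
            toy_migrate(0);
            return 0;
    }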
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 63512a9..4472036 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -274,9 +274,9 @@ static void balance_dirty_pages(struct address_space *mapping)
pdflush_operation(background_writeout, 0);
}
-void set_page_dirty_balance(struct page *page)
+void set_page_dirty_balance(struct page *page, int page_mkwrite)
{
- if (set_page_dirty(page)) {
+ if (set_page_dirty(page) || page_mkwrite) {
struct address_space *mapping = page_mapping(page);
if (mapping)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6427653..1a8c595 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2345,6 +2345,8 @@ static int __cpuinit process_zones(int cpu)
return 0;
bad:
for_each_zone(dzone) {
+ if (!populated_zone(dzone))
+ continue;
if (dzone == zone)
break;
kfree(zone_pcp(dzone, cpu));
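The page_alloc.c fix makes the process_zones() error path skip zones that were never populated, for which no per-cpu pageset was allocated in the first place. A generic sketch of that partial-failure cleanup idiom (hypothetical names, not kernel code):

    #include <stdlib.h>

    #define NOBJ 8

    /* Partial-failure cleanup in the style of the process_zones() error path:
     * walk the same list again, skip entries that were never set up, and stop
     * at the entry whose allocation failed. */
    static int setup_all(void *obj[NOBJ], const int populated[NOBJ])
    {
            int i, j;

            for (i = 0; i < NOBJ; i++) {
                    if (!populated[i])
                            continue;       /* nothing to allocate for this entry */
                    obj[i] = malloc(64);
                    if (!obj[i])
                            goto bad;
            }
            return 0;

    bad:
            for (j = 0; j < NOBJ; j++) {
                    if (!populated[j])
                            continue;       /* never allocated, so nothing to free */
                    if (j == i)
                            break;          /* stop at the entry that failed */
                    free(obj[j]);
            }
            return -1;
    }

    int main(void)
    {
            void *obj[NOBJ] = { 0 };
            const int populated[NOBJ] = { 1, 1, 0, 1, 0, 1, 1, 1 };
            int i;

            if (setup_all(obj, populated) == 0)
                    for (i = 0; i < NOBJ; i++)
                            free(obj[i]);   /* free(NULL) is a no-op for the gaps */
            return 0;
    }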
diff --git a/mm/slub.c b/mm/slub.c
index 04151da..addb20a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -986,7 +986,9 @@ out:
__setup("slub_debug", setup_slub_debug);
-static void kmem_cache_open_debug_check(struct kmem_cache *s)
+static unsigned long kmem_cache_flags(unsigned long objsize,
+ unsigned long flags, const char *name,
+ void (*ctor)(void *, struct kmem_cache *, unsigned long))
{
/*
* The page->offset field is only 16 bit wide. This is an offset
@@ -1000,19 +1002,21 @@ static void kmem_cache_open_debug_check(struct kmem_cache *s)
* Debugging or ctor may create a need to move the free
* pointer. Fail if this happens.
*/
- if (s->objsize >= 65535 * sizeof(void *)) {
- BUG_ON(s->flags & (SLAB_RED_ZONE | SLAB_POISON |
+ if (objsize >= 65535 * sizeof(void *)) {
+ BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON |
SLAB_STORE_USER | SLAB_DESTROY_BY_RCU));
- BUG_ON(s->ctor);
- }
- else
+ BUG_ON(ctor);
+ } else {
/*
* Enable debugging if selected on the kernel commandline.
*/
if (slub_debug && (!slub_debug_slabs ||
- strncmp(slub_debug_slabs, s->name,
+ strncmp(slub_debug_slabs, name,
strlen(slub_debug_slabs)) == 0))
- s->flags |= slub_debug;
+ flags |= slub_debug;
+ }
+
+ return flags;
}
#else
static inline void setup_object_debug(struct kmem_cache *s,
@@ -1029,7 +1033,12 @@ static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
static inline int check_object(struct kmem_cache *s, struct page *page,
void *object, int active) { return 1; }
static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
-static inline void kmem_cache_open_debug_check(struct kmem_cache *s) {}
+static inline unsigned long kmem_cache_flags(unsigned long objsize,
+ unsigned long flags, const char *name,
+ void (*ctor)(void *, struct kmem_cache *, unsigned long))
+{
+ return flags;
+}
#define slub_debug 0
#endif
/*
@@ -2088,9 +2097,8 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
s->name = name;
s->ctor = ctor;
s->objsize = size;
- s->flags = flags;
s->align = align;
- kmem_cache_open_debug_check(s);
+ s->flags = kmem_cache_flags(size, flags, name, ctor);
if (!calculate_sizes(s))
goto error;
@@ -2660,7 +2668,7 @@ static int slab_unmergeable(struct kmem_cache *s)
}
static struct kmem_cache *find_mergeable(size_t size,
- size_t align, unsigned long flags,
+ size_t align, unsigned long flags, const char *name,
void (*ctor)(void *, struct kmem_cache *, unsigned long))
{
struct kmem_cache *s;
@@ -2674,6 +2682,7 @@ static struct kmem_cache *find_mergeable(size_t size,
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
+ flags = kmem_cache_flags(size, flags, name, NULL);
list_for_each_entry(s, &slab_caches, list) {
if (slab_unmergeable(s))
@@ -2682,8 +2691,7 @@ static struct kmem_cache *find_mergeable(size_t size,
if (size > s->size)
continue;
- if (((flags | slub_debug) & SLUB_MERGE_SAME) !=
- (s->flags & SLUB_MERGE_SAME))
+ if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME))
continue;
/*
* Check if alignment is compatible.
@@ -2707,7 +2715,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
struct kmem_cache *s;
down_write(&slub_lock);
- s = find_mergeable(size, align, flags, ctor);
+ s = find_mergeable(size, align, flags, name, ctor);
if (s) {
s->refcount++;
/*
@@ -3813,7 +3821,9 @@ static int __init slab_sysfs_init(void)
list_for_each_entry(s, &slab_caches, list) {
err = sysfs_slab_add(s);
- BUG_ON(err);
+ if (err)
+ printk(KERN_ERR "SLUB: Unable to add boot slab %s"
+ " to sysfs\n", s->name);
}
while (alias_list) {
@@ -3821,7 +3831,9 @@ static int __init slab_sysfs_init(void)
alias_list = alias_list->next;
err = sysfs_slab_alias(al->s, al->name);
- BUG_ON(err);
+ if (err)
+ printk(KERN_ERR "SLUB: Unable to add boot slab alias"
+ " %s to sysfs\n", s->name);
kfree(al);
}
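The slub.c refactor replaces kmem_cache_open_debug_check(), which mutated an existing struct kmem_cache, with kmem_cache_flags(), a pure function of (objsize, flags, name, ctor). That is what lets find_mergeable() compute the debug flags a prospective cache would receive before any cache structure exists, so boot-time slub_debug options are honoured when deciding whether to merge. A compact illustration of the same "mutating helper to pure function" pattern (all identifiers invented for the sketch):

    #include <stdio.h>
    #include <string.h>

    #define FLAG_DEBUG 0x1UL        /* stand-in for the SLAB_* debug bits */

    struct toy_cache {
            const char *name;
            unsigned long flags;
    };

    /* After the refactor: a pure function of the prospective cache's parameters,
     * usable both when creating a cache and when merely probing whether an
     * existing cache would be mergeable. */
    static unsigned long toy_cache_flags(unsigned long flags, const char *name,
                                         const char *debug_slabs)
    {
            if (debug_slabs == NULL || strcmp(debug_slabs, name) == 0)
                    flags |= FLAG_DEBUG;    /* debugging selected on the "command line" */
            return flags;
    }

    int main(void)
    {
            const char *slub_debug_slabs = "toy_cache";     /* pretend boot parameter */
            struct toy_cache c = { .name = "toy_cache" };
            unsigned long probe;

            /* creation path: store the computed flags in the new cache */
            c.flags = toy_cache_flags(0, c.name, slub_debug_slabs);

            /* merge path: compute the flags a would-be cache would get,
             * without ever constructing it */
            probe = toy_cache_flags(0, "other_cache", slub_debug_slabs);

            printf("created: %#lx, probe: %#lx\n", c.flags, probe);
            return 0;
    }

The merge-path call is the point of the change: with the old in-place helper there was no cache to pass in, so find_mergeable() had to approximate the comparison with (flags | slub_debug), which the patch removes in favour of the exact kmem_cache_flags() result.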