summary | refs | log | tree | commit | diff | stats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r-- mm/fremap.c 2
-rw-r--r-- mm/hugetlb.c 28
-rw-r--r-- mm/memory.c 2
-rw-r--r-- mm/migrate.c 2
-rw-r--r-- mm/mlock.c 48
-rw-r--r-- mm/mmap.c 48
-rw-r--r-- mm/mprotect.c 5
-rw-r--r-- mm/page-writeback.c 20
-rw-r--r-- mm/page_cgroup.c 3
-rw-r--r-- mm/rmap.c 3
-rw-r--r-- mm/slub.c 2
11 files changed, 85 insertions, 78 deletions
diff --git a/mm/fremap.c b/mm/fremap.c
index 736ba7f..b6ec85a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -198,7 +198,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
flags &= MAP_NONBLOCK;
get_file(file);
addr = mmap_region(file, start, size,
- flags, vma->vm_flags, pgoff, 1);
+ flags, vma->vm_flags, pgoff);
fput(file);
if (IS_ERR_VALUE(addr)) {
err = addr;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 618e983..107da3d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
int hugetlb_reserve_pages(struct inode *inode,
long from, long to,
- struct vm_area_struct *vma)
+ struct vm_area_struct *vma,
+ int acctflag)
{
long ret, chg;
struct hstate *h = hstate_inode(inode);
- if (vma && vma->vm_flags & VM_NORESERVE)
+ /*
+ * Only apply hugepage reservation if asked. At fault time, an
+ * attempt will be made for VM_NORESERVE to allocate a page
+ * and filesystem quota without using reserves
+ */
+ if (acctflag & VM_NORESERVE)
return 0;
/*
@@ -2299,13 +2305,31 @@ int hugetlb_reserve_pages(struct inode *inode,
if (chg < 0)
return chg;
+ /* There must be enough filesystem quota for the mapping */
if (hugetlb_get_quota(inode->i_mapping, chg))
return -ENOSPC;
+
+ /*
+ * Check enough hugepages are available for the reservation.
+ * Hand back the quota if there are not
+ */
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
hugetlb_put_quota(inode->i_mapping, chg);
return ret;
}
+
+ /*
+ * Account for the reservations made. Shared mappings record regions
+ * that have reservations as they are shared by multiple VMAs.
+ * When the last VMA disappears, the region map says how much
+ * the reservation was and the page cache tells how much of
+ * the reservation was consumed. Private mappings are per-VMA and
+ * only the consumed reservations are tracked. When the VMA
+ * disappears, the original reservation is the VMA size and the
+ * consumed reservations are stored in the map. Hence, nothing
+ * else has to be done for private mappings here
+ */
if (!vma || vma->vm_flags & VM_SHARED)
region_add(&inode->i_mapping->private_list, from, to);
return 0;
diff --git a/mm/memory.c b/mm/memory.c
index 22bfa7a..baa999e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1999,7 +1999,7 @@ gotten:
* Don't let another task, with possibly unlocked vma,
* keep the mlocked page.
*/
- if (vma->vm_flags & VM_LOCKED) {
+ if ((vma->vm_flags & VM_LOCKED) && old_page) {
lock_page(old_page); /* for LRU manipulation */
clear_page_mlock(old_page);
unlock_page(old_page);
diff --git a/mm/migrate.c b/mm/migrate.c
index 2bb4e1d..a9eff3f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
struct vm_area_struct *vma;
int err = 0;
- for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) {
+ for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
if (vma->vm_ops && vma->vm_ops->migrate) {
err = vma->vm_ops->migrate(vma, to, from, flags);
if (err)
diff --git a/mm/mlock.c b/mm/mlock.c
index 2904a34..037161d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
*
* return number of pages [> 0] to be removed from locked_vm on success
* of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped. Unlikely?
*/
long mlock_vma_pages_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- struct mm_struct *mm = vma->vm_mm;
int nr_pages = (end - start) / PAGE_SIZE;
BUG_ON(!(vma->vm_flags & VM_LOCKED));
@@ -314,20 +310,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(current))) {
- long error;
- downgrade_write(&mm->mmap_sem);
-
- error = __mlock_vma_pages_range(vma, start, end, 1);
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- vma = find_vma(mm, start);
- /* non-NULL vma must contain @start, but need to check @end */
- if (!vma || end > vma->vm_end)
- return -ENOMEM;
+ __mlock_vma_pages_range(vma, start, end, 1);
- return 0; /* hide other errors from mmap(), et al */
+ /* Hide errors from mmap() and other callers */
+ return 0;
}
/*
@@ -438,41 +425,14 @@ success:
vma->vm_flags = newflags;
if (lock) {
- /*
- * mmap_sem is currently held for write. Downgrade the write
- * lock to a read lock so that other faults, mmap scans, ...
- * while we fault in all pages.
- */
- downgrade_write(&mm->mmap_sem);
-
ret = __mlock_vma_pages_range(vma, start, end, 1);
- /*
- * Need to reacquire mmap sem in write mode, as our callers
- * expect this. We have no support for atomically upgrading
- * a sem to write, so we need to check for ranges while sem
- * is unlocked.
- */
- up_read(&mm->mmap_sem);
- /* vma can change or disappear */
- down_write(&mm->mmap_sem);
- *prev = find_vma(mm, start);
- /* non-NULL *prev must contain @start, but need to check @end */
- if (!(*prev) || end > (*prev)->vm_end)
- ret = -ENOMEM;
- else if (ret > 0) {
+ if (ret > 0) {
mm->locked_vm -= ret;
ret = 0;
} else
ret = __mlock_posix_error_return(ret); /* translate if needed */
} else {
- /*
- * TODO: for unlocking, pages will already be resident, so
- * we don't need to wait for allocations/reclaim/pagein, ...
- * However, unlocking a very large region can still take a
- * while. Should we downgrade the semaphore for both lock
- * AND unlock ?
- */
__mlock_vma_pages_range(vma, start, end, 0);
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 214b6a2..00ced3e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -918,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
struct inode *inode;
unsigned int vm_flags;
int error;
- int accountable = 1;
unsigned long reqprot = prot;
/*
@@ -1019,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
return -EPERM;
vm_flags &= ~VM_MAYEXEC;
}
- if (is_file_hugepages(file))
- accountable = 0;
if (!file->f_op || !file->f_op->mmap)
return -ENODEV;
@@ -1053,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
if (error)
return error;
- return mmap_region(file, addr, len, flags, vm_flags, pgoff,
- accountable);
+ return mmap_region(file, addr, len, flags, vm_flags, pgoff);
}
EXPORT_SYMBOL(do_mmap_pgoff);
@@ -1092,17 +1088,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
/*
* We account for memory if it's a private writeable mapping,
- * and VM_NORESERVE wasn't set.
+ * not hugepages and VM_NORESERVE wasn't set.
*/
-static inline int accountable_mapping(unsigned int vm_flags)
+static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
{
+ /*
+ * hugetlb has its own accounting separate from the core VM.
+ * VM_HUGETLB may not be set yet so we cannot check for that flag.
+ */
+ if (file && is_file_hugepages(file))
+ return 0;
+
return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}
unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, unsigned long flags,
- unsigned int vm_flags, unsigned long pgoff,
- int accountable)
+ unsigned int vm_flags, unsigned long pgoff)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
@@ -1128,18 +1130,22 @@ munmap_back:
/*
* Set 'VM_NORESERVE' if we should not account for the
- * memory use of this mapping. We only honor MAP_NORESERVE
- * if we're allowed to overcommit memory.
+ * memory use of this mapping.
*/
- if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
- vm_flags |= VM_NORESERVE;
- if (!accountable)
- vm_flags |= VM_NORESERVE;
+ if ((flags & MAP_NORESERVE)) {
+ /* We honor MAP_NORESERVE if allowed to overcommit */
+ if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+ vm_flags |= VM_NORESERVE;
+
+ /* hugetlb applies strict overcommit unless MAP_NORESERVE */
+ if (file && is_file_hugepages(file))
+ vm_flags |= VM_NORESERVE;
+ }
/*
* Private writable mapping: check memory availability
*/
- if (accountable_mapping(vm_flags)) {
+ if (accountable_mapping(file, vm_flags)) {
charged = len >> PAGE_SHIFT;
if (security_vm_enough_memory(charged))
return -ENOMEM;
@@ -2078,12 +2084,8 @@ void exit_mmap(struct mm_struct *mm)
unsigned long end;
/* mm's last user has gone, and its about to be pulled down */
- arch_exit_mmap(mm);
mmu_notifier_release(mm);
- if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */
- return;
-
if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
@@ -2092,7 +2094,13 @@ void exit_mmap(struct mm_struct *mm)
vma = vma->vm_next;
}
}
+
+ arch_exit_mmap(mm);
+
vma = mm->mmap;
+ if (!vma) /* Can happen if dup_mmap() received an OOM */
+ return;
+
lru_add_drain();
flush_cache_mm(mm);
tlb = tlb_gather_mmu(mm, 1);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index abe2694..258197b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
/*
* If we make a private mapping writable we increase our commit;
* but (without finer accounting) cannot reduce our commit if we
- * make it unwritable again.
+ * make it unwritable again. hugetlb mappings were accounted for
+ * even if read-only so there is no need to account for them here.
*/
if (newflags & VM_WRITE) {
- if (!(oldflags & (VM_ACCOUNT|VM_WRITE|
+ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
VM_SHARED|VM_NORESERVE))) {
charged = nrpages;
if (security_vm_enough_memory(charged))
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b493db7..6106a5c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, size_t *lenp,
loff_t *ppos)
{
- int old_bytes = vm_dirty_bytes;
+ unsigned long old_bytes = vm_dirty_bytes;
int ret;
ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
@@ -1051,13 +1051,25 @@ continue_unlock:
}
}
- if (wbc->sync_mode == WB_SYNC_NONE) {
- wbc->nr_to_write--;
- if (wbc->nr_to_write <= 0) {
+ if (nr_to_write > 0) {
+ nr_to_write--;
+ if (nr_to_write == 0 &&
+ wbc->sync_mode == WB_SYNC_NONE) {
+ /*
+ * We stop writing back only if we are
+ * not doing integrity sync. In case of
+ * integrity sync we have to keep going
+ * because someone may be concurrently
+ * dirtying pages, and we might have
+ * synced a lot of newly appeared dirty
+ * pages, but have not synced all of the
+ * old dirty pages.
+ */
done = 1;
break;
}
}
+
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
done = 1;
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 7006a11..ceecfbb 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
nid = page_to_nid(pfn_to_page(pfn));
table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
if (slab_is_available()) {
- base = kmalloc_node(table_size, GFP_KERNEL, nid);
+ base = kmalloc_node(table_size,
+ GFP_KERNEL | __GFP_NOWARN, nid);
if (!base)
base = vmalloc_node(table_size, nid);
} else {
diff --git a/mm/rmap.c b/mm/rmap.c
index ac4af8c..1652166 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
spin_lock(&mapping->i_mmap_lock);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
if (MLOCK_PAGES && unlikely(unlock)) {
- if (!(vma->vm_flags & VM_LOCKED))
+ if (!((vma->vm_flags & VM_LOCKED) &&
+ page_mapped_in_vma(page, vma)))
continue; /* must visit all vmas */
ret = SWAP_MLOCK;
} else {
diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5..bdc9abb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1996,7 +1996,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
{
if (c < per_cpu(kmem_cache_cpu, cpu) ||
- c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+ c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
kfree(c);
return;
}
OpenPOWER on IntegriCloud