summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig2
-rw-r--r--mm/frontswap.c2
-rw-r--r--mm/hugetlb.c19
-rw-r--r--mm/memcontrol.c14
-rw-r--r--mm/memory.c19
-rw-r--r--mm/migrate.c23
-rw-r--r--mm/page_alloc.c6
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/shmem.c37
-rw-r--r--mm/slab_common.c4
-rw-r--r--mm/swap_state.c18
-rw-r--r--mm/swapfile.c2
-rw-r--r--mm/truncate.c117
13 files changed, 190 insertions, 75 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index e742d06..f5e698e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -173,7 +173,7 @@ config HAVE_BOOTMEM_INFO_NODE
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA
- depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on ARCH_ENABLE_MEMORY_HOTPLUG
depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
config MEMORY_HOTPLUG_SPARSE
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 538367e..1b24bdc 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -319,7 +319,7 @@ void __frontswap_invalidate_area(unsigned type)
return;
frontswap_ops->invalidate_area(type);
atomic_set(&sis->frontswap_pages, 0);
- memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+ bitmap_zero(sis->frontswap_map, sis->max);
}
clear_bit(type, need_init);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 63217261f..aed085a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -690,6 +690,23 @@ int PageHuge(struct page *page)
}
EXPORT_SYMBOL_GPL(PageHuge);
+pgoff_t __basepage_index(struct page *page)
+{
+ struct page *page_head = compound_head(page);
+ pgoff_t index = page_index(page_head);
+ unsigned long compound_idx;
+
+ if (!PageHuge(page_head))
+ return page_index(page);
+
+ if (compound_order(page_head) >= MAX_ORDER)
+ compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+ else
+ compound_idx = page - page_head;
+
+ return (index << compound_order(page_head)) + compound_idx;
+}
+
static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
@@ -2839,7 +2856,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (ptep) {
entry = huge_ptep_get(ptep);
if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait(mm, (pmd_t *)ptep, address);
+ migration_entry_wait_huge(mm, ptep);
return 0;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 010d6c1..1947218 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1199,7 +1199,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
mz = mem_cgroup_zoneinfo(root, nid, zid);
iter = &mz->reclaim_iter[reclaim->priority];
- last_visited = iter->last_visited;
if (prev && reclaim->generation != iter->generation) {
iter->last_visited = NULL;
goto out_unlock;
@@ -1218,13 +1217,12 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
* is alive.
*/
dead_count = atomic_read(&root->dead_count);
- smp_rmb();
- last_visited = iter->last_visited;
- if (last_visited) {
- if ((dead_count != iter->last_dead_count) ||
- !css_tryget(&last_visited->css)) {
+ if (dead_count == iter->last_dead_count) {
+ smp_rmb();
+ last_visited = iter->last_visited;
+ if (last_visited &&
+ !css_tryget(&last_visited->css))
last_visited = NULL;
- }
}
}
@@ -3141,8 +3139,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
return -ENOMEM;
}
- INIT_WORK(&s->memcg_params->destroy,
- kmem_cache_destroy_work_func);
s->memcg_params->is_root_cache = true;
/*
diff --git a/mm/memory.c b/mm/memory.c
index 6dc1882..95d0cce 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -220,7 +220,6 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
tlb->start = -1UL;
tlb->end = 0;
tlb->need_flush = 0;
- tlb->fast_mode = (num_possible_cpus() == 1);
tlb->local.next = NULL;
tlb->local.nr = 0;
tlb->local.max = ARRAY_SIZE(tlb->__pages);
@@ -244,9 +243,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
tlb_table_flush(tlb);
#endif
- if (tlb_fast_mode(tlb))
- return;
-
for (batch = &tlb->local; batch; batch = batch->next) {
free_pages_and_swap_cache(batch->pages, batch->nr);
batch->nr = 0;
@@ -288,11 +284,6 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
VM_BUG_ON(!tlb->need_flush);
- if (tlb_fast_mode(tlb)) {
- free_page_and_swap_cache(page);
- return 1; /* avoid calling tlb_flush_mmu() */
- }
-
batch = tlb->active;
batch->pages[batch->nr++] = page;
if (batch->nr == batch->max) {
@@ -4210,7 +4201,7 @@ void print_vma_addr(char *prefix, unsigned long ip)
up_read(&mm->mmap_sem);
}
-#ifdef CONFIG_PROVE_LOCKING
+#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_DEBUG_ATOMIC_SLEEP)
void might_fault(void)
{
/*
@@ -4222,13 +4213,17 @@ void might_fault(void)
if (segment_eq(get_fs(), KERNEL_DS))
return;
- might_sleep();
/*
* it would be nicer only to annotate paths which are not under
* pagefault_disable, however that requires a larger audit and
* providing helpers like get_user_atomic.
*/
- if (!in_atomic() && current->mm)
+ if (in_atomic())
+ return;
+
+ __might_sleep(__FILE__, __LINE__, 0);
+
+ if (current->mm)
might_lock_read(&current->mm->mmap_sem);
}
EXPORT_SYMBOL(might_fault);
diff --git a/mm/migrate.c b/mm/migrate.c
index b1f5750..6f0c244 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -200,15 +200,14 @@ static void remove_migration_ptes(struct page *old, struct page *new)
* get to the page and wait until migration is finished.
* When we return from this function the fault will be retried.
*/
-void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
- unsigned long address)
+static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
+ spinlock_t *ptl)
{
- pte_t *ptep, pte;
- spinlock_t *ptl;
+ pte_t pte;
swp_entry_t entry;
struct page *page;
- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ spin_lock(ptl);
pte = *ptep;
if (!is_swap_pte(pte))
goto out;
@@ -236,6 +235,20 @@ out:
pte_unmap_unlock(ptep, ptl);
}
+void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
+ unsigned long address)
+{
+ spinlock_t *ptl = pte_lockptr(mm, pmd);
+ pte_t *ptep = pte_offset_map(pmd, address);
+ __migration_entry_wait(mm, ptep, ptl);
+}
+
+void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte)
+{
+ spinlock_t *ptl = &(mm)->page_table_lock;
+ __migration_entry_wait(mm, pte, ptl);
+}
+
#ifdef CONFIG_BLOCK
/* Returns true if all buffers are successfully locked */
static bool buffer_migrate_lock_buffers(struct buffer_head *head,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 378a15b..c3edb62 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1628,6 +1628,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
long min = mark;
long lowmem_reserve = z->lowmem_reserve[classzone_idx];
int o;
+ long free_cma = 0;
free_pages -= (1 << order) - 1;
if (alloc_flags & ALLOC_HIGH)
@@ -1637,9 +1638,10 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
#ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */
if (!(alloc_flags & ALLOC_CMA))
- free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+ free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
- if (free_pages <= min + lowmem_reserve)
+
+ if (free_pages - free_cma <= min + lowmem_reserve)
return false;
for (o = 0; o < order; o++) {
/* At the next order, this order's pages become unavailable */
diff --git a/mm/readahead.c b/mm/readahead.c
index daed28d..829a77c 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -48,7 +48,7 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping,
if (!trylock_page(page))
BUG();
page->mapping = mapping;
- do_invalidatepage(page, 0);
+ do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
page->mapping = NULL;
unlock_page(page);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 5e6a842..118dfa4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1798,10 +1798,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
}
}
- if (offset >= 0 && offset != file->f_pos) {
- file->f_pos = offset;
- file->f_version = 0;
- }
+ offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
mutex_unlock(&inode->i_mutex);
return offset;
}
@@ -1965,6 +1962,37 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
return error;
}
+static int
+shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct inode *inode;
+ int error = -ENOSPC;
+
+ inode = shmem_get_inode(dir->i_sb, dir, mode, 0, VM_NORESERVE);
+ if (inode) {
+ error = security_inode_init_security(inode, dir,
+ NULL,
+ shmem_initxattrs, NULL);
+ if (error) {
+ if (error != -EOPNOTSUPP) {
+ iput(inode);
+ return error;
+ }
+ }
+#ifdef CONFIG_TMPFS_POSIX_ACL
+ error = generic_acl_init(inode, dir);
+ if (error) {
+ iput(inode);
+ return error;
+ }
+#else
+ error = 0;
+#endif
+ d_tmpfile(dentry, inode);
+ }
+ return error;
+}
+
static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int error;
@@ -2723,6 +2751,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
.rmdir = shmem_rmdir,
.mknod = shmem_mknod,
.rename = shmem_rename,
+ .tmpfile = shmem_tmpfile,
#endif
#ifdef CONFIG_TMPFS_XATTR
.setxattr = shmem_setxattr,
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ff3218a..2d41450 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -373,8 +373,10 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
int index;
- if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
+ if (size > KMALLOC_MAX_SIZE) {
+ WARN_ON_ONCE(!(flags & __GFP_NOWARN));
return NULL;
+ }
if (size <= 192) {
if (!size)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index b3d40dc..f24ab0d 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -336,8 +336,24 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
* Swap entry may have been freed since our caller observed it.
*/
err = swapcache_prepare(entry);
- if (err == -EEXIST) { /* seems racy */
+ if (err == -EEXIST) {
radix_tree_preload_end();
+ /*
+ * We might race against get_swap_page() and stumble
+ * across a SWAP_HAS_CACHE swap_map entry whose page
+ * has not been brought into the swapcache yet, while
+ * the other end is scheduled away waiting on discard
+ * I/O completion at scan_swap_map().
+ *
+ * In order to avoid turning this transitory state
+ * into a permanent loop around this -EEXIST case
+ * if !CONFIG_PREEMPT and the I/O completion happens
+ * to be waiting on the CPU waitqueue where we are now
+ * busy looping, we just conditionally invoke the
+ * scheduler here, if there are some more important
+ * tasks to run.
+ */
+ cond_resched();
continue;
}
if (err) { /* swp entry is obsolete ? */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c340d9..746af55b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2116,7 +2116,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
}
/* frontswap enabled? set up bit-per-page map for frontswap */
if (frontswap_enabled)
- frontswap_map = vzalloc(maxpages / sizeof(long));
+ frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
if (p->bdev) {
if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
diff --git a/mm/truncate.c b/mm/truncate.c
index c75b736..e2e8a8a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -26,7 +26,8 @@
/**
* do_invalidatepage - invalidate part or all of a page
* @page: the page which is affected
- * @offset: the index of the truncation point
+ * @offset: start of the range to invalidate
+ * @length: length of the range to invalidate
*
* do_invalidatepage() is called when all or part of the page has become
* invalidated by a truncate operation.
@@ -37,24 +38,18 @@
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
-void do_invalidatepage(struct page *page, unsigned long offset)
+void do_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
- void (*invalidatepage)(struct page *, unsigned long);
+ void (*invalidatepage)(struct page *, unsigned int, unsigned int);
+
invalidatepage = page->mapping->a_ops->invalidatepage;
#ifdef CONFIG_BLOCK
if (!invalidatepage)
invalidatepage = block_invalidatepage;
#endif
if (invalidatepage)
- (*invalidatepage)(page, offset);
-}
-
-static inline void truncate_partial_page(struct page *page, unsigned partial)
-{
- zero_user_segment(page, partial, PAGE_CACHE_SIZE);
- cleancache_invalidate_page(page->mapping, page);
- if (page_has_private(page))
- do_invalidatepage(page, partial);
+ (*invalidatepage)(page, offset, length);
}
/*
@@ -103,7 +98,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
return -EIO;
if (page_has_private(page))
- do_invalidatepage(page, 0);
+ do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
cancel_dirty_page(page, PAGE_CACHE_SIZE);
@@ -185,11 +180,11 @@ int invalidate_inode_page(struct page *page)
* truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
* @mapping: mapping to truncate
* @lstart: offset from which to truncate
- * @lend: offset to which to truncate
+ * @lend: offset to which to truncate (inclusive)
*
* Truncate the page cache, removing the pages that are between
- * specified offsets (and zeroing out partial page
- * (if lstart is not page aligned)).
+ * specified offsets (and zeroing out partial pages
+ * if lstart or lend + 1 is not page aligned).
*
* Truncate takes two passes - the first pass is nonblocking. It will not
* block on page locks and it will not block on writeback. The second pass
@@ -200,35 +195,58 @@ int invalidate_inode_page(struct page *page)
* We pass down the cache-hot hint to the page freeing code. Even if the
* mapping is large, it is probably the case that the final pages are the most
* recently touched, and freeing happens in ascending file offset order.
+ *
+ * Note that since ->invalidatepage() accepts range to invalidate
+ * truncate_inode_pages_range is able to handle cases where lend + 1 is not
+ * page aligned properly.
*/
void truncate_inode_pages_range(struct address_space *mapping,
loff_t lstart, loff_t lend)
{
- const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
- const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
- struct pagevec pvec;
- pgoff_t index;
- pgoff_t end;
- int i;
+ pgoff_t start; /* inclusive */
+ pgoff_t end; /* exclusive */
+ unsigned int partial_start; /* inclusive */
+ unsigned int partial_end; /* exclusive */
+ struct pagevec pvec;
+ pgoff_t index;
+ int i;
cleancache_invalidate_inode(mapping);
if (mapping->nrpages == 0)
return;
- BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
- end = (lend >> PAGE_CACHE_SHIFT);
+ /* Offsets within partial pages */
+ partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+ partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+
+ /*
+ * 'start' and 'end' always covers the range of pages to be fully
+ * truncated. Partial pages are covered with 'partial_start' at the
+ * start of the range and 'partial_end' at the end of the range.
+ * Note that 'end' is exclusive while 'lend' is inclusive.
+ */
+ start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ if (lend == -1)
+ /*
+ * lend == -1 indicates end-of-file so we have to set 'end'
+ * to the highest possible pgoff_t and since the type is
+ * unsigned we're using -1.
+ */
+ end = -1;
+ else
+ end = (lend + 1) >> PAGE_CACHE_SHIFT;
pagevec_init(&pvec, 0);
index = start;
- while (index <= end && pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ while (index < end && pagevec_lookup(&pvec, mapping, index,
+ min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
/* We rely upon deletion not changing page->index */
index = page->index;
- if (index > end)
+ if (index >= end)
break;
if (!trylock_page(page))
@@ -247,27 +265,56 @@ void truncate_inode_pages_range(struct address_space *mapping,
index++;
}
- if (partial) {
+ if (partial_start) {
struct page *page = find_lock_page(mapping, start - 1);
if (page) {
+ unsigned int top = PAGE_CACHE_SIZE;
+ if (start > end) {
+ /* Truncation within a single page */
+ top = partial_end;
+ partial_end = 0;
+ }
wait_on_page_writeback(page);
- truncate_partial_page(page, partial);
+ zero_user_segment(page, partial_start, top);
+ cleancache_invalidate_page(mapping, page);
+ if (page_has_private(page))
+ do_invalidatepage(page, partial_start,
+ top - partial_start);
unlock_page(page);
page_cache_release(page);
}
}
+ if (partial_end) {
+ struct page *page = find_lock_page(mapping, end);
+ if (page) {
+ wait_on_page_writeback(page);
+ zero_user_segment(page, 0, partial_end);
+ cleancache_invalidate_page(mapping, page);
+ if (page_has_private(page))
+ do_invalidatepage(page, 0,
+ partial_end);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ }
+ /*
+ * If the truncation happened within a single page no pages
+ * will be released, just zeroed, so we can bail out now.
+ */
+ if (start >= end)
+ return;
index = start;
for ( ; ; ) {
cond_resched();
if (!pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
if (index == start)
break;
index = start;
continue;
}
- if (index == start && pvec.pages[0]->index > end) {
+ if (index == start && pvec.pages[0]->index >= end) {
pagevec_release(&pvec);
break;
}
@@ -277,7 +324,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
/* We rely upon deletion not changing page->index */
index = page->index;
- if (index > end)
+ if (index >= end)
break;
lock_page(page);
@@ -598,10 +645,8 @@ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
* This rounding is currently just for example: unmap_mapping_range
* expands its hole outwards, whereas we want it to contract the hole
* inwards. However, existing callers of truncate_pagecache_range are
- * doing their own page rounding first; and truncate_inode_pages_range
- * currently BUGs if lend is not pagealigned-1 (it handles partial
- * page at start of hole, but not partial page at end of hole). Note
- * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+ * doing their own page rounding first. Note that unmap_mapping_range
+ * allows holelen 0 for all, and we allow lend -1 for end of file.
*/
/*
OpenPOWER on IntegriCloud