Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig          1
-rw-r--r--  mm/backing-dev.c  186
-rw-r--r--  mm/filemap.c       18
-rw-r--r--  mm/gup.c          148
-rw-r--r--  mm/memory.c         2
-rw-r--r--  mm/migrate.c        2
-rw-r--r--  mm/page_alloc.c    46
-rw-r--r--  mm/percpu.c        40
-rw-r--r--  mm/swap.c          10
-rw-r--r--  mm/usercopy.c      19
-rw-r--r--  mm/vmalloc.c        2
-rw-r--r--  mm/vmstat.c         3
12 files changed, 296 insertions(+), 181 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig
index 9b8fccb..beb7a45 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -312,7 +312,6 @@ config NEED_BOUNCE_POOL
config NR_QUICK
int
depends on QUICKLIST
- default "2" if AVR32
default "1"
config VIRT_TO_BUS
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c6f2a37..f028a9a 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -12,8 +12,6 @@
#include <linux/device.h>
#include <trace/events/writeback.h>
-static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-
struct backing_dev_info noop_backing_dev_info = {
.name = "noop",
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
@@ -242,6 +240,8 @@ static __init int bdi_class_init(void)
}
postcore_initcall(bdi_class_init);
+static int bdi_init(struct backing_dev_info *bdi);
+
static int __init default_bdi_init(void)
{
int err;
@@ -294,6 +294,8 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
memset(wb, 0, sizeof(*wb));
+ if (wb != &bdi->wb)
+ bdi_get(bdi);
wb->bdi = bdi;
wb->last_old_flush = jiffies;
INIT_LIST_HEAD(&wb->b_dirty);
@@ -314,8 +316,10 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
wb->dirty_sleep = jiffies;
wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
- if (!wb->congested)
- return -ENOMEM;
+ if (!wb->congested) {
+ err = -ENOMEM;
+ goto out_put_bdi;
+ }
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
@@ -335,9 +339,14 @@ out_destroy_stat:
fprop_local_destroy_percpu(&wb->completions);
out_put_cong:
wb_congested_put(wb->congested);
+out_put_bdi:
+ if (wb != &bdi->wb)
+ bdi_put(bdi);
return err;
}
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
+
/*
* Remove bdi from the global list and shutdown any threads we have running
*/
@@ -347,10 +356,18 @@ static void wb_shutdown(struct bdi_writeback *wb)
spin_lock_bh(&wb->work_lock);
if (!test_and_clear_bit(WB_registered, &wb->state)) {
spin_unlock_bh(&wb->work_lock);
+ /*
+ * Wait for wb shutdown to finish if someone else is just
+ * running wb_shutdown(). Otherwise we could proceed to wb /
+ * bdi destruction before wb_shutdown() is finished.
+ */
+ wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
return;
}
+ set_bit(WB_shutting_down, &wb->state);
spin_unlock_bh(&wb->work_lock);
+ cgwb_remove_from_bdi_list(wb);
/*
* Drain work list and shutdown the delayed_work. !WB_registered
* tells wb_workfn() that @wb is dying and its work_list needs to
@@ -359,6 +376,12 @@ static void wb_shutdown(struct bdi_writeback *wb)
mod_delayed_work(bdi_wq, &wb->dwork, 0);
flush_delayed_work(&wb->dwork);
WARN_ON(!list_empty(&wb->work_list));
+ /*
+ * Make sure bit gets cleared after shutdown is finished. Matches with
+ * the barrier provided by test_and_clear_bit() above.
+ */
+ smp_wmb();
+ clear_bit(WB_shutting_down, &wb->state);
}
static void wb_exit(struct bdi_writeback *wb)
@@ -372,6 +395,8 @@ static void wb_exit(struct bdi_writeback *wb)
fprop_local_destroy_percpu(&wb->completions);
wb_congested_put(wb->congested);
+ if (wb != &wb->bdi->wb)
+ bdi_put(wb->bdi);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -381,11 +406,9 @@ static void wb_exit(struct bdi_writeback *wb)
/*
* cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
* blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
- * protected. cgwb_release_wait is used to wait for the completion of cgwb
- * releases from bdi destruction path.
+ * protected.
*/
static DEFINE_SPINLOCK(cgwb_lock);
-static DECLARE_WAIT_QUEUE_HEAD(cgwb_release_wait);
/**
* wb_congested_get_create - get or create a wb_congested
@@ -438,7 +461,7 @@ retry:
return NULL;
atomic_set(&new_congested->refcnt, 0);
- new_congested->bdi = bdi;
+ new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
@@ -466,10 +489,10 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
}
/* bdi might already have been destroyed leaving @congested unlinked */
- if (congested->bdi) {
+ if (congested->__bdi) {
rb_erase(&congested->rb_node,
- &congested->bdi->cgwb_congested_tree);
- congested->bdi = NULL;
+ &congested->__bdi->cgwb_congested_tree);
+ congested->__bdi = NULL;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -480,11 +503,6 @@ static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
release_work);
- struct backing_dev_info *bdi = wb->bdi;
-
- spin_lock_irq(&cgwb_lock);
- list_del_rcu(&wb->bdi_node);
- spin_unlock_irq(&cgwb_lock);
wb_shutdown(wb);
@@ -495,9 +513,6 @@ static void cgwb_release_workfn(struct work_struct *work)
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
kfree_rcu(wb, rcu);
-
- if (atomic_dec_and_test(&bdi->usage_cnt))
- wake_up_all(&cgwb_release_wait);
}
static void cgwb_release(struct percpu_ref *refcnt)
@@ -517,6 +532,13 @@ static void cgwb_kill(struct bdi_writeback *wb)
percpu_ref_kill(&wb->refcnt);
}
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+ spin_lock_irq(&cgwb_lock);
+ list_del_rcu(&wb->bdi_node);
+ spin_unlock_irq(&cgwb_lock);
+}
+
static int cgwb_create(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css, gfp_t gfp)
{
@@ -580,7 +602,6 @@ static int cgwb_create(struct backing_dev_info *bdi,
/* we might have raced another instance of this function */
ret = radix_tree_insert(&bdi->cgwb_tree, memcg_css->id, wb);
if (!ret) {
- atomic_inc(&bdi->usage_cnt);
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list);
@@ -670,7 +691,6 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
bdi->cgwb_congested_tree = RB_ROOT;
- atomic_set(&bdi->usage_cnt, 1);
ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (!ret) {
@@ -680,29 +700,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
return ret;
}
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi)
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
{
struct radix_tree_iter iter;
void **slot;
+ struct bdi_writeback *wb;
WARN_ON(test_bit(WB_registered, &bdi->wb.state));
spin_lock_irq(&cgwb_lock);
radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
cgwb_kill(*slot);
- spin_unlock_irq(&cgwb_lock);
- /*
- * All cgwb's must be shutdown and released before returning. Drain
- * the usage counter to wait for all cgwb's ever created on @bdi.
- */
- atomic_dec(&bdi->usage_cnt);
- wait_event(cgwb_release_wait, !atomic_read(&bdi->usage_cnt));
- /*
- * Grab back our reference so that we hold it when @bdi gets
- * re-registered.
- */
- atomic_inc(&bdi->usage_cnt);
+ while (!list_empty(&bdi->wb_list)) {
+ wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
+ bdi_node);
+ spin_unlock_irq(&cgwb_lock);
+ wb_shutdown(wb);
+ spin_lock_irq(&cgwb_lock);
+ }
+ spin_unlock_irq(&cgwb_lock);
}
/**
@@ -752,11 +769,18 @@ static void cgwb_bdi_exit(struct backing_dev_info *bdi)
rb_entry(rbn, struct bdi_writeback_congested, rb_node);
rb_erase(rbn, &bdi->cgwb_congested_tree);
- congested->bdi = NULL; /* mark @congested unlinked */
+ congested->__bdi = NULL; /* mark @congested unlinked */
}
spin_unlock_irq(&cgwb_lock);
}
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+ spin_lock_irq(&cgwb_lock);
+ list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+ spin_unlock_irq(&cgwb_lock);
+}
+
#else /* CONFIG_CGROUP_WRITEBACK */
static int cgwb_bdi_init(struct backing_dev_info *bdi)
@@ -777,16 +801,26 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
return 0;
}
-static void cgwb_bdi_destroy(struct backing_dev_info *bdi) { }
+static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
static void cgwb_bdi_exit(struct backing_dev_info *bdi)
{
wb_congested_put(bdi->wb_congested);
}
+static void cgwb_bdi_register(struct backing_dev_info *bdi)
+{
+ list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
+}
+
+static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
+{
+ list_del_rcu(&wb->bdi_node);
+}
+
#endif /* CONFIG_CGROUP_WRITEBACK */
-int bdi_init(struct backing_dev_info *bdi)
+static int bdi_init(struct backing_dev_info *bdi)
{
int ret;
@@ -802,11 +836,8 @@ int bdi_init(struct backing_dev_info *bdi)
ret = cgwb_bdi_init(bdi);
- list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
-
return ret;
}
-EXPORT_SYMBOL(bdi_init);
struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
{
@@ -823,22 +854,20 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
}
return bdi;
}
+EXPORT_SYMBOL(bdi_alloc_node);
-int bdi_register(struct backing_dev_info *bdi, struct device *parent,
- const char *fmt, ...)
+int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
- va_list args;
struct device *dev;
if (bdi->dev) /* The driver needs to use separate queues per device */
return 0;
- va_start(args, fmt);
- dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
- va_end(args);
+ dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
if (IS_ERR(dev))
return PTR_ERR(dev);
+ cgwb_bdi_register(bdi);
bdi->dev = dev;
bdi_debug_register(bdi, dev_name(dev));
@@ -851,20 +880,25 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
trace_writeback_bdi_register(bdi);
return 0;
}
-EXPORT_SYMBOL(bdi_register);
+EXPORT_SYMBOL(bdi_register_va);
-int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev)
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
{
- return bdi_register(bdi, NULL, "%u:%u", MAJOR(dev), MINOR(dev));
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = bdi_register_va(bdi, fmt, args);
+ va_end(args);
+ return ret;
}
-EXPORT_SYMBOL(bdi_register_dev);
+EXPORT_SYMBOL(bdi_register);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner)
{
int rc;
- rc = bdi_register(bdi, NULL, "%u:%u", MAJOR(owner->devt),
- MINOR(owner->devt));
+ rc = bdi_register(bdi, "%u:%u", MAJOR(owner->devt), MINOR(owner->devt));
if (rc)
return rc;
/* Leaking owner reference... */
@@ -892,7 +926,7 @@ void bdi_unregister(struct backing_dev_info *bdi)
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
- cgwb_bdi_destroy(bdi);
+ cgwb_bdi_unregister(bdi);
if (bdi->dev) {
bdi_debug_unregister(bdi);
@@ -906,19 +940,16 @@ void bdi_unregister(struct backing_dev_info *bdi)
}
}
-static void bdi_exit(struct backing_dev_info *bdi)
-{
- WARN_ON_ONCE(bdi->dev);
- wb_exit(&bdi->wb);
- cgwb_bdi_exit(bdi);
-}
-
static void release_bdi(struct kref *ref)
{
struct backing_dev_info *bdi =
container_of(ref, struct backing_dev_info, refcnt);
- bdi_exit(bdi);
+ if (test_bit(WB_registered, &bdi->wb.state))
+ bdi_unregister(bdi);
+ WARN_ON_ONCE(bdi->dev);
+ wb_exit(&bdi->wb);
+ cgwb_bdi_exit(bdi);
kfree(bdi);
}
@@ -926,38 +957,7 @@ void bdi_put(struct backing_dev_info *bdi)
{
kref_put(&bdi->refcnt, release_bdi);
}
-
-void bdi_destroy(struct backing_dev_info *bdi)
-{
- bdi_unregister(bdi);
- bdi_exit(bdi);
-}
-EXPORT_SYMBOL(bdi_destroy);
-
-/*
- * For use from filesystems to quickly init and register a bdi associated
- * with dirty writeback
- */
-int bdi_setup_and_register(struct backing_dev_info *bdi, char *name)
-{
- int err;
-
- bdi->name = name;
- bdi->capabilities = 0;
- err = bdi_init(bdi);
- if (err)
- return err;
-
- err = bdi_register(bdi, NULL, "%.28s-%ld", name,
- atomic_long_inc_return(&bdi_seq));
- if (err) {
- bdi_destroy(bdi);
- return err;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(bdi_setup_and_register);
+EXPORT_SYMBOL(bdi_put);
static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
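The backing-dev.c changes above replace the bdi usage counter and the cgwb_release_wait queue with plain bdi refcounting (bdi_get()/bdi_put()), and close a race in wb_shutdown(): a caller that finds WB_registered already cleared now waits for the WB_shutting_down bit to drop instead of returning while another thread is still mid-teardown. Below is a minimal userspace sketch of that handshake, compressed into a single compare-and-swap; the kernel version uses bit ops under wb->work_lock plus wait_on_bit(), and all names here are invented for illustration.

#include <stdatomic.h>
#include <sched.h>

#define REGISTERED    1u
#define SHUTTING_DOWN 2u

static _Atomic unsigned int wb_state = REGISTERED;

static void wb_shutdown_once(void)
{
	unsigned int expected = REGISTERED;

	/* Only one thread wins the REGISTERED -> SHUTTING_DOWN transition. */
	if (!atomic_compare_exchange_strong(&wb_state, &expected,
					    SHUTTING_DOWN)) {
		/*
		 * Lost the race: someone else is (or was) tearing down.
		 * Wait until teardown finishes so our caller may safely
		 * free the containing object, mirroring wait_on_bit().
		 */
		while (atomic_load(&wb_state) & SHUTTING_DOWN)
			sched_yield();
		return;
	}

	/* ... drain the work list and flush delayed work here ... */

	atomic_store(&wb_state, 0);	/* wakes the spinners above */
}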
diff --git a/mm/filemap.c b/mm/filemap.c
index cc480c0..dc59c5f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(filemap_write_and_wait);
*
* Write out and wait upon file offsets lstart->lend, inclusive.
*
- * Note that `lend' is inclusive (describes the last byte to be written) so
+ * Note that @lend is inclusive (describes the last byte to be written) so
* that this function can be used to write to the very end-of-file (end = -1).
*/
int filemap_write_and_wait_range(struct address_space *mapping,
@@ -1277,12 +1277,14 @@ EXPORT_SYMBOL(find_lock_entry);
*
* PCG flags modify how the page is returned.
*
- * FGP_ACCESSED: the page will be marked accessed
- * FGP_LOCK: Page is return locked
- * FGP_CREAT: If page is not present then a new page is allocated using
- * @gfp_mask and added to the page cache and the VM's LRU
- * list. The page is returned locked and with an increased
- * refcount. Otherwise, %NULL is returned.
+ * @fgp_flags can be:
+ *
+ * - FGP_ACCESSED: the page will be marked accessed
+ * - FGP_LOCK: Page is return locked
+ * - FGP_CREAT: If page is not present then a new page is allocated using
+ * @gfp_mask and added to the page cache and the VM's LRU
+ * list. The page is returned locked and with an increased
+ * refcount. Otherwise, NULL is returned.
*
* If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
* if the GFP flags specified for FGP_CREAT are atomic.
@@ -3000,7 +3002,7 @@ EXPORT_SYMBOL(generic_file_write_iter);
* @gfp_mask: memory allocation flags (and I/O mode)
*
* The address_space is to try to release any data against the page
- * (presumably at page->private). If the release was successful, return `1'.
+ * (presumably at page->private). If the release was successful, return '1'.
* Otherwise return zero.
*
* This may also be called if PG_fscache is set on a page, indicating that the
diff --git a/mm/gup.c b/mm/gup.c
index 04aa405..527ec2c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1189,34 +1189,57 @@ struct page *get_dump_page(unsigned long addr)
*/
#ifdef CONFIG_HAVE_GENERIC_RCU_GUP
+#ifndef gup_get_pte
+/*
+ * We assume that the PTE can be read atomically. If this is not the case for
+ * your architecture, please provide the helper.
+ */
+static inline pte_t gup_get_pte(pte_t *ptep)
+{
+ return READ_ONCE(*ptep);
+}
+#endif
+
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+ while ((*nr) - nr_start) {
+ struct page *page = pages[--(*nr)];
+
+ ClearPageReferenced(page);
+ put_page(page);
+ }
+}
+
#ifdef __HAVE_ARCH_PTE_SPECIAL
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
+ struct dev_pagemap *pgmap = NULL;
+ int nr_start = *nr, ret = 0;
pte_t *ptep, *ptem;
- int ret = 0;
ptem = ptep = pte_offset_map(&pmd, addr);
do {
- /*
- * In the line below we are assuming that the pte can be read
- * atomically. If this is not the case for your architecture,
- * please wrap this in a helper function!
- *
- * for an example see gup_get_pte in arch/x86/mm/gup.c
- */
- pte_t pte = READ_ONCE(*ptep);
+ pte_t pte = gup_get_pte(ptep);
struct page *head, *page;
/*
* Similar to the PMD case below, NUMA hinting must take slow
* path using the pte_protnone check.
*/
- if (!pte_present(pte) || pte_special(pte) ||
- pte_protnone(pte) || (write && !pte_write(pte)))
+ if (pte_protnone(pte))
goto pte_unmap;
- if (!arch_pte_access_permitted(pte, write))
+ if (!pte_access_permitted(pte, write))
+ goto pte_unmap;
+
+ if (pte_devmap(pte)) {
+ pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ goto pte_unmap;
+ }
+ } else if (pte_special(pte))
goto pte_unmap;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -1232,6 +1255,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
}
VM_BUG_ON_PAGE(compound_head(page) != head, page);
+
+ put_dev_pagemap(pgmap);
+ SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -1261,15 +1287,76 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */
+#ifdef __HAVE_ARCH_PTE_DEVMAP
+static int __gup_device_huge(unsigned long pfn, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ int nr_start = *nr;
+ struct dev_pagemap *pgmap = NULL;
+
+ do {
+ struct page *page = pfn_to_page(pfn);
+
+ pgmap = get_dev_pagemap(pfn, pgmap);
+ if (unlikely(!pgmap)) {
+ undo_dev_pagemap(nr, nr_start, pages);
+ return 0;
+ }
+ SetPageReferenced(page);
+ pages[*nr] = page;
+ get_page(page);
+ put_dev_pagemap(pgmap);
+ (*nr)++;
+ pfn++;
+ } while (addr += PAGE_SIZE, addr != end);
+ return 1;
+}
+
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ unsigned long fault_pfn;
+
+ fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ return __gup_device_huge(fault_pfn, addr, end, pages, nr);
+}
+#else
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ BUILD_BUG();
+ return 0;
+}
+
+static int __gup_device_huge_pud(pud_t pud, unsigned long addr,
+ unsigned long end, struct page **pages, int *nr)
+{
+ BUILD_BUG();
+ return 0;
+}
+#endif
+
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
- if (write && !pmd_write(orig))
+ if (!pmd_access_permitted(orig, write))
return 0;
+ if (pmd_devmap(orig))
+ return __gup_device_huge_pmd(orig, addr, end, pages, nr);
+
refs = 0;
head = pmd_page(orig);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
@@ -1293,6 +1380,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
return 0;
}
+ SetPageReferenced(head);
return 1;
}
@@ -1302,9 +1390,12 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
struct page *head, *page;
int refs;
- if (write && !pud_write(orig))
+ if (!pud_access_permitted(orig, write))
return 0;
+ if (pud_devmap(orig))
+ return __gup_device_huge_pud(orig, addr, end, pages, nr);
+
refs = 0;
head = pud_page(orig);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
@@ -1328,6 +1419,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
return 0;
}
+ SetPageReferenced(head);
return 1;
}
@@ -1338,9 +1430,10 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
int refs;
struct page *head, *page;
- if (write && !pgd_write(orig))
+ if (!pgd_access_permitted(orig, write))
return 0;
+ BUILD_BUG_ON(pgd_devmap(orig));
refs = 0;
head = pgd_page(orig);
page = head + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
@@ -1364,6 +1457,7 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
return 0;
}
+ SetPageReferenced(head);
return 1;
}
@@ -1520,6 +1614,21 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
return nr;
}
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
+
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -1539,11 +1648,14 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- int nr, ret;
+ int nr = 0, ret = 0;
start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
+
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ ret = nr;
+ }
if (nr < nr_pages) {
/* Try to get the remaining pages with get_user_pages */
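The gup_fast_permitted() hook introduced above lets an architecture veto the lockless fast path for a range; the generic fallback only rejects ranges whose end would wrap around the address space. A standalone sketch of that wraparound check (the PAGE_SHIFT value is an assumption; this is an illustration, not the kernel function itself):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages */

static bool range_permitted(unsigned long start, int nr_pages)
{
	unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
	unsigned long end = start + len;	/* unsigned: may wrap */

	return end >= start;	/* false iff the addition wrapped */
}

int main(void)
{
	printf("%d\n", range_permitted(0x1000, 16));	  /* 1: valid   */
	printf("%d\n", range_permitted(~0UL - 4096, 16)); /* 0: wrapped */
	return 0;
}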
diff --git a/mm/memory.c b/mm/memory.c
index 235ba51..6ff5d72 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4298,7 +4298,7 @@ void __might_fault(const char *file, int line)
* get paged out, therefore we'll never actually fault, and the
* below annotations will generate false positives.
*/
- if (segment_eq(get_fs(), KERNEL_DS))
+ if (uaccess_kernel())
return;
if (pagefault_disabled())
return;
diff --git a/mm/migrate.c b/mm/migrate.c
index ed97c2c..738f1d5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -184,9 +184,9 @@ void putback_movable_pages(struct list_head *l)
unlock_page(page);
put_page(page);
} else {
- putback_lru_page(page);
dec_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
+ putback_lru_page(page);
}
}
}
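The migrate.c reordering above matters because putback_lru_page() may drop the last reference and free the page; the NR_ISOLATED accounting calls page_is_file_cache(page), so it has to run while the reference is still held. A generic sketch of the rule, with invented types — read everything you need from an object before dropping the reference that may free it:

#include <stdlib.h>

struct obj {
	int refcnt;
	int is_file;
};

static void put_obj(struct obj *o)
{
	if (--o->refcnt == 0)
		free(o);	/* o must not be touched after this */
}

static void putback(struct obj *o, long *anon_stat, long *file_stat)
{
	/* Inspect the object while our reference still pins it... */
	long *stat = o->is_file ? file_stat : anon_stat;

	(*stat)--;
	/* ...and only then drop the reference that may free it. */
	put_obj(o);
}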
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f3d603c..bd01501 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1090,10 +1090,10 @@ static void free_pcppages_bulk(struct zone *zone, int count,
{
int migratetype = 0;
int batch_free = 0;
- unsigned long nr_scanned, flags;
+ unsigned long nr_scanned;
bool isolated_pageblocks;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
isolated_pageblocks = has_isolate_pageblock(zone);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
@@ -1142,7 +1142,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--count && --batch_free && !list_empty(list));
}
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -1150,9 +1150,8 @@ static void free_one_page(struct zone *zone,
unsigned int order,
int migratetype)
{
- unsigned long nr_scanned, flags;
- spin_lock_irqsave(&zone->lock, flags);
- __count_vm_events(PGFREE, 1 << order);
+ unsigned long nr_scanned;
+ spin_lock(&zone->lock);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
@@ -1162,7 +1161,7 @@ static void free_one_page(struct zone *zone,
migratetype = get_pfnblock_migratetype(page, pfn);
}
__free_one_page(page, pfn, zone, order, migratetype);
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
}
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
@@ -1240,6 +1239,7 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
static void __free_pages_ok(struct page *page, unsigned int order)
{
+ unsigned long flags;
int migratetype;
unsigned long pfn = page_to_pfn(page);
@@ -1247,7 +1247,10 @@ static void __free_pages_ok(struct page *page, unsigned int order)
return;
migratetype = get_pfnblock_migratetype(page, pfn);
+ local_irq_save(flags);
+ __count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, pfn, order, migratetype);
+ local_irq_restore(flags);
}
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
@@ -2219,9 +2222,8 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
int migratetype, bool cold)
{
int i, alloced = 0;
- unsigned long flags;
- spin_lock_irqsave(&zone->lock, flags);
+ spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype);
if (unlikely(page == NULL))
@@ -2257,7 +2259,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock_irqrestore(&zone->lock, flags);
+ spin_unlock(&zone->lock);
return alloced;
}
@@ -2485,20 +2487,17 @@ void free_hot_cold_page(struct page *page, bool cold)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
+ unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
- if (in_interrupt()) {
- __free_pages_ok(page, 0);
- return;
- }
-
if (!free_pcp_prepare(page))
return;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
- preempt_disable();
+ local_irq_save(flags);
+ __count_vm_event(PGFREE);
/*
* We only track unmovable, reclaimable and movable on pcp lists.
@@ -2515,7 +2514,6 @@ void free_hot_cold_page(struct page *page, bool cold)
migratetype = MIGRATE_MOVABLE;
}
- __count_vm_event(PGFREE);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
if (!cold)
list_add(&page->lru, &pcp->lists[migratetype]);
@@ -2529,7 +2527,7 @@ void free_hot_cold_page(struct page *page, bool cold)
}
out:
- preempt_enable();
+ local_irq_restore(flags);
}
/*
@@ -2654,8 +2652,6 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
{
struct page *page;
- VM_BUG_ON(in_interrupt());
-
do {
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
@@ -2686,8 +2682,9 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct list_head *list;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
+ unsigned long flags;
- preempt_disable();
+ local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
@@ -2695,7 +2692,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
}
- preempt_enable();
+ local_irq_restore(flags);
return page;
}
@@ -2711,7 +2708,7 @@ struct page *rmqueue(struct zone *preferred_zone,
unsigned long flags;
struct page *page;
- if (likely(order == 0) && !in_interrupt()) {
+ if (likely(order == 0)) {
page = rmqueue_pcplist(preferred_zone, zone, order,
gfp_flags, migratetype);
goto out;
@@ -4250,7 +4247,8 @@ EXPORT_SYMBOL(free_pages_exact);
* nr_free_zone_pages() counts the number of pages which are beyond the
* high watermark within all zones at or below a given zone index. For each
* zone, the number of pages is calculated as:
- * managed_pages - high_pages
+ *
+ * nr_free_zone_pages = managed_pages - high_pages
*/
static unsigned long nr_free_zone_pages(int offset)
{
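As a self-contained sketch of the per-zone calculation documented above (the real function walks a zonelist; the zone array and numbers here are invented):

#include <stdio.h>

struct zone {
	unsigned long managed_pages;
	unsigned long high_wmark;	/* high watermark, in pages */
};

static unsigned long nr_free_zone_pages(const struct zone *zones,
					int nr_zones, int offset)
{
	unsigned long sum = 0;

	for (int i = 0; i <= offset && i < nr_zones; i++) {
		const struct zone *z = &zones[i];

		/* managed_pages - high_pages, clamped at zero */
		if (z->managed_pages > z->high_wmark)
			sum += z->managed_pages - z->high_wmark;
	}
	return sum;
}

int main(void)
{
	struct zone zones[] = { { 4096, 128 }, { 262144, 1024 } };

	printf("%lu\n", nr_free_zone_pages(zones, 2, 1)); /* 265088 */
	return 0;
}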
diff --git a/mm/percpu.c b/mm/percpu.c
index 60a6488..e0aa8ae 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1284,18 +1284,7 @@ void free_percpu(void __percpu *ptr)
}
EXPORT_SYMBOL_GPL(free_percpu);
-/**
- * is_kernel_percpu_address - test whether address is from static percpu area
- * @addr: address to test
- *
- * Test whether @addr belongs to in-kernel static percpu area. Module
- * static percpu areas are not considered. For those, use
- * is_module_percpu_address().
- *
- * RETURNS:
- * %true if @addr is from in-kernel static percpu area, %false otherwise.
- */
-bool is_kernel_percpu_address(unsigned long addr)
+bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr)
{
#ifdef CONFIG_SMP
const size_t static_size = __per_cpu_end - __per_cpu_start;
@@ -1304,16 +1293,39 @@ bool is_kernel_percpu_address(unsigned long addr)
for_each_possible_cpu(cpu) {
void *start = per_cpu_ptr(base, cpu);
+ void *va = (void *)addr;
- if ((void *)addr >= start && (void *)addr < start + static_size)
+ if (va >= start && va < start + static_size) {
+ if (can_addr) {
+ *can_addr = (unsigned long) (va - start);
+ *can_addr += (unsigned long)
+ per_cpu_ptr(base, get_boot_cpu_id());
+ }
return true;
- }
+ }
+ }
#endif
/* on UP, can't distinguish from other static vars, always false */
return false;
}
/**
+ * is_kernel_percpu_address - test whether address is from static percpu area
+ * @addr: address to test
+ *
+ * Test whether @addr belongs to in-kernel static percpu area. Module
+ * static percpu areas are not considered. For those, use
+ * is_module_percpu_address().
+ *
+ * RETURNS:
+ * %true if @addr is from in-kernel static percpu area, %false otherwise.
+ */
+bool is_kernel_percpu_address(unsigned long addr)
+{
+ return __is_kernel_percpu_address(addr, NULL);
+}
+
+/**
* per_cpu_ptr_to_phys - convert translated percpu address to physical address
* @addr: the address to be converted to physical address
*
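__is_kernel_percpu_address() above extends the old membership test: when @addr falls inside some CPU's copy of the static per-CPU area, it can also report the equivalent address inside one canonical copy (the boot CPU's), so callers can normalize any per-CPU alias to a single address. A userspace sketch of that range test and offset translation, with an invented layout:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS    4
#define CHUNK_SIZE 4096UL

static unsigned char area[NR_CPUS][CHUNK_SIZE];	/* one copy per CPU */

static bool percpu_to_canonical(uintptr_t addr, uintptr_t *can_addr)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		uintptr_t start = (uintptr_t)area[cpu];

		if (addr >= start && addr < start + CHUNK_SIZE) {
			if (can_addr)	/* offset + canonical base */
				*can_addr = (addr - start) +
					    (uintptr_t)area[0];
			return true;
		}
	}
	return false;	/* not a static per-CPU address */
}

int main(void)
{
	uintptr_t can;

	if (percpu_to_canonical((uintptr_t)&area[2][100], &can))
		printf("canonical copy at %p\n", (void *)can);
	return 0;
}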
diff --git a/mm/swap.c b/mm/swap.c
index 5dabf44..d8d9ee9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -97,6 +97,16 @@ static void __put_compound_page(struct page *page)
void __put_page(struct page *page)
{
+ if (is_zone_device_page(page)) {
+ put_dev_pagemap(page->pgmap);
+
+ /*
+ * The page belongs to the device that created pgmap. Do
+ * not return it to page allocator.
+ */
+ return;
+ }
+
if (unlikely(PageCompound(page)))
__put_compound_page(page);
else
diff --git a/mm/usercopy.c b/mm/usercopy.c
index d155e12..a9852b2 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -19,15 +19,9 @@
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
+#include <linux/thread_info.h>
#include <asm/sections.h>
-enum {
- BAD_STACK = -1,
- NOT_STACK = 0,
- GOOD_FRAME,
- GOOD_STACK,
-};
-
/*
* Checks if a given pointer and length is contained by the current
* stack frame (if possible).
@@ -206,17 +200,6 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n,
{
struct page *page;
- /*
- * Some architectures (arm64) return true for virt_addr_valid() on
- * vmalloced addresses. Work around this by checking for vmalloc
- * first.
- *
- * We also need to check for module addresses explicitly since we
- * may copy static data from modules to userspace
- */
- if (is_vmalloc_or_module_addr(ptr))
- return NULL;
-
if (!virt_addr_valid(ptr))
return NULL;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0b05762..b52aeed 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1579,7 +1579,7 @@ void vfree_atomic(const void *addr)
* have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
* conventions for vfree() arch-dependent would be a really bad idea)
*
- * NOTE: assumes that the object at *addr has a size >= sizeof(llist_node)
+ * NOTE: assumes that the object at @addr has a size >= sizeof(llist_node)
*/
void vfree(const void *addr)
{
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 809025e..5a4f5c5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1768,8 +1768,7 @@ void __init init_mm_internals(void)
{
int ret __maybe_unused;
- mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
- WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ mm_percpu_wq = alloc_workqueue("mm_percpu_wq", WQ_MEM_RECLAIM, 0);
#ifdef CONFIG_SMP
ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",