diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/fadvise.c | 11 | ||||
-rw-r--r-- | mm/hugetlb.c | 42 | ||||
-rw-r--r-- | mm/kasan/kasan.c | 4 | ||||
-rw-r--r-- | mm/memcontrol.c | 5 | ||||
-rw-r--r-- | mm/oom_kill.c | 7 | ||||
-rw-r--r-- | mm/page_alloc.c | 39 | ||||
-rw-r--r-- | mm/page_owner.c | 26 | ||||
-rw-r--r-- | mm/page_poison.c | 8 | ||||
-rw-r--r-- | mm/swap.c | 20 | ||||
-rw-r--r-- | mm/swap_state.c | 5 | ||||
-rw-r--r-- | mm/vmalloc.c | 9 | ||||
-rw-r--r-- | mm/vmstat.c | 2 | ||||
-rw-r--r-- | mm/z3fold.c | 24 |
13 files changed, 163 insertions, 39 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c index b8024fa..6c707bf 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -126,6 +126,17 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) */ start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT; end_index = (endbyte >> PAGE_SHIFT); + if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) { + /* First page is tricky as 0 - 1 = -1, but pgoff_t + * is unsigned, so the end_index >= start_index + * check below would be true and we'll discard the whole + * file cache which is not what was asked. + */ + if (end_index == 0) + break; + + end_index--; + } if (end_index >= start_index) { unsigned long count = invalidate_mapping_pages(mapping, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d26162e..388c2bb 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg) * Only the process that called mmap() has reserves for * private mappings. */ - if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) - return true; + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { + /* + * Like the shared case above, a hole punch or truncate + * could have been performed on the private mapping. + * Examine the value of chg to determine if reserves + * actually exist or were previously consumed. + * Very Subtle - The value of chg comes from a previous + * call to vma_needs_reserves(). The reserve map for + * private mappings has different (opposite) semantics + * than that of shared mappings. vma_needs_reserves() + * has already taken this difference in semantics into + * account. Therefore, the meaning of chg is the same + * as in the shared case above. Code could easily be + * combined, but keeping it separate draws attention to + * subtle differences. + */ + if (chg) + return false; + else + return true; + } return false; } @@ -1816,6 +1835,25 @@ static long __vma_reservation_common(struct hstate *h, if (vma->vm_flags & VM_MAYSHARE) return ret; + else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) { + /* + * In most cases, reserves always exist for private mappings. + * However, a file associated with mapping could have been + * hole punched or truncated after reserves were consumed. + * As subsequent fault on such a range will not use reserves. + * Subtle - The reserve map for private mappings has the + * opposite meaning than that of shared mappings. If NO + * entry is in the reserve map, it means a reservation exists. + * If an entry exists in the reserve map, it means the + * reservation has already been consumed. As a result, the + * return value of this routine is the opposite of the + * value returned from reserve map manipulation routines above. + */ + if (ret) + return 0; + else + return 1; + } else return ret < 0 ? ret : 0; } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 18b6a2b..28439ac 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -763,8 +763,8 @@ static int kasan_mem_notifier(struct notifier_block *nb, static int __init kasan_memhotplug_init(void) { - pr_err("WARNING: KASAN doesn't support memory hot-add\n"); - pr_err("Memory hot-add will be disabled\n"); + pr_info("WARNING: KASAN doesn't support memory hot-add\n"); + pr_info("Memory hot-add will be disabled\n"); hotplug_memory_notifier(kasan_mem_notifier, 0); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 925b431..75e7440 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1608,7 +1608,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg) static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) { - if (!current->memcg_may_oom || current->memcg_in_oom) + if (!current->memcg_may_oom) return; /* * We are in the middle of the charge context here, so we @@ -2896,6 +2896,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) * ordering is imposed by list_lru_node->lock taken by * memcg_drain_all_list_lrus(). */ + rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */ css_for_each_descendant_pre(css, &memcg->css) { child = mem_cgroup_from_css(css); BUG_ON(child->kmemcg_id != kmemcg_id); @@ -2903,6 +2904,8 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) if (!memcg->use_hierarchy) break; } + rcu_read_unlock(); + memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id); memcg_free_cache_id(kmemcg_id); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index dfb1ab6..acbc432 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -625,8 +625,6 @@ void try_oom_reaper(struct task_struct *tsk) if (atomic_read(&mm->mm_users) > 1) { rcu_read_lock(); for_each_process(p) { - bool exiting; - if (!process_shares_mm(p, mm)) continue; if (fatal_signal_pending(p)) @@ -636,10 +634,7 @@ void try_oom_reaper(struct task_struct *tsk) * If the task is exiting make sure the whole thread group * is exiting and cannot acces mm anymore. */ - spin_lock_irq(&p->sighand->siglock); - exiting = signal_group_exit(p->signal); - spin_unlock_irq(&p->sighand->siglock); - if (exiting) + if (signal_group_exit(p->signal)) continue; /* Give up */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f8f3bfc..6903b69 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); INIT_LIST_HEAD(&page->lru); @@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page, return; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags); set_page_private(page, 0); @@ -2609,11 +2615,12 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, page = list_last_entry(list, struct page, lru); else page = list_first_entry(list, struct page, lru); - } while (page && check_new_pcp(page)); - __dec_zone_state(zone, NR_ALLOC_BATCH); - list_del(&page->lru); - pcp->count--; + __dec_zone_state(zone, NR_ALLOC_BATCH); + list_del(&page->lru); + pcp->count--; + + } while (check_new_pcp(page)); } else { /* * We most definitely don't want callers attempting to @@ -3023,6 +3030,7 @@ reset_fair: apply_fair = false; fair_skipped = false; reset_alloc_batches(ac->preferred_zoneref->zone); + z = ac->preferred_zoneref; goto zonelist_scan; } @@ -3596,6 +3604,17 @@ retry: */ alloc_flags = gfp_to_alloc_flags(gfp_mask); + /* + * Reset the zonelist iterators if memory policies can be ignored. + * These allocations are high priority and system rather than user + * orientated. + */ + if ((alloc_flags & ALLOC_NO_WATERMARKS) || !(alloc_flags & ALLOC_CPUSET)) { + ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); + ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, ac->nodemask); + } + /* This is the last chance, in general, before the goto nopage. */ page = get_page_from_freelist(gfp_mask, order, alloc_flags & ~ALLOC_NO_WATERMARKS, ac); @@ -3604,12 +3623,6 @@ retry: /* Allocate without watermarks if the context allows */ if (alloc_flags & ALLOC_NO_WATERMARKS) { - /* - * Ignore mempolicies if ALLOC_NO_WATERMARKS on the grounds - * the allocation is high priority and these type of - * allocations are system rather than user orientated - */ - ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); page = get_page_from_freelist(gfp_mask, order, ALLOC_NO_WATERMARKS, ac); if (page) @@ -3808,7 +3821,11 @@ retry_cpuset: /* Dirty zone balancing only done in the fast path */ ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE); - /* The preferred zone is used for statistics later */ + /* + * The preferred zone is used for statistics but crucially it is + * also used as the starting point for the zonelist iterator. It + * may get reset for allocations that ignore memory policies. + */ ac.preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, ac.nodemask); if (!ac.preferred_zoneref) { diff --git a/mm/page_owner.c b/mm/page_owner.c index 792b56d..c6cda3e 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order) for (i = 0; i < (1 << order); i++) { page_ext = lookup_page_ext(page + i); + if (unlikely(!page_ext)) + continue; __clear_bit(PAGE_EXT_OWNER, &page_ext->flags); } } @@ -62,6 +64,7 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { struct page_ext *page_ext = lookup_page_ext(page); + struct stack_trace trace = { .nr_entries = 0, .max_entries = ARRAY_SIZE(page_ext->trace_entries), @@ -69,6 +72,9 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) .skip = 3, }; + if (unlikely(!page_ext)) + return; + save_stack_trace(&trace); page_ext->order = order; @@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) void __set_page_owner_migrate_reason(struct page *page, int reason) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; page_ext->last_migrate_reason = reason; } @@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason) gfp_t __get_page_owner_gfp(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + /* + * The caller just returns 0 if no valid gfp + * So return 0 here too. + */ + return 0; return page_ext->gfp_mask; } @@ -99,6 +113,9 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage) struct page_ext *new_ext = lookup_page_ext(newpage); int i; + if (unlikely(!old_ext || !new_ext)) + return; + new_ext->order = old_ext->order; new_ext->gfp_mask = old_ext->gfp_mask; new_ext->nr_entries = old_ext->nr_entries; @@ -193,6 +210,11 @@ void __dump_page_owner(struct page *page) gfp_t gfp_mask = page_ext->gfp_mask; int mt = gfpflags_to_migratetype(gfp_mask); + if (unlikely(!page_ext)) { + pr_alert("There is not page extension available.\n"); + return; + } + if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) { pr_alert("page_owner info is not active (free page?)\n"); return; @@ -251,6 +273,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos) } page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; /* * Some pages could be missed by concurrent allocation or free, @@ -317,6 +341,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone) continue; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; /* Maybe overraping zone */ if (test_bit(PAGE_EXT_OWNER, &page_ext->flags)) diff --git a/mm/page_poison.c b/mm/page_poison.c index 1eae5fa..2e647c6 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } @@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + return; + __clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); } @@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page) struct page_ext *page_ext; page_ext = lookup_page_ext(page); - if (!page_ext) + if (unlikely(!page_ext)) return false; return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags); @@ -667,6 +667,24 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy) static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); +/* + * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM + * workqueue, aiding in getting memory freed. + */ +static struct workqueue_struct *lru_add_drain_wq; + +static int __init lru_init(void) +{ + lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0); + + if (WARN(!lru_add_drain_wq, + "Failed to create workqueue lru_add_drain_wq")) + return -ENOMEM; + + return 0; +} +early_initcall(lru_init); + void lru_add_drain_all(void) { static DEFINE_MUTEX(lock); @@ -686,7 +704,7 @@ void lru_add_drain_all(void) pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || need_activate_page_drain(cpu)) { INIT_WORK(work, lru_add_drain_per_cpu); - schedule_work_on(cpu, work); + queue_work_on(cpu, lru_add_drain_wq, work); cpumask_set_cpu(cpu, &has_work); } } diff --git a/mm/swap_state.c b/mm/swap_state.c index 0d457e7..c99463a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page) void free_page_and_swap_cache(struct page *page) { free_swap_cache(page); - put_page(page); + if (is_huge_zero_page(page)) + put_huge_zero_page(); + else + put_page(page); } /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index cf7ad1a..e11475c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1105,7 +1105,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases); */ void vm_unmap_ram(const void *mem, unsigned int count) { - unsigned long size = count << PAGE_SHIFT; + unsigned long size = (unsigned long)count << PAGE_SHIFT; unsigned long addr = (unsigned long)mem; BUG_ON(!addr); @@ -1140,7 +1140,7 @@ EXPORT_SYMBOL(vm_unmap_ram); */ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) { - unsigned long size = count << PAGE_SHIFT; + unsigned long size = (unsigned long)count << PAGE_SHIFT; unsigned long addr; void *mem; @@ -1574,14 +1574,15 @@ void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot) { struct vm_struct *area; + unsigned long size; /* In bytes */ might_sleep(); if (count > totalram_pages) return NULL; - area = get_vm_area_caller((count << PAGE_SHIFT), flags, - __builtin_return_address(0)); + size = (unsigned long)count << PAGE_SHIFT; + area = get_vm_area_caller(size, flags, __builtin_return_address(0)); if (!area) return NULL; diff --git a/mm/vmstat.c b/mm/vmstat.c index 77e42ef..cb2a67b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m, continue; page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) continue; diff --git a/mm/z3fold.c b/mm/z3fold.c index 34917d5..8f9e89c 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -412,7 +412,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) /* HEADLESS page stored */ bud = HEADLESS; } else { - bud = (handle - zhdr->first_num) & BUDDY_MASK; + bud = handle_to_buddy(handle); switch (bud) { case FIRST: @@ -572,15 +572,19 @@ next: pool->pages_nr--; spin_unlock(&pool->lock); return 0; - } else if (zhdr->first_chunks != 0 && - zhdr->last_chunks != 0 && zhdr->middle_chunks != 0) { - /* Full, add to buddied list */ - list_add(&zhdr->buddy, &pool->buddied); - } else if (!test_bit(PAGE_HEADLESS, &page->private)) { - z3fold_compact_page(zhdr); - /* add to unbuddied list */ - freechunks = num_free_chunks(zhdr); - list_add(&zhdr->buddy, &pool->unbuddied[freechunks]); + } else if (!test_bit(PAGE_HEADLESS, &page->private)) { + if (zhdr->first_chunks != 0 && + zhdr->last_chunks != 0 && + zhdr->middle_chunks != 0) { + /* Full, add to buddied list */ + list_add(&zhdr->buddy, &pool->buddied); + } else { + z3fold_compact_page(zhdr); + /* add to unbuddied list */ + freechunks = num_free_chunks(zhdr); + list_add(&zhdr->buddy, + &pool->unbuddied[freechunks]); + } } /* add to beginning of LRU */ |