summaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c416
1 files changed, 233 insertions, 183 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7e208f0..df2022f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
#ifdef CONFIG_HIGHMEM
[N_HIGH_MEMORY] = { { [0] = 1UL } },
#endif
+#ifdef CONFIG_MOVABLE_NODE
+ [N_MEMORY] = { { [0] = 1UL } },
+#endif
[N_CPU] = { { [0] = 1UL } },
#endif /* NUMA */
};
@@ -218,11 +221,6 @@ EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly;
-/*
- * NOTE:
- * Don't use set_pageblock_migratetype(page, MIGRATE_ISOLATE) directly.
- * Instead, use {un}set_pageblock_isolate.
- */
void set_pageblock_migratetype(struct page *page, int migratetype)
{
@@ -368,8 +366,7 @@ static int destroy_compound_page(struct page *page, unsigned long order)
int nr_pages = 1 << order;
int bad = 0;
- if (unlikely(compound_order(page) != order) ||
- unlikely(!PageHead(page))) {
+ if (unlikely(compound_order(page) != order)) {
bad_page(page);
bad++;
}
@@ -523,7 +520,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
* If a block is freed, and its buddy is also free, then this
* triggers coalescing into a block of larger size.
*
- * -- wli
+ * -- nyc
*/
static inline void __free_one_page(struct page *page,
@@ -608,6 +605,7 @@ static inline int free_pages_check(struct page *page)
bad_page(page);
return 1;
}
+ reset_page_last_nid(page);
if (page->flags & PAGE_FLAGS_CHECK_AT_PREP)
page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
return 0;
@@ -667,11 +665,13 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
__free_one_page(page, zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
- if (is_migrate_cma(mt))
- __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+ if (likely(get_pageblock_migratetype(page) != MIGRATE_ISOLATE)) {
+ __mod_zone_page_state(zone, NR_FREE_PAGES, 1);
+ if (is_migrate_cma(mt))
+ __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
+ }
} while (--to_free && --batch_free && !list_empty(list));
}
- __mod_zone_page_state(zone, NR_FREE_PAGES, count);
spin_unlock(&zone->lock);
}
@@ -730,6 +730,13 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_irq_restore(flags);
}
+/*
+ * Read access to zone->managed_pages is safe because it's unsigned long,
+ * but we still need to serialize writers. Currently all callers of
+ * __free_pages_bootmem() except put_page_bootmem() should only be used
+ * at boot time. So for shorter boot time, we shift the burden to
+ * put_page_bootmem() to serialize writers.
+ */
void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
{
unsigned int nr_pages = 1 << order;
@@ -745,6 +752,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
set_page_count(p, 0);
}
+ page_zone(page)->managed_pages += 1 << order;
set_page_refcounted(page);
__free_pages(page, order);
}
@@ -780,7 +788,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
* large block of memory acted on by a series of small allocations.
* This behavior is a critical factor in sglist merging's success.
*
- * -- wli
+ * -- nyc
*/
static inline void expand(struct zone *zone, struct page *page,
int low, int high, struct free_area *area,
@@ -1376,14 +1384,8 @@ void split_page(struct page *page, unsigned int order)
set_page_refcounted(page + i);
}
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
{
- unsigned int order;
unsigned long watermark;
struct zone *zone;
int mt;
@@ -1391,27 +1393,23 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
BUG_ON(!PageBuddy(page));
zone = page_zone(page);
- order = page_order(page);
+ mt = get_pageblock_migratetype(page);
- /* Obey watermarks as if the page was being allocated */
- watermark = low_wmark_pages(zone) + (1 << order);
- if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
- return 0;
+ if (mt != MIGRATE_ISOLATE) {
+ /* Obey watermarks as if the page was being allocated */
+ watermark = low_wmark_pages(zone) + (1 << order);
+ if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+ return 0;
+
+ __mod_zone_freepage_state(zone, -(1UL << order), mt);
+ }
/* Remove page from free list */
list_del(&page->lru);
zone->free_area[order].nr_free--;
rmv_page_order(page);
- mt = get_pageblock_migratetype(page);
- if (unlikely(mt != MIGRATE_ISOLATE))
- __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
-
- if (alloc_order != order)
- expand(zone, page, alloc_order, order,
- &zone->free_area[order], migratetype);
-
- /* Set the pageblock if the captured page is at least a pageblock */
+ /* Set the pageblock if the isolated page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
@@ -1422,7 +1420,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
}
}
- return 1UL << alloc_order;
+ return 1UL << order;
}
/*
@@ -1440,10 +1438,9 @@ int split_free_page(struct page *page)
unsigned int order;
int nr_pages;
- BUG_ON(!PageBuddy(page));
order = page_order(page);
- nr_pages = capture_free_page(page, order, 0);
+ nr_pages = __isolate_free_page(page, order);
if (!nr_pages)
return 0;
@@ -1641,20 +1638,6 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
return true;
}
-#ifdef CONFIG_MEMORY_ISOLATION
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
- if (unlikely(zone->nr_pageblock_isolate))
- return zone->nr_pageblock_isolate * pageblock_nr_pages;
- return 0;
-}
-#else
-static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
-{
- return 0;
-}
-#endif
-
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags)
{
@@ -1670,14 +1653,6 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
- /*
- * If the zone has MIGRATE_ISOLATE type free pages, we should consider
- * it. nr_zone_isolate_freepages is never accurate so kswapd might not
- * sleep although it could do so. But this is more desirable for memory
- * hotplug than sleeping which can cause a livelock in the direct
- * reclaim path.
- */
- free_pages -= nr_zone_isolate_freepages(z);
return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
free_pages);
}
@@ -1692,7 +1667,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
*
* If the zonelist cache is present in the passed in zonelist, then
* returns a pointer to the allowed node mask (either the current
- * tasks mems_allowed, or node_states[N_HIGH_MEMORY].)
+ * tasks mems_allowed, or node_states[N_MEMORY].)
*
* If the zonelist cache is not available for this zonelist, does
* nothing and returns NULL.
@@ -1721,7 +1696,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ?
&cpuset_current_mems_allowed :
- &node_states[N_HIGH_MEMORY];
+ &node_states[N_MEMORY];
return allowednodes;
}
@@ -1871,7 +1846,7 @@ zonelist_scan:
*/
for_each_zone_zonelist_nodemask(zone, z, zonelist,
high_zoneidx, nodemask) {
- if (NUMA_BUILD && zlc_active &&
+ if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
if ((alloc_flags & ALLOC_CPUSET) &&
@@ -1917,7 +1892,8 @@ zonelist_scan:
classzone_idx, alloc_flags))
goto try_this_zone;
- if (NUMA_BUILD && !did_zlc_setup && nr_online_nodes > 1) {
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ !did_zlc_setup && nr_online_nodes > 1) {
/*
* we do zlc_setup if there are multiple nodes
* and before considering the first zone allowed
@@ -1936,7 +1912,7 @@ zonelist_scan:
* As we may have just activated ZLC, check if the first
* eligible zone has failed zone_reclaim recently.
*/
- if (NUMA_BUILD && zlc_active &&
+ if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
!zlc_zone_worth_trying(zonelist, z, allowednodes))
continue;
@@ -1962,11 +1938,11 @@ try_this_zone:
if (page)
break;
this_zone_full:
- if (NUMA_BUILD)
+ if (IS_ENABLED(CONFIG_NUMA))
zlc_mark_zone_full(zonelist, z);
}
- if (unlikely(NUMA_BUILD && page == NULL && zlc_active)) {
+ if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
/* Disable zlc cache for second zonelist scan */
zlc_active = 0;
goto zonelist_scan;
@@ -2148,8 +2124,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
- struct page *page = NULL;
-
if (!order)
return NULL;
@@ -2161,16 +2135,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, sync_migration,
- contended_compaction, &page);
+ contended_compaction);
current->flags &= ~PF_MEMALLOC;
- /* If compaction captured a page, prep and use it */
- if (page) {
- prep_new_page(page, order, gfp_mask);
- goto got_page;
- }
-
if (*did_some_progress != COMPACT_SKIPPED) {
+ struct page *page;
+
/* Page migration frees to the PCP lists but we want merging */
drain_pages(get_cpu());
put_cpu();
@@ -2180,7 +2150,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
alloc_flags & ~ALLOC_NO_WATERMARKS,
preferred_zone, migratetype);
if (page) {
-got_page:
preferred_zone->compact_blockskip_flush = false;
preferred_zone->compact_considered = 0;
preferred_zone->compact_defer_shift = 0;
@@ -2266,7 +2235,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
return NULL;
/* After successful reclaim, reconsider all zones for allocation */
- if (NUMA_BUILD)
+ if (IS_ENABLED(CONFIG_NUMA))
zlc_clear_zones_full(zonelist);
retry:
@@ -2412,7 +2381,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
* allowed per node queues are empty and that nodes are
* over allocated.
*/
- if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
goto nopage;
restart:
@@ -2596,6 +2566,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+ struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
@@ -2614,6 +2585,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
+ /*
+ * Will only have any effect when __GFP_KMEMCG is set. This is
+ * verified in the (always inline) callee
+ */
+ if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+ return NULL;
+
retry_cpuset:
cpuset_mems_cookie = get_mems_allowed();
@@ -2649,6 +2627,8 @@ out:
if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
goto retry_cpuset;
+ memcg_kmem_commit_charge(page, memcg, order);
+
return page;
}
EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2701,6 +2681,31 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
+/*
+ * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
+ * pages allocated with __GFP_KMEMCG.
+ *
+ * Those pages are accounted to a particular memcg, embedded in the
+ * corresponding page_cgroup. To avoid adding a hit in the allocator to search
+ * for that information only to find out that it is NULL for users who have no
+ * interest in that whatsoever, we provide these functions.
+ *
+ * The caller knows better which flags it relies on.
+ */
+void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+{
+ memcg_kmem_uncharge_pages(page, order);
+ __free_pages(page, order);
+}
+
+void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+{
+ if (addr != 0) {
+ VM_BUG_ON(!virt_addr_valid((void *)addr));
+ __free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+ }
+}
+
static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
{
if (addr) {
@@ -2819,7 +2824,7 @@ unsigned int nr_free_pagecache_pages(void)
static inline void show_node(struct zone *zone)
{
- if (NUMA_BUILD)
+ if (IS_ENABLED(CONFIG_NUMA))
printk("Node %d ", zone_to_nid(zone));
}
@@ -2877,6 +2882,31 @@ out:
#define K(x) ((x) << (PAGE_SHIFT-10))
+static void show_migration_types(unsigned char type)
+{
+ static const char types[MIGRATE_TYPES] = {
+ [MIGRATE_UNMOVABLE] = 'U',
+ [MIGRATE_RECLAIMABLE] = 'E',
+ [MIGRATE_MOVABLE] = 'M',
+ [MIGRATE_RESERVE] = 'R',
+#ifdef CONFIG_CMA
+ [MIGRATE_CMA] = 'C',
+#endif
+ [MIGRATE_ISOLATE] = 'I',
+ };
+ char tmp[MIGRATE_TYPES + 1];
+ char *p = tmp;
+ int i;
+
+ for (i = 0; i < MIGRATE_TYPES; i++) {
+ if (type & (1 << i))
+ *p++ = types[i];
+ }
+
+ *p = '\0';
+ printk("(%s) ", tmp);
+}
+
/*
* Show free area list (used inside shift_scroll-lock stuff)
* We also calculate the percentage fragmentation. We do this by counting the
@@ -2951,6 +2981,7 @@ void show_free_areas(unsigned int filter)
" isolated(anon):%lukB"
" isolated(file):%lukB"
" present:%lukB"
+ " managed:%lukB"
" mlocked:%lukB"
" dirty:%lukB"
" writeback:%lukB"
@@ -2980,6 +3011,7 @@ void show_free_areas(unsigned int filter)
K(zone_page_state(zone, NR_ISOLATED_ANON)),
K(zone_page_state(zone, NR_ISOLATED_FILE)),
K(zone->present_pages),
+ K(zone->managed_pages),
K(zone_page_state(zone, NR_MLOCK)),
K(zone_page_state(zone, NR_FILE_DIRTY)),
K(zone_page_state(zone, NR_WRITEBACK)),
@@ -3005,6 +3037,7 @@ void show_free_areas(unsigned int filter)
for_each_populated_zone(zone) {
unsigned long nr[MAX_ORDER], flags, order, total = 0;
+ unsigned char types[MAX_ORDER];
if (skip_free_areas_node(filter, zone_to_nid(zone)))
continue;
@@ -3013,12 +3046,24 @@ void show_free_areas(unsigned int filter)
spin_lock_irqsave(&zone->lock, flags);
for (order = 0; order < MAX_ORDER; order++) {
- nr[order] = zone->free_area[order].nr_free;
+ struct free_area *area = &zone->free_area[order];
+ int type;
+
+ nr[order] = area->nr_free;
total += nr[order] << order;
+
+ types[order] = 0;
+ for (type = 0; type < MIGRATE_TYPES; type++) {
+ if (!list_empty(&area->free_list[type]))
+ types[order] |= 1 << type;
+ }
}
spin_unlock_irqrestore(&zone->lock, flags);
- for (order = 0; order < MAX_ORDER; order++)
+ for (order = 0; order < MAX_ORDER; order++) {
printk("%lu*%lukB ", nr[order], K(1UL) << order);
+ if (nr[order])
+ show_migration_types(types[order]);
+ }
printk("= %lukB\n", K(total));
}
@@ -3195,7 +3240,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask)
return node;
}
- for_each_node_state(n, N_HIGH_MEMORY) {
+ for_each_node_state(n, N_MEMORY) {
/* Don't want a node to appear more than once */
if (node_isset(n, *used_node_mask))
@@ -3337,7 +3382,7 @@ static int default_zonelist_order(void)
* local memory, NODE_ORDER may be suitable.
*/
average_size = total_size /
- (nodes_weight(node_states[N_HIGH_MEMORY]) + 1);
+ (nodes_weight(node_states[N_MEMORY]) + 1);
for_each_online_node(nid) {
low_kmem_size = 0;
total_size = 0;
@@ -3827,6 +3872,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
mminit_verify_page_links(page, zone, nid, pfn);
init_page_count(page);
reset_page_mapcount(page);
+ reset_page_last_nid(page);
SetPageReserved(page);
/*
* Mark the block movable so that blocks are reserved for
@@ -4433,6 +4479,26 @@ void __init set_pageblock_order(void)
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
+static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
+ unsigned long present_pages)
+{
+ unsigned long pages = spanned_pages;
+
+ /*
+ * Provide a more accurate estimation if there are holes within
+ * the zone and SPARSEMEM is in use. If there are holes within the
+ * zone, each populated memory region may cost us one or two extra
+ * memmap pages due to alignment because memmap pages for each
+ * populated regions may not naturally algined on page boundary.
+ * So the (present_pages >> 4) heuristic is a tradeoff for that.
+ */
+ if (spanned_pages > present_pages + (present_pages >> 4) &&
+ IS_ENABLED(CONFIG_SPARSEMEM))
+ pages = present_pages;
+
+ return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
+}
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -4450,54 +4516,67 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
int ret;
pgdat_resize_init(pgdat);
+#ifdef CONFIG_NUMA_BALANCING
+ spin_lock_init(&pgdat->numabalancing_migrate_lock);
+ pgdat->numabalancing_migrate_nr_pages = 0;
+ pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
pgdat_page_cgroup_init(pgdat);
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, realsize, memmap_pages;
+ unsigned long size, realsize, freesize, memmap_pages;
size = zone_spanned_pages_in_node(nid, j, zones_size);
- realsize = size - zone_absent_pages_in_node(nid, j,
+ realsize = freesize = size - zone_absent_pages_in_node(nid, j,
zholes_size);
/*
- * Adjust realsize so that it accounts for how much memory
+ * Adjust freesize so that it accounts for how much memory
* is used by this zone for memmap. This affects the watermark
* and per-cpu initialisations
*/
- memmap_pages =
- PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT;
- if (realsize >= memmap_pages) {
- realsize -= memmap_pages;
+ memmap_pages = calc_memmap_size(size, realsize);
+ if (freesize >= memmap_pages) {
+ freesize -= memmap_pages;
if (memmap_pages)
printk(KERN_DEBUG
" %s zone: %lu pages used for memmap\n",
zone_names[j], memmap_pages);
} else
printk(KERN_WARNING
- " %s zone: %lu pages exceeds realsize %lu\n",
- zone_names[j], memmap_pages, realsize);
+ " %s zone: %lu pages exceeds freesize %lu\n",
+ zone_names[j], memmap_pages, freesize);
/* Account for reserved pages */
- if (j == 0 && realsize > dma_reserve) {
- realsize -= dma_reserve;
+ if (j == 0 && freesize > dma_reserve) {
+ freesize -= dma_reserve;
printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
zone_names[0], dma_reserve);
}
if (!is_highmem_idx(j))
- nr_kernel_pages += realsize;
- nr_all_pages += realsize;
+ nr_kernel_pages += freesize;
+ /* Charge for highmem memmap if there are enough kernel pages */
+ else if (nr_kernel_pages > memmap_pages * 2)
+ nr_kernel_pages -= memmap_pages;
+ nr_all_pages += freesize;
zone->spanned_pages = size;
- zone->present_pages = realsize;
+ zone->present_pages = freesize;
+ /*
+ * Set an approximate value for lowmem here, it will be adjusted
+ * when the bootmem allocator frees pages into the buddy system.
+ * And all highmem pages will be managed by the buddy system.
+ */
+ zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
- zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
+ zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio)
/ 100;
- zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+ zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
@@ -4688,7 +4767,7 @@ unsigned long __init find_min_pfn_with_active_regions(void)
/*
* early_calculate_totalpages()
* Sum pages in active regions for movable zone.
- * Populate N_HIGH_MEMORY for calculating usable_nodes.
+ * Populate N_MEMORY for calculating usable_nodes.
*/
static unsigned long __init early_calculate_totalpages(void)
{
@@ -4701,7 +4780,7 @@ static unsigned long __init early_calculate_totalpages(void)
totalpages += pages;
if (pages)
- node_set_state(nid, N_HIGH_MEMORY);
+ node_set_state(nid, N_MEMORY);
}
return totalpages;
}
@@ -4718,9 +4797,9 @@ static void __init find_zone_movable_pfns_for_nodes(void)
unsigned long usable_startpfn;
unsigned long kernelcore_node, kernelcore_remaining;
/* save the state before borrow the nodemask */
- nodemask_t saved_node_state = node_states[N_HIGH_MEMORY];
+ nodemask_t saved_node_state = node_states[N_MEMORY];
unsigned long totalpages = early_calculate_totalpages();
- int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
+ int usable_nodes = nodes_weight(node_states[N_MEMORY]);
/*
* If movablecore was specified, calculate what size of
@@ -4755,7 +4834,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
restart:
/* Spread kernelcore memory as evenly as possible throughout nodes */
kernelcore_node = required_kernelcore / usable_nodes;
- for_each_node_state(nid, N_HIGH_MEMORY) {
+ for_each_node_state(nid, N_MEMORY) {
unsigned long start_pfn, end_pfn;
/*
@@ -4847,23 +4926,27 @@ restart:
out:
/* restore the node_state */
- node_states[N_HIGH_MEMORY] = saved_node_state;
+ node_states[N_MEMORY] = saved_node_state;
}
-/* Any regular memory on that node ? */
-static void __init check_for_regular_memory(pg_data_t *pgdat)
+/* Any regular or high memory on that node ? */
+static void check_for_memory(pg_data_t *pgdat, int nid)
{
-#ifdef CONFIG_HIGHMEM
enum zone_type zone_type;
- for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
+ if (N_MEMORY == N_NORMAL_MEMORY)
+ return;
+
+ for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) {
struct zone *zone = &pgdat->node_zones[zone_type];
if (zone->present_pages) {
- node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+ node_set_state(nid, N_HIGH_MEMORY);
+ if (N_NORMAL_MEMORY != N_HIGH_MEMORY &&
+ zone_type <= ZONE_NORMAL)
+ node_set_state(nid, N_NORMAL_MEMORY);
break;
}
}
-#endif
}
/**
@@ -4946,8 +5029,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
/* Any memory on that node */
if (pgdat->node_present_pages)
- node_set_state(nid, N_HIGH_MEMORY);
- check_for_regular_memory(pgdat);
+ node_set_state(nid, N_MEMORY);
+ check_for_memory(pgdat, nid);
}
}
@@ -5175,10 +5258,6 @@ static void __setup_per_zone_wmarks(void)
zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2);
zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
- zone->watermark[WMARK_MIN] += cma_wmark_pages(zone);
- zone->watermark[WMARK_LOW] += cma_wmark_pages(zone);
- zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone);
-
setup_zone_migrate_reserve(zone);
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -5506,7 +5585,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
pfn &= (PAGES_PER_SECTION-1);
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#else
- pfn = pfn - zone->zone_start_pfn;
+ pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#endif /* CONFIG_SPARSEMEM */
}
@@ -5576,7 +5655,8 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
* MIGRATE_MOVABLE block might include unmovable pages. It means you can't
* expect this function should be exact.
*/
-bool has_unmovable_pages(struct zone *zone, struct page *page, int count)
+bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
+ bool skip_hwpoisoned_pages)
{
unsigned long pfn, iter, found;
int mt;
@@ -5611,6 +5691,13 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count)
continue;
}
+ /*
+ * The HWPoisoned page may be not in buddy system, and
+ * page_count() is not 0.
+ */
+ if (skip_hwpoisoned_pages && PageHWPoison(page))
+ continue;
+
if (!PageLRU(page))
found++;
/*
@@ -5653,7 +5740,7 @@ bool is_pageblock_removable_nolock(struct page *page)
zone->zone_start_pfn + zone->spanned_pages <= pfn)
return false;
- return !has_unmovable_pages(zone, page, 0);
+ return !has_unmovable_pages(zone, page, 0, true);
}
#ifdef CONFIG_CMA
@@ -5680,7 +5767,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned int tries = 0;
int ret = 0;
- migrate_prep_local();
+ migrate_prep();
while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(current)) {
@@ -5708,61 +5795,14 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
ret = migrate_pages(&cc->migratepages,
alloc_migrate_target,
- 0, false, MIGRATE_SYNC);
+ 0, false, MIGRATE_SYNC,
+ MR_CMA);
}
- putback_lru_pages(&cc->migratepages);
+ putback_movable_pages(&cc->migratepages);
return ret > 0 ? 0 : ret;
}
-/*
- * Update zone's cma pages counter used for watermark level calculation.
- */
-static inline void __update_cma_watermarks(struct zone *zone, int count)
-{
- unsigned long flags;
- spin_lock_irqsave(&zone->lock, flags);
- zone->min_cma_pages += count;
- spin_unlock_irqrestore(&zone->lock, flags);
- setup_per_zone_wmarks();
-}
-
-/*
- * Trigger memory pressure bump to reclaim some pages in order to be able to
- * allocate 'count' pages in single page units. Does similar work as
- *__alloc_pages_slowpath() function.
- */
-static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
-{
- enum zone_type high_zoneidx = gfp_zone(gfp_mask);
- struct zonelist *zonelist = node_zonelist(0, gfp_mask);
- int did_some_progress = 0;
- int order = 1;
-
- /*
- * Increase level of watermarks to force kswapd do his job
- * to stabilise at new watermark level.
- */
- __update_cma_watermarks(zone, count);
-
- /* Obey watermarks as if the page was being allocated */
- while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) {
- wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
-
- did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
- NULL);
- if (!did_some_progress) {
- /* Exhausted what can be done so it's blamo time */
- out_of_memory(zonelist, gfp_mask, order, NULL, false);
- }
- }
-
- /* Restore original watermark levels. */
- __update_cma_watermarks(zone, -count);
-
- return count;
-}
-
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -5786,7 +5826,6 @@ static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype)
{
- struct zone *zone = page_zone(pfn_to_page(start));
unsigned long outer_start, outer_end;
int ret = 0, order;
@@ -5824,7 +5863,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
*/
ret = start_isolate_page_range(pfn_max_align_down(start),
- pfn_max_align_up(end), migratetype);
+ pfn_max_align_up(end), migratetype,
+ false);
if (ret)
return ret;
@@ -5863,18 +5903,13 @@ int alloc_contig_range(unsigned long start, unsigned long end,
}
/* Make sure the range is really isolated. */
- if (test_pages_isolated(outer_start, end)) {
+ if (test_pages_isolated(outer_start, end, false)) {
pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
outer_start, end);
ret = -EBUSY;
goto done;
}
- /*
- * Reclaim enough pages to make sure that contiguous allocation
- * will not starve the system.
- */
- __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
/* Grab isolated pages from freelists. */
outer_end = isolate_freepages_range(&cc, outer_start, end);
@@ -5897,8 +5932,15 @@ done:
void free_contig_range(unsigned long pfn, unsigned nr_pages)
{
- for (; nr_pages--; ++pfn)
- __free_page(pfn_to_page(pfn));
+ unsigned int count = 0;
+
+ for (; nr_pages--; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ count += page_count(page) != 1;
+ __free_page(page);
+ }
+ WARN(count != 0, "%d pages are still in use!\n", count);
}
#endif
@@ -5932,7 +5974,6 @@ void __meminit zone_pcp_update(struct zone *zone)
}
#endif
-#ifdef CONFIG_MEMORY_HOTREMOVE
void zone_pcp_reset(struct zone *zone)
{
unsigned long flags;
@@ -5952,6 +5993,7 @@ void zone_pcp_reset(struct zone *zone)
local_irq_restore(flags);
}
+#ifdef CONFIG_MEMORY_HOTREMOVE
/*
* All pages in the range must be isolated before calling this.
*/
@@ -5978,6 +6020,16 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
continue;
}
page = pfn_to_page(pfn);
+ /*
+ * The HWPoisoned page may be not in buddy system, and
+ * page_count() is not 0.
+ */
+ if (unlikely(!PageBuddy(page) && PageHWPoison(page))) {
+ pfn++;
+ SetPageReserved(page);
+ continue;
+ }
+
BUG_ON(page_count(page));
BUG_ON(!PageBuddy(page));
order = page_order(page);
@@ -5988,8 +6040,6 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
list_del(&page->lru);
rmv_page_order(page);
zone->free_area[order].nr_free--;
- __mod_zone_page_state(zone, NR_FREE_PAGES,
- - (1UL << order));
for (i = 0; i < (1 << order); i++)
SetPageReserved((page+i));
pfn += (1 << order);
OpenPOWER on IntegriCloud