Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c         95
-rw-r--r--  mm/memory.c           4
-rw-r--r--  mm/memory_hotplug.c   6
-rw-r--r--  mm/mempolicy.c       21
-rw-r--r--  mm/page-writeback.c  19
-rw-r--r--  mm/page_alloc.c      20
-rw-r--r--  mm/page_isolation.c   6
-rw-r--r--  mm/rmap.c             7
-rw-r--r--  mm/slab.c             2
-rw-r--r--  mm/slub.c             2
-rw-r--r--  mm/util.c             4
-rw-r--r--  mm/vmstat.c           2
12 files changed, 114 insertions, 74 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8b809ec..6121b57 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -116,7 +116,9 @@ static void update_and_free_page(struct page *page)
static void free_huge_page(struct page *page)
{
int nid = page_to_nid(page);
+ struct address_space *mapping;
+ mapping = (struct address_space *) page_private(page);
BUG_ON(page_count(page));
INIT_LIST_HEAD(&page->lru);
@@ -129,6 +131,9 @@ static void free_huge_page(struct page *page)
enqueue_huge_page(page);
}
spin_unlock(&hugetlb_lock);
+ if (mapping)
+ hugetlb_put_quota(mapping, 1);
+ set_page_private(page, 0);
}
/*
@@ -323,7 +328,7 @@ free:
* allocated to satisfy the reservation must be explicitly freed if they were
* never used.
*/
-void return_unused_surplus_pages(unsigned long unused_resv_pages)
+static void return_unused_surplus_pages(unsigned long unused_resv_pages)
{
static int nid = -1;
struct page *page;
@@ -353,35 +358,50 @@ void return_unused_surplus_pages(unsigned long unused_resv_pages)
}
}
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr)
+
+static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
+ unsigned long addr)
{
- struct page *page = NULL;
- int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
+ struct page *page;
spin_lock(&hugetlb_lock);
- if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
- goto fail;
-
page = dequeue_huge_page(vma, addr);
- if (!page)
- goto fail;
-
spin_unlock(&hugetlb_lock);
- set_page_refcounted(page);
- return page;
+ return page ? page : ERR_PTR(-VM_FAULT_OOM);
+}
-fail:
- spin_unlock(&hugetlb_lock);
+static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct page *page = NULL;
- /*
- * Private mappings do not use reserved huge pages so the allocation
- * may have failed due to an undersized hugetlb pool. Try to grab a
- * surplus huge page from the buddy allocator.
- */
- if (!use_reserved_page)
+ if (hugetlb_get_quota(vma->vm_file->f_mapping, 1))
+ return ERR_PTR(-VM_FAULT_SIGBUS);
+
+ spin_lock(&hugetlb_lock);
+ if (free_huge_pages > resv_huge_pages)
+ page = dequeue_huge_page(vma, addr);
+ spin_unlock(&hugetlb_lock);
+ if (!page)
page = alloc_buddy_huge_page(vma, addr);
+ return page ? page : ERR_PTR(-VM_FAULT_OOM);
+}
+
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct page *page;
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ if (vma->vm_flags & VM_MAYSHARE)
+ page = alloc_huge_page_shared(vma, addr);
+ else
+ page = alloc_huge_page_private(vma, addr);
+
+ if (!IS_ERR(page)) {
+ set_page_refcounted(page);
+ set_page_private(page, (unsigned long) mapping);
+ }
return page;
}
@@ -726,9 +746,9 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
page_cache_get(old_page);
new_page = alloc_huge_page(vma, address);
- if (!new_page) {
+ if (IS_ERR(new_page)) {
page_cache_release(old_page);
- return VM_FAULT_OOM;
+ return -PTR_ERR(new_page);
}
spin_unlock(&mm->page_table_lock);
@@ -772,27 +792,28 @@ retry:
size = i_size_read(mapping->host) >> HPAGE_SHIFT;
if (idx >= size)
goto out;
- if (hugetlb_get_quota(mapping))
- goto out;
page = alloc_huge_page(vma, address);
- if (!page) {
- hugetlb_put_quota(mapping);
- ret = VM_FAULT_OOM;
+ if (IS_ERR(page)) {
+ ret = -PTR_ERR(page);
goto out;
}
clear_huge_page(page, address);
if (vma->vm_flags & VM_SHARED) {
int err;
+ struct inode *inode = mapping->host;
err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
if (err) {
put_page(page);
- hugetlb_put_quota(mapping);
if (err == -EEXIST)
goto retry;
goto out;
}
+
+ spin_lock(&inode->i_lock);
+ inode->i_blocks += BLOCKS_PER_HUGEPAGE;
+ spin_unlock(&inode->i_lock);
} else
lock_page(page);
}
@@ -822,7 +843,6 @@ out:
backout:
spin_unlock(&mm->page_table_lock);
- hugetlb_put_quota(mapping);
unlock_page(page);
put_page(page);
goto out;
@@ -868,7 +888,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
- unsigned long *position, int *length, int i)
+ unsigned long *position, int *length, int i,
+ int write)
{
unsigned long pfn_offset;
unsigned long vaddr = *position;
@@ -890,7 +911,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
int ret;
spin_unlock(&mm->page_table_lock);
- ret = hugetlb_fault(mm, vma, vaddr, 0);
+ ret = hugetlb_fault(mm, vma, vaddr, write);
spin_lock(&mm->page_table_lock);
if (!(ret & VM_FAULT_ERROR))
continue;
@@ -1132,6 +1153,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
if (chg < 0)
return chg;
+ if (hugetlb_get_quota(inode->i_mapping, chg))
+ return -ENOSPC;
ret = hugetlb_acct_memory(chg);
if (ret < 0)
return ret;
@@ -1142,5 +1165,11 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
long chg = region_truncate(&inode->i_mapping->private_list, offset);
- hugetlb_acct_memory(freed - chg);
+
+ spin_lock(&inode->i_lock);
+ inode->i_blocks -= BLOCKS_PER_HUGEPAGE * freed;
+ spin_unlock(&inode->i_lock);
+
+ hugetlb_put_quota(inode->i_mapping, (chg - freed));
+ hugetlb_acct_memory(-(chg - freed));
}
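
The hugetlb.c changes above move quota charging into alloc_huge_page()/free_huge_page(), stash the owning mapping in page_private(), and have the allocators return ERR_PTR-encoded fault codes (-VM_FAULT_SIGBUS, -VM_FAULT_OOM) instead of NULL. As a rough illustration of that last idiom only, the following standalone C sketch (MAX_ERRNO and the error value are hypothetical stand-ins, not code from the patch) shows how an error number can be carried in a pointer return:

/*
 * Illustrative sketch, not part of the patch: encode a small negative
 * error code in an otherwise invalid pointer value, so one return value
 * can carry either a page or an error.
 */
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)     { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *page = ERR_PTR(-12);	/* hypothetical error code */

	if (IS_ERR(page))
		printf("allocation failed: %ld\n", -PTR_ERR(page));
	return 0;
}

Returning an encoded error rather than NULL is what lets hugetlb_cow() and hugetlb_no_page() above distinguish quota exhaustion (SIGBUS) from a genuine out-of-memory condition.
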
diff --git a/mm/memory.c b/mm/memory.c
index 9791e47..4bf0b6d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1036,7 +1036,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
- &start, &len, i);
+ &start, &len, i, write);
continue;
}
@@ -2084,9 +2084,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGMAJFAULT);
}
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
mark_page_accessed(page);
lock_page(page);
+ delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
/*
* Back out if somebody else already faulted in this pte.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3a47871..9512a54 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -39,7 +39,7 @@ static struct resource *register_memory_resource(u64 start, u64 size)
res->name = "System RAM";
res->start = start;
res->end = start + size - 1;
- res->flags = IORESOURCE_MEM;
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
if (request_resource(&iomem_resource, res) < 0) {
printk("System RAM resource %llx - %llx cannot be added\n",
(unsigned long long)res->start, (unsigned long long)res->end);
@@ -574,8 +574,8 @@ repeat:
/* Ok, all of our target is isolated.
We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
- /* reset pagetype flags */
- start_isolate_page_range(start_pfn, end_pfn);
+ /* reset pagetype flags and make the migratetype MOVABLE again */
+ undo_isolate_page_range(start_pfn, end_pfn);
/* removal success */
zone->present_pages -= offlined_pages;
zone->zone_pgdat->node_present_pages -= offlined_pages;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index c1592a9..83c69f8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -722,12 +722,29 @@ out:
}
+/*
+ * Allocate a new page for page migration based on vma policy.
+ * Start assuming that page is mapped by vma pointed to by @private.
+ * Search forward from there, if not. N.B., this assumes that the
+ * list of pages handed to migrate_pages()--which is how we get here--
+ * is in virtual address order.
+ */
static struct page *new_vma_page(struct page *page, unsigned long private, int **x)
{
struct vm_area_struct *vma = (struct vm_area_struct *)private;
+ unsigned long uninitialized_var(address);
- return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
- page_address_in_vma(page, vma));
+ while (vma) {
+ address = page_address_in_vma(page, vma);
+ if (address != -EFAULT)
+ break;
+ vma = vma->vm_next;
+ }
+
+ /*
+ * if !vma, alloc_page_vma() will use task or system default policy
+ */
+ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
}
#else
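
The new_vma_page() change walks forward through the vma list until it finds one that actually maps the page, relying on migrate_pages() handing over pages in virtual address order. A minimal userspace sketch of that forward search over an address-ordered singly linked list (struct and function names here are hypothetical, not the kernel types):

/*
 * Illustrative sketch: walk an address-ordered list of ranges until one
 * contains the wanted value, otherwise return NULL so the caller falls
 * back to a default policy.
 */
#include <stdio.h>

struct range {
	unsigned long start, end;	/* [start, end) */
	struct range *next;
};

static struct range *find_range(struct range *r, unsigned long addr)
{
	while (r) {
		if (addr >= r->start && addr < r->end)
			return r;	/* analogous to address != -EFAULT */
		r = r->next;		/* analogous to vma = vma->vm_next */
	}
	return NULL;			/* caller uses the default policy */
}

int main(void)
{
	struct range b = { 0x2000, 0x3000, NULL };
	struct range a = { 0x1000, 0x2000, &b };

	printf("%p\n", (void *)find_range(&a, 0x2800));	/* &b   */
	printf("%p\n", (void *)find_range(&a, 0x4000));	/* NULL */
	return 0;
}
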
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 838a5e3..81a91e6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -355,8 +355,8 @@ get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
*/
static void balance_dirty_pages(struct address_space *mapping)
{
- long bdi_nr_reclaimable;
- long bdi_nr_writeback;
+ long nr_reclaimable, bdi_nr_reclaimable;
+ long nr_writeback, bdi_nr_writeback;
long background_thresh;
long dirty_thresh;
long bdi_thresh;
@@ -376,11 +376,26 @@ static void balance_dirty_pages(struct address_space *mapping)
get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);
+
+ nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
+ global_page_state(NR_UNSTABLE_NFS);
+ nr_writeback = global_page_state(NR_WRITEBACK);
+
bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+
if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
break;
+ /*
+ * Throttle it only when the background writeback cannot
+ * catch-up. This avoids (excessively) small writeouts
+ * when the bdi limits are ramping up.
+ */
+ if (nr_reclaimable + nr_writeback <
+ (background_thresh + dirty_thresh) / 2)
+ break;
+
if (!bdi->dirty_exceeded)
bdi->dirty_exceeded = 1;
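
The added test lets a writer leave the throttle loop while global dirty state is still below the midpoint of the background and dirty thresholds, so small writeouts are not forced while the per-bdi limits are still ramping up. A worked example with hypothetical numbers:

/*
 * Illustrative arithmetic, not from the patch: with background_thresh =
 * 10000 and dirty_thresh = 40000 pages, a task escapes the loop while
 * global dirty + writeback stays below (10000 + 40000) / 2 = 25000
 * pages, even if its own bdi is momentarily over its threshold.
 */
#include <stdio.h>

int main(void)
{
	long background_thresh = 10000, dirty_thresh = 40000;
	long nr_reclaimable = 12000, nr_writeback = 8000;

	if (nr_reclaimable + nr_writeback <
	    (background_thresh + dirty_thresh) / 2)
		printf("break: background writeback can still catch up\n");
	return 0;
}
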
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index da69d83..12376ae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -749,23 +749,6 @@ int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
return move_freepages(zone, start_page, end_page, migratetype);
}
-/* Return the page with the lowest PFN in the list */
-static struct page *min_page(struct list_head *list)
-{
- unsigned long min_pfn = -1UL;
- struct page *min_page = NULL, *page;;
-
- list_for_each_entry(page, list, lru) {
- unsigned long pfn = page_to_pfn(page);
- if (pfn < min_pfn) {
- min_pfn = pfn;
- min_page = page;
- }
- }
-
- return min_page;
-}
-
/* Remove an element from the buddy allocator from the fallback list */
static struct page *__rmqueue_fallback(struct zone *zone, int order,
int start_migratetype)
@@ -789,11 +772,8 @@ static struct page *__rmqueue_fallback(struct zone *zone, int order,
if (list_empty(&area->free_list[migratetype]))
continue;
- /* Bias kernel allocations towards low pfns */
page = list_entry(area->free_list[migratetype].next,
struct page, lru);
- if (unlikely(start_migratetype != MIGRATE_MOVABLE))
- page = min_page(&area->free_list[migratetype]);
area->nr_free--;
/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 8f92a29..3444b58 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -55,7 +55,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
return 0;
undo:
for (pfn = start_pfn;
- pfn <= undo_pfn;
+ pfn < undo_pfn;
pfn += pageblock_nr_pages)
unset_migratetype_isolate(pfn_to_page(pfn));
@@ -76,7 +76,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn)
pfn < end_pfn;
pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
- if (!page || get_pageblock_flags(page) != MIGRATE_ISOLATE)
+ if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
continue;
unset_migratetype_isolate(page);
}
@@ -126,7 +126,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn)
*/
for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
page = __first_valid_page(pfn, pageblock_nr_pages);
- if (page && get_pageblock_flags(page) != MIGRATE_ISOLATE)
+ if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
break;
}
if (pfn < end_pfn)
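
Both hunks replace get_pageblock_flags() with get_pageblock_migratetype(): only the migratetype bits should be compared against MIGRATE_ISOLATE, not the whole pageblock flags word. A standalone sketch with hypothetical bit values (not the real pageblock layout) makes the difference visible:

/*
 * Illustrative sketch: comparing a whole flags word against one
 * enumerator only works once the migratetype bits have been masked out,
 * which is what get_pageblock_migratetype() does.
 */
#include <stdio.h>

#define MIGRATETYPE_MASK  0x7	/* hypothetical: low 3 bits */
#define MIGRATE_ISOLATE   0x4	/* hypothetical enumerator  */

static unsigned long get_flags(void)       { return 0x4 | 0x8; /* extra bit set */ }
static unsigned long get_migratetype(void) { return get_flags() & MIGRATETYPE_MASK; }

int main(void)
{
	printf("flags == ISOLATE?       %d\n", get_flags() == MIGRATE_ISOLATE);       /* 0: misses it */
	printf("migratetype == ISOLATE? %d\n", get_migratetype() == MIGRATE_ISOLATE); /* 1: correct   */
	return 0;
}
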
diff --git a/mm/rmap.c b/mm/rmap.c
index 8990f90..dc3be5f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -183,7 +183,9 @@ static void page_unlock_anon_vma(struct anon_vma *anon_vma)
}
/*
- * At what user virtual address is page expected in vma?
+ * At what user virtual address is page expected in @vma?
+ * Returns virtual address or -EFAULT if page's index/offset is not
+ * within the range mapped by the @vma.
*/
static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
@@ -193,8 +195,7 @@ vma_address(struct page *page, struct vm_area_struct *vma)
address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
- /* page should be within any vma from prio_tree_next */
- BUG_ON(!PageAnon(page));
+ /* page should be within @vma mapping range */
return -EFAULT;
}
return address;
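
The reworded comment documents what vma_address() computes: the page's index is turned into a user address relative to the vma, and anything outside [vm_start, vm_end) now simply returns -EFAULT instead of asserting. With hypothetical numbers, the arithmetic looks like this:

/*
 * Illustrative arithmetic: with PAGE_SHIFT = 12, vm_start = 0x400000 and
 * vm_pgoff = 0x10, a page at index 0x13 maps to
 * 0x400000 + ((0x13 - 0x10) << 12) = 0x403000; an index outside the
 * vma's range yields -EFAULT instead.
 */
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long vm_start = 0x400000, vm_end = 0x500000, vm_pgoff = 0x10;
	unsigned long pgoff = 0x13;
	unsigned long address = vm_start + ((pgoff - vm_pgoff) << PAGE_SHIFT);

	if (address < vm_start || address >= vm_end)
		printf("-EFAULT\n");
	else
		printf("address = %#lx\n", address);	/* 0x403000 */
	return 0;
}
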
diff --git a/mm/slab.c b/mm/slab.c
index cfa6be4..c31cd36 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1043,7 +1043,7 @@ static struct array_cache **alloc_alien_cache(int node, int limit)
}
ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
if (!ac_ptr[i]) {
- for (i--; i <= 0; i--)
+ for (i--; i >= 0; i--)
kfree(ac_ptr[i]);
kfree(ac_ptr);
return NULL;
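
The one-character fix restores the usual "free everything allocated so far" unwind: after i--, the cleanup loop must run while i >= 0, not i <= 0 (which either leaked the earlier entries or stepped below the array). A userspace approximation of the pattern (alloc_table() is a hypothetical helper, not the slab code):

/*
 * Illustrative sketch: on a partial allocation failure, unwind the
 * elements that already succeeded with a decrementing loop that stops
 * at index 0.
 */
#include <stdlib.h>

static void **alloc_table(int n, size_t sz)
{
	void **tbl = calloc(n, sizeof(*tbl));
	int i;

	if (!tbl)
		return NULL;
	for (i = 0; i < n; i++) {
		tbl[i] = malloc(sz);
		if (!tbl[i]) {
			for (i--; i >= 0; i--)	/* free what succeeded */
				free(tbl[i]);
			free(tbl);
			return NULL;
		}
	}
	return tbl;
}

int main(void)
{
	void **t = alloc_table(4, 32);

	if (t) {
		for (int i = 0; i < 4; i++)
			free(t[i]);
		free(t);
	}
	return 0;
}
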
diff --git a/mm/slub.c b/mm/slub.c
index 84f59fd..9acb413 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1080,7 +1080,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
struct page *page;
struct kmem_cache_node *n;
void *start;
- void *end;
void *last;
void *p;
@@ -1101,7 +1100,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
SetSlabDebug(page);
start = page_address(page);
- end = start + s->objects * s->size;
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << s->order);
diff --git a/mm/util.c b/mm/util.c
index 5f64026..8f18683 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -95,8 +95,8 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags)
return (void *)p;
ret = kmalloc_track_caller(new_size, flags);
- if (ret) {
- memcpy(ret, p, min(new_size, ks));
+ if (ret && p) {
+ memcpy(ret, p, ks);
kfree(p);
}
return ret;
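
Because krealloc() has already returned early when new_size fits within the old allocation, the copy can move exactly ks bytes; the added "&& p" guard avoids a memcpy from NULL when growing a NULL pointer. A userspace approximation (my_realloc() and its old_size parameter are hypothetical stand-ins for the kernel's ksize()-based logic):

/*
 * Illustrative sketch: grow an allocation, copying the old contents only
 * when there is an old buffer to copy from.
 */
#include <stdlib.h>
#include <string.h>

static void *my_realloc(void *p, size_t old_size, size_t new_size)
{
	void *ret;

	if (new_size <= old_size)
		return p;			/* nothing to grow */

	ret = malloc(new_size);
	if (ret && p) {				/* the added "&& p" guard */
		memcpy(ret, p, old_size);	/* old size, not min()    */
		free(p);
	}
	return ret;
}

int main(void)
{
	char *s = my_realloc(NULL, 0, 16);	/* must not memcpy from NULL */

	free(s);
	return 0;
}
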
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4651bf1..e8d846f 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -803,7 +803,7 @@ static void vmstat_update(struct work_struct *w)
sysctl_stat_interval);
}
-static void __devinit start_cpu_timer(int cpu)
+static void __cpuinit start_cpu_timer(int cpu)
{
struct delayed_work *vmstat_work = &per_cpu(vmstat_work, cpu);
OpenPOWER on IntegriCloud