summary refs log tree commit diff stats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2012-01-12 20:42:54 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-01-12 20:42:54 -0800
commit 099469502f62fbe0d7e4f0b83a2f22538367f734 (patch)
tree 5229c3818b2e6e09d35026d49314047121130536 /mm/huge_memory.c
parent 7c17d86a8502c2e30c2eea777ed1b830aa3b447b (diff)
parent 35f1526845a9d804206883e19bd257d3dcef758f (diff)
download op-kernel-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.zip
op-kernel-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.tar.gz
Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")
Andrew explains: - various misc stuff - Most of the rest of MM: memcg, threaded hugepages, others. - cpumask - kexec - kdump - some direct-io performance tweaking - radix-tree optimisations - new selftests code A note on this: often people will develop a new userspace-visible feature and will develop userspace code to exercise/test that feature. Then they merge the patch and the selftest code dies. Sometimes we paste it into the changelog. Sometimes the code gets thrown into Documentation/(!). This saddens me. So this patch creates a bare-bones framework which will henceforth allow me to ask people to include their test apps in the kernel tree so we can keep them alive. Then when people enhance or fix the feature, I can ask them to update the test app too. The infrastructure is terribly trivial at present - let's see how it evolves. - checkpoint/restart feature work. A note on this: this is a project by various mad Russians to perform c/r mainly from userspace, with various oddball helper code added into the kernel where the need is demonstrated. So rather than some large central lump of code, what we have is little bits and pieces popping up in various places which either expose something new or which permit something which is normally kernel-private to be modified. The overall project is an ongoing thing. I've judged that the size and scope of the thing means that we're more likely to be successful with it if we integrate the support into mainline piecemeal rather than allowing it all to develop out-of-tree. However I'm less confident than the developers that it will all eventually work! So what I'm asking them to do is to wrap each piece of new code inside CONFIG_CHECKPOINT_RESTORE. So if it all eventually comes to tears and the project as a whole fails, it should be a simple matter to go through and delete all trace of it. This lot pretty much wraps up the -rc1 merge for me.
* akpm: (96 commits) unlzo: fix input buffer free ramoops: update parameters only after successful init ramoops: fix use of rounddown_pow_of_two() c/r: prctl: add PR_SET_MM codes to set up mm_struct entries c/r: procfs: add start_data, end_data, start_brk members to /proc/$pid/stat v4 c/r: introduce CHECKPOINT_RESTORE symbol selftests: new x86 breakpoints selftest selftests: new very basic kernel selftests directory radix_tree: take radix_tree_path off stack radix_tree: remove radix_tree_indirect_to_ptr() dio: optimize cache misses in the submission path vfs: cache request_queue in struct block_device fs/direct-io.c: calculate fs_count correctly in get_more_blocks() drivers/parport/parport_pc.c: fix warnings panic: don't print redundant backtraces on oops sysctl: add the kernel.ns_last_pid control kdump: add udev events for memory online/offline include/linux/crash_dump.h needs elf.h kdump: fix crash_kexec()/smp_send_stop() race in panic() kdump: crashk_res init check for /sys/kernel/kexec_crash_size ...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- mm/huge_memory.c 93
1 files changed, 56 insertions, 37 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d98..b3ffc21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -487,41 +487,68 @@ static struct attribute_group khugepaged_attr_group = {
.attrs = khugepaged_attr,
.name = "khugepaged",
};
-#endif /* CONFIG_SYSFS */
-static int __init hugepage_init(void)
+static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
{
int err;
-#ifdef CONFIG_SYSFS
- static struct kobject *hugepage_kobj;
-#endif
-
- err = -EINVAL;
- if (!has_transparent_hugepage()) {
- transparent_hugepage_flags = 0;
- goto out;
- }
-#ifdef CONFIG_SYSFS
- err = -ENOMEM;
- hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
- if (unlikely(!hugepage_kobj)) {
+ *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
+ if (unlikely(!*hugepage_kobj)) {
printk(KERN_ERR "hugepage: failed kobject create\n");
- goto out;
+ return -ENOMEM;
}
- err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group);
+ err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
if (err) {
printk(KERN_ERR "hugepage: failed register hugeage group\n");
- goto out;
+ goto delete_obj;
}
- err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group);
+ err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
if (err) {
printk(KERN_ERR "hugepage: failed register hugeage group\n");
- goto out;
+ goto remove_hp_group;
}
-#endif
+
+ return 0;
+
+remove_hp_group:
+ sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group);
+delete_obj:
+ kobject_put(*hugepage_kobj);
+ return err;
+}
+
+static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+ sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
+ sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
+ kobject_put(hugepage_kobj);
+}
+#else
+static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
+{
+ return 0;
+}
+
+static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+}
+#endif /* CONFIG_SYSFS */
+
+static int __init hugepage_init(void)
+{
+ int err;
+ struct kobject *hugepage_kobj;
+
+ if (!has_transparent_hugepage()) {
+ transparent_hugepage_flags = 0;
+ return -EINVAL;
+ }
+
+ err = hugepage_init_sysfs(&hugepage_kobj);
+ if (err)
+ return err;
err = khugepaged_slab_init();
if (err)
@@ -545,7 +572,9 @@ static int __init hugepage_init(void)
set_recommended_min_free_kbytes();
+ return 0;
out:
+ hugepage_exit_sysfs(hugepage_kobj);
return err;
}
module_init(hugepage_init)
@@ -997,7 +1026,7 @@ out:
}
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
- pmd_t *pmd)
+ pmd_t *pmd, unsigned long addr)
{
int ret = 0;
@@ -1013,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pgtable = get_pmd_huge_pte(tlb->mm);
page = pmd_page(*pmd);
pmd_clear(pmd);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
page_remove_rmap(page);
VM_BUG_ON(page_mapcount(page) < 0);
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1116,7 +1146,6 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
entry = pmd_modify(entry, newprot);
set_pmd_at(mm, addr, pmd, entry);
spin_unlock(&vma->vm_mm->page_table_lock);
- flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
ret = 1;
}
} else
@@ -1199,16 +1228,16 @@ static int __split_huge_page_splitting(struct page *page,
static void __split_huge_page_refcount(struct page *page)
{
int i;
- unsigned long head_index = page->index;
struct zone *zone = page_zone(page);
- int zonestat;
int tail_count = 0;
/* prevent PageLRU to go away from under us, and freeze lru stats */
spin_lock_irq(&zone->lru_lock);
compound_lock(page);
+ /* complete memcg works before add pages to LRU */
+ mem_cgroup_split_huge_fixup(page);
- for (i = 1; i < HPAGE_PMD_NR; i++) {
+ for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
struct page *page_tail = page + i;
/* tail_page->_mapcount cannot change */
@@ -1271,14 +1300,13 @@ static void __split_huge_page_refcount(struct page *page)
BUG_ON(page_tail->mapping);
page_tail->mapping = page->mapping;
- page_tail->index = ++head_index;
+ page_tail->index = page->index + i;
BUG_ON(!PageAnon(page_tail));
BUG_ON(!PageUptodate(page_tail));
BUG_ON(!PageDirty(page_tail));
BUG_ON(!PageSwapBacked(page_tail));
- mem_cgroup_split_huge_fixup(page, page_tail);
lru_add_page_tail(zone, page, page_tail);
}
@@ -1288,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page)
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
- /*
- * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
- * so adjust those appropriately if this page is on the LRU.
- */
- if (PageLRU(page)) {
- zonestat = NR_LRU_BASE + page_lru(page);
- __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
- }
-
ClearPageCompound(page);
compound_unlock(page);
spin_unlock_irq(&zone->lru_lock);
OpenPOWER on IntegriCloud