diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 20:42:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 20:42:54 -0800 |
commit | 099469502f62fbe0d7e4f0b83a2f22538367f734 (patch) | |
tree | 5229c3818b2e6e09d35026d49314047121130536 /mm/huge_memory.c | |
parent | 7c17d86a8502c2e30c2eea777ed1b830aa3b447b (diff) | |
parent | 35f1526845a9d804206883e19bd257d3dcef758f (diff) | |
download | op-kernel-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.zip op-kernel-dev-099469502f62fbe0d7e4f0b83a2f22538367f734.tar.gz |
Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")
Andrew explains:
- various misc stuff
- Most of the rest of MM: memcg, threaded hugepages, others.
- cpumask
- kexec
- kdump
- some direct-io performance tweaking
- radix-tree optimisations
- new selftests code
A note on this: often people will develop a new userspace-visible
feature and will develop userspace code to exercise/test that
feature. Then they merge the patch and the selftest code dies.
Sometimes we paste it into the changelog. Sometimes the code gets
thrown into Documentation/(!).
This saddens me. So this patch creates a bare-bones framework which
will henceforth allow me to ask people to include their test apps in
the kernel tree so we can keep them alive. Then when people enhance
or fix the feature, I can ask them to update the test app too.
The infrastruture is terribly trivial at present - let's see how it
evolves.
- checkpoint/restart feature work.
A note on this: this is a project by various mad Russians to perform
c/r mainly from userspace, with various oddball helper code added
into the kernel where the need is demonstrated.
So rather than some large central lump of code, what we have is
little bits and pieces popping up in various places which either
expose something new or which permit something which is normally
kernel-private to be modified.
The overall project is an ongoing thing. I've judged that the size
and scope of the thing means that we're more likely to be successful
with it if we integrate the support into mainline piecemeal rather
than allowing it all to develop out-of-tree.
However I'm less confident than the developers that it will all
eventually work! So what I'm asking them to do is to wrap each piece
of new code inside CONFIG_CHECKPOINT_RESTORE. So if it all
eventually comes to tears and the project as a whole fails, it should
be a simple matter to go through and delete all trace of it.
This lot pretty much wraps up the -rc1 merge for me.
* akpm: (96 commits)
unlzo: fix input buffer free
ramoops: update parameters only after successful init
ramoops: fix use of rounddown_pow_of_two()
c/r: prctl: add PR_SET_MM codes to set up mm_struct entries
c/r: procfs: add start_data, end_data, start_brk members to /proc/$pid/stat v4
c/r: introduce CHECKPOINT_RESTORE symbol
selftests: new x86 breakpoints selftest
selftests: new very basic kernel selftests directory
radix_tree: take radix_tree_path off stack
radix_tree: remove radix_tree_indirect_to_ptr()
dio: optimize cache misses in the submission path
vfs: cache request_queue in struct block_device
fs/direct-io.c: calculate fs_count correctly in get_more_blocks()
drivers/parport/parport_pc.c: fix warnings
panic: don't print redundant backtraces on oops
sysctl: add the kernel.ns_last_pid control
kdump: add udev events for memory online/offline
include/linux/crash_dump.h needs elf.h
kdump: fix crash_kexec()/smp_send_stop() race in panic()
kdump: crashk_res init check for /sys/kernel/kexec_crash_size
...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 93 |
1 files changed, 56 insertions, 37 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 36b3d98..b3ffc21 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -487,41 +487,68 @@ static struct attribute_group khugepaged_attr_group = { .attrs = khugepaged_attr, .name = "khugepaged", }; -#endif /* CONFIG_SYSFS */ -static int __init hugepage_init(void) +static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj) { int err; -#ifdef CONFIG_SYSFS - static struct kobject *hugepage_kobj; -#endif - - err = -EINVAL; - if (!has_transparent_hugepage()) { - transparent_hugepage_flags = 0; - goto out; - } -#ifdef CONFIG_SYSFS - err = -ENOMEM; - hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); - if (unlikely(!hugepage_kobj)) { + *hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); + if (unlikely(!*hugepage_kobj)) { printk(KERN_ERR "hugepage: failed kobject create\n"); - goto out; + return -ENOMEM; } - err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group); + err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group); if (err) { printk(KERN_ERR "hugepage: failed register hugeage group\n"); - goto out; + goto delete_obj; } - err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group); + err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group); if (err) { printk(KERN_ERR "hugepage: failed register hugeage group\n"); - goto out; + goto remove_hp_group; } -#endif + + return 0; + +remove_hp_group: + sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group); +delete_obj: + kobject_put(*hugepage_kobj); + return err; +} + +static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj) +{ + sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group); + sysfs_remove_group(hugepage_kobj, &hugepage_attr_group); + kobject_put(hugepage_kobj); +} +#else +static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj) +{ + return 0; +} + +static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj) +{ +} +#endif /* CONFIG_SYSFS */ + +static int __init hugepage_init(void) +{ + int err; + struct kobject *hugepage_kobj; + + if (!has_transparent_hugepage()) { + transparent_hugepage_flags = 0; + return -EINVAL; + } + + err = hugepage_init_sysfs(&hugepage_kobj); + if (err) + return err; err = khugepaged_slab_init(); if (err) @@ -545,7 +572,9 @@ static int __init hugepage_init(void) set_recommended_min_free_kbytes(); + return 0; out: + hugepage_exit_sysfs(hugepage_kobj); return err; } module_init(hugepage_init) @@ -997,7 +1026,7 @@ out: } int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, - pmd_t *pmd) + pmd_t *pmd, unsigned long addr) { int ret = 0; @@ -1013,6 +1042,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pgtable = get_pmd_huge_pte(tlb->mm); page = pmd_page(*pmd); pmd_clear(pmd); + tlb_remove_pmd_tlb_entry(tlb, pmd, addr); page_remove_rmap(page); VM_BUG_ON(page_mapcount(page) < 0); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); @@ -1116,7 +1146,6 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, entry = pmd_modify(entry, newprot); set_pmd_at(mm, addr, pmd, entry); spin_unlock(&vma->vm_mm->page_table_lock); - flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE); ret = 1; } } else @@ -1199,16 +1228,16 @@ static int __split_huge_page_splitting(struct page *page, static void __split_huge_page_refcount(struct page *page) { int i; - unsigned long head_index = page->index; struct zone *zone = page_zone(page); - int zonestat; int tail_count = 0; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); compound_lock(page); + /* complete memcg works before add pages to LRU */ + mem_cgroup_split_huge_fixup(page); - for (i = 1; i < HPAGE_PMD_NR; i++) { + for (i = HPAGE_PMD_NR - 1; i >= 1; i--) { struct page *page_tail = page + i; /* tail_page->_mapcount cannot change */ @@ -1271,14 +1300,13 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(page_tail->mapping); page_tail->mapping = page->mapping; - page_tail->index = ++head_index; + page_tail->index = page->index + i; BUG_ON(!PageAnon(page_tail)); BUG_ON(!PageUptodate(page_tail)); BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); - mem_cgroup_split_huge_fixup(page, page_tail); lru_add_page_tail(zone, page, page_tail); } @@ -1288,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page) __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); - /* - * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics, - * so adjust those appropriately if this page is on the LRU. - */ - if (PageLRU(page)) { - zonestat = NR_LRU_BASE + page_lru(page); - __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1)); - } - ClearPageCompound(page); compound_unlock(page); spin_unlock_irq(&zone->lru_lock); |