diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2012-12-12 13:51:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-12 17:38:32 -0800 |
commit | 79da5407eeadc740fbf4b45d6df7d7f8e6adaf2c (patch) | |
tree | a9f1ca92b2711bb84a4707c904fcada24614d60e | |
parent | d8a8e1f0da3d29d7268b3300c96a059d63901b76 (diff) | |
download | op-kernel-dev-79da5407eeadc740fbf4b45d6df7d7f8e6adaf2c.zip op-kernel-dev-79da5407eeadc740fbf4b45d6df7d7f8e6adaf2c.tar.gz |
thp: introduce sysfs knob to disable huge zero page
By default kernel tries to use huge zero page on read page fault. It's
possible to disable huge zero page by writing 0 or enable it back by
writing 1:
echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/vm/transhuge.txt | 7 | ||||
-rw-r--r-- | include/linux/huge_mm.h | 4 | ||||
-rw-r--r-- | mm/huge_memory.c | 21 |
3 files changed, 30 insertions, 2 deletions
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt index 60aeedd..8785fb8 100644 --- a/Documentation/vm/transhuge.txt +++ b/Documentation/vm/transhuge.txt @@ -116,6 +116,13 @@ echo always >/sys/kernel/mm/transparent_hugepage/defrag echo madvise >/sys/kernel/mm/transparent_hugepage/defrag echo never >/sys/kernel/mm/transparent_hugepage/defrag +By default kernel tries to use huge zero page on read page fault. +It's possible to disable huge zero page by writing 0 or enable it +back by writing 1: + +echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page +echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/use_zero_page + khugepaged will be automatically started when transparent_hugepage/enabled is set to "always" or "madvise, and it'll be automatically shutdown if it's set to "never". diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 3132ea7..092dc53 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -39,6 +39,7 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_DEFRAG_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, #ifdef CONFIG_DEBUG_VM TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG, #endif @@ -78,6 +79,9 @@ extern bool is_vma_temporary_stack(struct vm_area_struct *vma); (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \ (__vma)->vm_flags & VM_HUGEPAGE)) +#define transparent_hugepage_use_zero_page() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) #ifdef CONFIG_DEBUG_VM #define transparent_hugepage_debug_cow() \ (transparent_hugepage_flags & \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9a5d45d..72835ce 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -39,7 +39,8 @@ unsigned long transparent_hugepage_flags __read_mostly = (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)| #endif (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)| - (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG); + (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)| + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); /* default scan 8*512 pte (or vmas) every 30 second */ static unsigned int khugepaged_pages_to_scan __read_mostly = HPAGE_PMD_NR*8; @@ -357,6 +358,20 @@ static ssize_t defrag_store(struct kobject *kobj, static struct kobj_attribute defrag_attr = __ATTR(defrag, 0644, defrag_show, defrag_store); +static ssize_t use_zero_page_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return single_flag_show(kobj, attr, buf, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); +} +static ssize_t use_zero_page_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + return single_flag_store(kobj, attr, buf, count, + TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); +} +static struct kobj_attribute use_zero_page_attr = + __ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store); #ifdef CONFIG_DEBUG_VM static ssize_t debug_cow_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -378,6 +393,7 @@ static struct kobj_attribute debug_cow_attr = static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, + &use_zero_page_attr.attr, #ifdef CONFIG_DEBUG_VM &debug_cow_attr.attr, #endif @@ -779,7 +795,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (unlikely(khugepaged_enter(vma))) return VM_FAULT_OOM; - if (!(flags & FAULT_FLAG_WRITE)) { + if (!(flags & FAULT_FLAG_WRITE) && + transparent_hugepage_use_zero_page()) { pgtable_t pgtable; unsigned long zero_pfn; pgtable = pte_alloc_one(mm, haddr); |