summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrea Arcangeli <aarcange@redhat.com> 2011-01-13 15:47:17 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 17:32:47 -0800
commit: a664b2d8555c659127bf8fe049a58449d394a707 (patch)
tree: 14771f4ab93a9dda98174f21e0361a77e2aebfa6
parent: 1ddd6db43a08cba56c7ee920800980862086f1c3 (diff)
download: op-kernel-dev-a664b2d8555c659127bf8fe049a58449d394a707.zip
download: op-kernel-dev-a664b2d8555c659127bf8fe049a58449d394a707.tar.gz
thp: madvise(MADV_NOHUGEPAGE)
Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be hugepage backed. Return -EINVAL if the vma is not of an anonymous type, or the feature isn't built into the kernel. Never silently return success.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/huge_mm.h 14
-rw-r--r-- include/linux/khugepaged.h 7
-rw-r--r-- include/linux/mm.h 1
-rw-r--r-- mm/huge_memory.c 41
-rw-r--r-- mm/madvise.c 4
5 files changed, 46 insertions, 21 deletions
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9b48c24d..a8b7e42 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page,
#define HPAGE_PMD_SIZE HPAGE_SIZE
#define transparent_hugepage_enabled(__vma) \
- (transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
- (transparent_hugepage_flags & \
- (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
- (__vma)->vm_flags & VM_HUGEPAGE))
+ ((transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_FLAG) || \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \
+ ((__vma)->vm_flags & VM_HUGEPAGE))) && \
+ !((__vma)->vm_flags & VM_NOHUGEPAGE))
#define transparent_hugepage_defrag(__vma) \
((transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \
@@ -103,7 +105,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd);
#if HPAGE_PMD_ORDER > MAX_ORDER
#error "hugepages can't be allocated by the buddy allocator"
#endif
-extern int hugepage_madvise(unsigned long *vm_flags);
+extern int hugepage_madvise(unsigned long *vm_flags, int advice);
extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
@@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page)
do { } while (0)
#define wait_split_huge_page(__anon_vma, __pmd) \
do { } while (0)
-static inline int hugepage_madvise(unsigned long *vm_flags)
+static inline int hugepage_madvise(unsigned long *vm_flags, int advice)
{
BUG();
return 0;
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 552f318..6b394f0 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm)
static inline int khugepaged_enter(struct vm_area_struct *vma)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
- if (khugepaged_always() ||
- (khugepaged_req_madv() &&
- vma->vm_flags & VM_HUGEPAGE))
+ if ((khugepaged_always() ||
+ (khugepaged_req_madv() &&
+ vma->vm_flags & VM_HUGEPAGE)) &&
+ !(vma->vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ce97a2b..956a355 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_GROWSUP 0x00000200
#else
#define VM_GROWSUP 0x00000000
+#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */
#endif
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f4f6041..fce667c0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -16,6 +16,7 @@
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
+#include <linux/mman.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
#include "internal.h"
@@ -1388,18 +1389,36 @@ out:
return ret;
}
-int hugepage_madvise(unsigned long *vm_flags)
+int hugepage_madvise(unsigned long *vm_flags, int advice)
{
- /*
- * Be somewhat over-protective like KSM for now!
- */
- if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE |
- VM_PFNMAP | VM_IO | VM_DONTEXPAND |
- VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
- VM_MIXEDMAP | VM_SAO))
- return -EINVAL;
-
- *vm_flags |= VM_HUGEPAGE;
+ switch (advice) {
+ case MADV_HUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_HUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_NOHUGEPAGE;
+ *vm_flags |= VM_HUGEPAGE;
+ break;
+ case MADV_NOHUGEPAGE:
+ /*
+ * Be somewhat over-protective like KSM for now!
+ */
+ if (*vm_flags & (VM_NOHUGEPAGE |
+ VM_SHARED | VM_MAYSHARE |
+ VM_PFNMAP | VM_IO | VM_DONTEXPAND |
+ VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE |
+ VM_MIXEDMAP | VM_SAO))
+ return -EINVAL;
+ *vm_flags &= ~VM_HUGEPAGE;
+ *vm_flags |= VM_NOHUGEPAGE;
+ break;
+ }
return 0;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index ecde40a..bbac126 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma,
goto out;
break;
case MADV_HUGEPAGE:
- error = hugepage_madvise(&new_flags);
+ case MADV_NOHUGEPAGE:
+ error = hugepage_madvise(&new_flags, behavior);
if (error)
goto out;
break;
@@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
case MADV_HUGEPAGE:
+ case MADV_NOHUGEPAGE:
#endif
return 1;
OpenPOWER on IntegriCloud