summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>2016-07-26 15:26:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-26 16:19:19 -0700
commitf3f0e1d2150b2b99da2cbdfaad000089efe9bf30 (patch)
tree505f48a5a319e44b31bd810ed8b673f2bdb1cb88 /include
parent4595ef88d136134a9470c955575640f5c96344ed (diff)
downloadop-kernel-dev-f3f0e1d2150b2b99da2cbdfaad000089efe9bf30.zip
op-kernel-dev-f3f0e1d2150b2b99da2cbdfaad000089efe9bf30.tar.gz
khugepaged: add support of collapse for tmpfs/shmem pages
This patch extends khugepaged to support collapse of tmpfs/shmem pages. We share fair amount of infrastructure with anon-THP collapse. Few design points: - First we are looking for VMA which can be suitable for mapping huge page; - If the VMA maps shmem file, the rest scan/collapse operations operates on page cache, not on page tables as in anon VMA case. - khugepaged_scan_shmem() finds a range which is suitable for huge page. The scan is lockless and shouldn't disturb system too much. - once the candidate for collapse is found, collapse_shmem() attempts to create a huge page: + scan over radix tree, making the range point to new huge page; + new huge page is not-uptodate, locked and freezed (refcount is 0), so nobody can touch them until we say so. + we swap in pages during the scan. khugepaged_scan_shmem() filters out ranges with more than khugepaged_max_ptes_swap swapped out pages. It's HPAGE_PMD_NR/8 by default. + old pages are isolated, unmapped and put to local list in case to be restored back if collapse failed. - if collapse succeed, we retract pte page tables from VMAs where huge pages mapping is possible. The huge page will be mapped as PMD on next minor fault into the range. Link: http://lkml.kernel.org/r/1466021202-61880-35-git-send-email-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/shmem_fs.h23
-rw-r--r--include/trace/events/huge_memory.h3
2 files changed, 25 insertions, 1 deletions
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 94eaaa2..0890f70 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -54,6 +54,7 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
extern bool shmem_mapping(struct address_space *mapping);
+extern bool shmem_huge_enabled(struct vm_area_struct *vma);
extern void shmem_unlock_mapping(struct address_space *mapping);
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp_mask);
@@ -64,6 +65,19 @@ extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
pgoff_t start, pgoff_t end);
+/* Flag allocation requirements to shmem_getpage */
+enum sgp_type {
+ SGP_READ, /* don't exceed i_size, don't allocate page */
+ SGP_CACHE, /* don't exceed i_size, may allocate page */
+ SGP_NOHUGE, /* like SGP_CACHE, but no huge pages */
+ SGP_HUGE, /* like SGP_CACHE, huge pages preferred */
+ SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
+ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
+};
+
+extern int shmem_getpage(struct inode *inode, pgoff_t index,
+ struct page **pagep, enum sgp_type sgp);
+
static inline struct page *shmem_read_mapping_page(
struct address_space *mapping, pgoff_t index)
{
@@ -71,6 +85,15 @@ static inline struct page *shmem_read_mapping_page(
mapping_gfp_mask(mapping));
}
+static inline bool shmem_file(struct file *file)
+{
+ if (!IS_ENABLED(CONFIG_SHMEM))
+ return false;
+ if (!file || !file->f_mapping)
+ return false;
+ return shmem_mapping(file->f_mapping);
+}
+
extern bool shmem_charge(struct inode *inode, long pages);
extern void shmem_uncharge(struct inode *inode, long pages);
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index bda2118..830d47d 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -29,7 +29,8 @@
EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\
EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \
EM( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") \
- EMe( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte")
+ EM( SCAN_EXCEED_SWAP_PTE, "exceed_swap_pte") \
+ EMe(SCAN_TRUNCATED, "truncated") \
#undef EM
#undef EMe
OpenPOWER on IntegriCloud