diff options
-rw-r--r-- | Documentation/vm/split_page_table_lock | 6 | ||||
-rw-r--r-- | MAINTAINERS | 1 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 4 | ||||
-rw-r--r-- | block/partitions/efi.c | 5 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 8 | ||||
-rw-r--r-- | drivers/rtc/rtc-at91rm9200.c | 9 | ||||
-rw-r--r-- | fs/configfs/dir.c | 16 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 10 | ||||
-rw-r--r-- | include/linux/mm_types.h | 6 | ||||
-rw-r--r-- | ipc/shm.c | 37 | ||||
-rw-r--r-- | mm/hugetlb.c | 51 | ||||
-rw-r--r-- | mm/mempolicy.c | 2 | ||||
-rw-r--r-- | mm/migrate.c | 48 | ||||
-rw-r--r-- | mm/swap.c | 143 | ||||
-rw-r--r-- | net/Kconfig | 4 | ||||
-rwxr-xr-x | scripts/checkpatch.pl | 1 |
16 files changed, 227 insertions, 124 deletions
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock index 7521d36..6dea4fd 100644 --- a/Documentation/vm/split_page_table_lock +++ b/Documentation/vm/split_page_table_lock @@ -63,9 +63,9 @@ levels. PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table allocation and pgtable_pmd_page_dtor() on freeing. -Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but -make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE -preallocate few PMDs on pgd_alloc(). +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and +pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing +paths: i.e X86_PAE preallocate few PMDs on pgd_alloc(). With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. diff --git a/MAINTAINERS b/MAINTAINERS index 8d98e54..d9c97dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4065,6 +4065,7 @@ F: arch/x86/include/uapi/asm/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c F: drivers/hid/hid-hyperv.c F: drivers/hv/ +F: drivers/input/serio/hyperv-keyboard.c F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/video/hyperv_fb.c diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 36aa999..c96314a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -61,6 +61,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) #if PAGETABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { + struct page *page = virt_to_page(pmd); paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); /* * NOTE! For PAE, any changes to the top page-directory-pointer-table @@ -69,7 +70,8 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #ifdef CONFIG_X86_PAE tlb->need_flush_all = 1; #endif - tlb_remove_page(tlb, virt_to_page(pmd)); + pgtable_pmd_page_dtor(page); + tlb_remove_page(tlb, page); } #if PAGETABLE_LEVELS > 3 diff --git a/block/partitions/efi.c b/block/partitions/efi.c index a8287b4..dc51f46 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -96,6 +96,7 @@ * - Code works, detects all the partitions. * ************************************************************/ +#include <linux/kernel.h> #include <linux/crc32.h> #include <linux/ctype.h> #include <linux/math64.h> @@ -715,8 +716,8 @@ int efi_partition(struct parsed_partitions *state) efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid); /* Naively convert UTF16-LE to 7 bits. */ - label_max = min(sizeof(info->volname) - 1, - sizeof(ptes[i].partition_name)); + label_max = min(ARRAY_SIZE(info->volname) - 1, + ARRAY_SIZE(ptes[i].partition_name)); info->volname[label_max] = 0; while (label_count < label_max) { u8 c = ptes[i].partition_name[label_count] & 0xff; diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index b5d8423..ea192ec 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -223,7 +223,7 @@ static void null_softirq_done_fn(struct request *rq) blk_end_request_all(rq, 0); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void null_ipi_cmd_end_io(void *data) { @@ -260,7 +260,7 @@ static void null_cmd_end_ipi(struct nullb_cmd *cmd) put_cpu(); } -#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#endif /* CONFIG_SMP */ static inline void null_handle_cmd(struct nullb_cmd *cmd) { @@ -270,7 +270,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) end_cmd(cmd); break; case NULL_IRQ_SOFTIRQ: -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP null_cmd_end_ipi(cmd); #else end_cmd(cmd); @@ -571,7 +571,7 @@ static int __init null_init(void) { unsigned int i; -#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if !defined(CONFIG_SMP) if (irqmode == NULL_IRQ_SOFTIRQ) { pr_warn("null_blk: softirq completions not available.\n"); pr_warn("null_blk: using direct completions.\n"); diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c index 8b2cd8a..c0da95e 100644 --- a/drivers/rtc/rtc-at91rm9200.c +++ b/drivers/rtc/rtc-at91rm9200.c @@ -428,6 +428,14 @@ static int __exit at91_rtc_remove(struct platform_device *pdev) return 0; } +static void at91_rtc_shutdown(struct platform_device *pdev) +{ + /* Disable all interrupts */ + at91_rtc_write(AT91_RTC_IDR, AT91_RTC_ACKUPD | AT91_RTC_ALARM | + AT91_RTC_SECEV | AT91_RTC_TIMEV | + AT91_RTC_CALEV); +} + #ifdef CONFIG_PM_SLEEP /* AT91RM9200 RTC Power management control */ @@ -466,6 +474,7 @@ static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume); static struct platform_driver at91_rtc_driver = { .remove = __exit_p(at91_rtc_remove), + .shutdown = at91_rtc_shutdown, .driver = { .name = "at91_rtc", .owner = THIS_MODULE, diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 4522e07..e081acb 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - sd->s_dentry = NULL; + /* Coordinate with configfs_attach_attr where will increase + * sd->s_count and update sd->s_dentry to new allocated one. + * Only set sd->dentry to null when this dentry is the only + * sd owner. + * If not do so, configfs_d_iput may run just after + * configfs_attach_attr and set sd->s_dentry to null + * even it's still in use. + */ + if (atomic_read(&sd->s_count) <= 2) + sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } @@ -416,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; + spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; + spin_unlock(&configfs_dirent_lock); + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index acd2010..9649ff0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -31,6 +31,7 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks); void hugepage_put_subpool(struct hugepage_subpool *spool); int PageHuge(struct page *page); +int PageHeadHuge(struct page *page_head); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); @@ -69,7 +70,6 @@ int dequeue_hwpoisoned_huge_page(struct page *page); bool isolate_huge_page(struct page *page, struct list_head *list); void putback_active_hugepage(struct page *page); bool is_hugepage_active(struct page *page); -void copy_huge_page(struct page *dst, struct page *src); #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud); @@ -104,6 +104,11 @@ static inline int PageHuge(struct page *page) return 0; } +static inline int PageHeadHuge(struct page *page_head) +{ + return 0; +} + static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { } @@ -140,9 +145,6 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page) #define isolate_huge_page(p, l) false #define putback_active_hugepage(p) do {} while (0) #define is_hugepage_active(x) false -static inline void copy_huge_page(struct page *dst, struct page *src) -{ -} static inline unsigned long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10f5a72..011eb85 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -65,9 +65,6 @@ struct page { * this page is only used to * free other pages. */ -#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ -#endif }; union { @@ -135,6 +132,9 @@ struct page { struct list_head list; /* slobs list of pages */ struct slab *slab_page; /* slab fields */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; /* Remainder is not double word aligned */ @@ -208,15 +208,18 @@ static void shm_open(struct vm_area_struct *vma) */ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) { + struct file *shm_file; + + shm_file = shp->shm_file; + shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid(ns, shp); shm_unlock(shp); - if (!is_file_hugepages(shp->shm_file)) - shmem_lock(shp->shm_file, 0, shp->mlock_user); + if (!is_file_hugepages(shm_file)) + shmem_lock(shm_file, 0, shp->mlock_user); else if (shp->mlock_user) - user_shm_unlock(file_inode(shp->shm_file)->i_size, - shp->mlock_user); - fput (shp->shm_file); + user_shm_unlock(file_inode(shm_file)->i_size, shp->mlock_user); + fput(shm_file); ipc_rcu_putref(shp, shm_rcu_free); } @@ -974,15 +977,25 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) ipc_lock_object(&shp->shm_perm); if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) { kuid_t euid = current_euid(); - err = -EPERM; if (!uid_eq(euid, shp->shm_perm.uid) && - !uid_eq(euid, shp->shm_perm.cuid)) + !uid_eq(euid, shp->shm_perm.cuid)) { + err = -EPERM; goto out_unlock0; - if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) + } + if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) { + err = -EPERM; goto out_unlock0; + } } shm_file = shp->shm_file; + + /* check if shm_destroy() is tearing down shp */ + if (shm_file == NULL) { + err = -EIDRM; + goto out_unlock0; + } + if (is_file_hugepages(shm_file)) goto out_unlock0; @@ -1101,6 +1114,14 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto out_unlock; ipc_lock_object(&shp->shm_perm); + + /* check if shm_destroy() is tearing down shp */ + if (shp->shm_file == NULL) { + ipc_unlock_object(&shp->shm_perm); + err = -EIDRM; + goto out_unlock; + } + path = shp->shm_file->f_path; path_get(&path); shp->shm_nattch++; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7d57af2..dee6cf4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -476,40 +476,6 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg) return 0; } -static void copy_gigantic_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - struct page *dst_base = dst; - struct page *src_base = src; - - for (i = 0; i < pages_per_huge_page(h); ) { - cond_resched(); - copy_highpage(dst, src); - - i++; - dst = mem_map_next(dst, dst_base, i); - src = mem_map_next(src, src_base, i); - } -} - -void copy_huge_page(struct page *dst, struct page *src) -{ - int i; - struct hstate *h = page_hstate(src); - - if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) { - copy_gigantic_page(dst, src); - return; - } - - might_sleep(); - for (i = 0; i < pages_per_huge_page(h); i++) { - cond_resched(); - copy_highpage(dst + i, src + i); - } -} - static void enqueue_huge_page(struct hstate *h, struct page *page) { int nid = page_to_nid(page); @@ -736,6 +702,23 @@ int PageHuge(struct page *page) } EXPORT_SYMBOL_GPL(PageHuge); +/* + * PageHeadHuge() only returns true for hugetlbfs head page, but not for + * normal or transparent huge pages. + */ +int PageHeadHuge(struct page *page_head) +{ + compound_page_dtor *dtor; + + if (!PageHead(page_head)) + return 0; + + dtor = get_compound_page_dtor(page_head); + + return dtor == free_huge_page; +} +EXPORT_SYMBOL_GPL(PageHeadHuge); + pgoff_t __basepage_index(struct page *page) { struct page *page_head = compound_head(page); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c4403cd..eca4a31 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2950,7 +2950,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) return; } - p += snprintf(p, maxlen, policy_modes[mode]); + p += snprintf(p, maxlen, "%s", policy_modes[mode]); if (flags & MPOL_MODE_FLAGS) { p += snprintf(p, buffer + maxlen - p, "="); diff --git a/mm/migrate.c b/mm/migrate.c index 316e720..bb94004 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -442,6 +442,54 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, } /* + * Gigantic pages are so large that we do not guarantee that page++ pointer + * arithmetic will work across the entire page. We need something more + * specialized. + */ +static void __copy_gigantic_page(struct page *dst, struct page *src, + int nr_pages) +{ + int i; + struct page *dst_base = dst; + struct page *src_base = src; + + for (i = 0; i < nr_pages; ) { + cond_resched(); + copy_highpage(dst, src); + + i++; + dst = mem_map_next(dst, dst_base, i); + src = mem_map_next(src, src_base, i); + } +} + +static void copy_huge_page(struct page *dst, struct page *src) +{ + int i; + int nr_pages; + + if (PageHuge(src)) { + /* hugetlbfs page */ + struct hstate *h = page_hstate(src); + nr_pages = pages_per_huge_page(h); + + if (unlikely(nr_pages > MAX_ORDER_NR_PAGES)) { + __copy_gigantic_page(dst, src, nr_pages); + return; + } + } else { + /* thp page */ + BUG_ON(!PageTransHuge(src)); + nr_pages = hpage_nr_pages(src); + } + + for (i = 0; i < nr_pages; i++) { + cond_resched(); + copy_highpage(dst + i, src + i); + } +} + +/* * Copy the page to its new location */ void migrate_page_copy(struct page *newpage, struct page *page) @@ -82,19 +82,6 @@ static void __put_compound_page(struct page *page) static void put_compound_page(struct page *page) { - /* - * hugetlbfs pages cannot be split from under us. If this is a - * hugetlbfs page, check refcount on head page and release the page if - * the refcount becomes zero. - */ - if (PageHuge(page)) { - page = compound_head(page); - if (put_page_testzero(page)) - __put_compound_page(page); - - return; - } - if (unlikely(PageTail(page))) { /* __split_huge_page_refcount can run under us */ struct page *page_head = compound_trans_head(page); @@ -111,14 +98,31 @@ static void put_compound_page(struct page *page) * still hot on arches that do not support * this_cpu_cmpxchg_double(). */ - if (PageSlab(page_head)) { - if (PageTail(page)) { + if (PageSlab(page_head) || PageHeadHuge(page_head)) { + if (likely(PageTail(page))) { + /* + * __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); + atomic_dec(&page->_mapcount); if (put_page_testzero(page_head)) VM_BUG_ON(1); - - atomic_dec(&page->_mapcount); - goto skip_lock_tail; + if (put_page_testzero(page_head)) + __put_compound_page(page_head); + return; } else + /* + * __split_huge_page_refcount + * run before us, "page" was a + * THP tail. The split + * page_head has been freed + * and reallocated as slab or + * hugetlbfs page of smaller + * order (only possible if + * reallocated as slab on + * x86). + */ goto skip_lock; } /* @@ -132,8 +136,27 @@ static void put_compound_page(struct page *page) /* __split_huge_page_refcount run before us */ compound_unlock_irqrestore(page_head, flags); skip_lock: - if (put_page_testzero(page_head)) - __put_single_page(page_head); + if (put_page_testzero(page_head)) { + /* + * The head page may have been + * freed and reallocated as a + * compound page of smaller + * order and then freed again. + * All we know is that it + * cannot have become: a THP + * page, a compound page of + * higher order, a tail page. + * That is because we still + * hold the refcount of the + * split THP tail and + * page_head was the THP head + * before the split. + */ + if (PageHead(page_head)) + __put_compound_page(page_head); + else + __put_single_page(page_head); + } out_put_single: if (put_page_testzero(page)) __put_single_page(page); @@ -155,7 +178,6 @@ out_put_single: VM_BUG_ON(atomic_read(&page->_count) != 0); compound_unlock_irqrestore(page_head, flags); -skip_lock_tail: if (put_page_testzero(page_head)) { if (PageHead(page_head)) __put_compound_page(page_head); @@ -198,51 +220,52 @@ bool __get_page_tail(struct page *page) * proper PT lock that already serializes against * split_huge_page(). */ + unsigned long flags; bool got = false; - struct page *page_head; - - /* - * If this is a hugetlbfs page it cannot be split under us. Simply - * increment refcount for the head page. - */ - if (PageHuge(page)) { - page_head = compound_head(page); - atomic_inc(&page_head->_count); - got = true; - } else { - unsigned long flags; + struct page *page_head = compound_trans_head(page); - page_head = compound_trans_head(page); - if (likely(page != page_head && - get_page_unless_zero(page_head))) { - - /* Ref to put_compound_page() comment. */ - if (PageSlab(page_head)) { - if (likely(PageTail(page))) { - __get_page_tail_foll(page, false); - return true; - } else { - put_page(page_head); - return false; - } - } - - /* - * page_head wasn't a dangling pointer but it - * may not be a head page anymore by the time - * we obtain the lock. That is ok as long as it - * can't be freed from under us. - */ - flags = compound_lock_irqsave(page_head); - /* here __split_huge_page_refcount won't run anymore */ + if (likely(page != page_head && get_page_unless_zero(page_head))) { + /* Ref to put_compound_page() comment. */ + if (PageSlab(page_head) || PageHeadHuge(page_head)) { if (likely(PageTail(page))) { + /* + * This is a hugetlbfs page or a slab + * page. __split_huge_page_refcount + * cannot race here. + */ + VM_BUG_ON(!PageHead(page_head)); __get_page_tail_foll(page, false); - got = true; - } - compound_unlock_irqrestore(page_head, flags); - if (unlikely(!got)) + return true; + } else { + /* + * __split_huge_page_refcount run + * before us, "page" was a THP + * tail. The split page_head has been + * freed and reallocated as slab or + * hugetlbfs page of smaller order + * (only possible if reallocated as + * slab on x86). + */ put_page(page_head); + return false; + } + } + + /* + * page_head wasn't a dangling pointer but it + * may not be a head page anymore by the time + * we obtain the lock. That is ok as long as it + * can't be freed from under us. + */ + flags = compound_lock_irqsave(page_head); + /* here __split_huge_page_refcount won't run anymore */ + if (likely(PageTail(page))) { + __get_page_tail_foll(page, false); + got = true; } + compound_unlock_irqrestore(page_head, flags); + if (unlikely(!got)) + put_page(page_head); } return got; } diff --git a/net/Kconfig b/net/Kconfig index 0715db6..d334678 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -224,7 +224,7 @@ source "net/hsr/Kconfig" config RPS boolean - depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS + depends on SMP && SYSFS default y config RFS_ACCEL @@ -235,7 +235,7 @@ config RFS_ACCEL config XPS boolean - depends on SMP && USE_GENERIC_SMP_HELPERS + depends on SMP default y config NETPRIO_CGROUP diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 61090e0..9c98100 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3289,6 +3289,7 @@ sub process { } } if (!defined $suppress_whiletrailers{$linenr} && + defined($stat) && defined($cond) && $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) { my ($s, $c) = ($stat, $cond); |