diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-10-01 22:20:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-10-01 22:20:11 -0400 |
commit | bde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957 (patch) | |
tree | d4597bfde4f5d4a3a5729e5534c44993c6216352 | |
parent | 1bca1000fa71a1092947b4a51928abe80a3316d2 (diff) | |
parent | 676bd99178cd962ed24ffdad222b7069d330a969 (diff) | |
download | op-kernel-dev-bde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957.zip op-kernel-dev-bde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957.tar.gz |
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
"12 fixes"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
dmapool: fix overflow condition in pool_find_page()
thermal: avoid division by zero in power allocator
memcg: remove pcp_counter_lock
kprobes: use _do_fork() in samples to make them work again
drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE
memcg: make mem_cgroup_read_stat() unsigned
memcg: fix dirty page migration
dax: fix NULL pointer in __dax_pmd_fault()
mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault
mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1)
userfaultfd: remove kernel header include from uapi header
arch/x86/include/asm/efi.h: fix build failure
-rw-r--r-- | arch/x86/include/asm/efi.h | 2 | ||||
-rw-r--r-- | drivers/input/joystick/Kconfig | 1 | ||||
-rw-r--r-- | drivers/thermal/power_allocator.c | 10 | ||||
-rw-r--r-- | fs/dax.c | 13 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 1 | ||||
-rw-r--r-- | include/linux/mm.h | 21 | ||||
-rw-r--r-- | include/uapi/linux/userfaultfd.h | 2 | ||||
-rw-r--r-- | mm/dmapool.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 8 | ||||
-rw-r--r-- | mm/memcontrol.c | 31 | ||||
-rw-r--r-- | mm/migrate.c | 12 | ||||
-rw-r--r-- | mm/slab.c | 13 | ||||
-rw-r--r-- | samples/kprobes/jprobe_example.c | 14 | ||||
-rw-r--r-- | samples/kprobes/kprobe_example.c | 6 | ||||
-rw-r--r-- | samples/kprobes/kretprobe_example.c | 4 |
15 files changed, 106 insertions, 34 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ab5f1d4..ae68be9 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present * only in kernel binary. Since the EFI stub linked into a separate binary it @@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #undef memcpy #undef memset #undef memmove +#endif #endif /* CONFIG_X86_32 */ diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index 56eb471..4215b53 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY config JOYSTICK_ZHENHUA tristate "5-byte Zhenhua RC transmitter" select SERIO + select BITREVERSE help Say Y here if you have a Zhen Hua PPM-4CH transmitter which is supplied with a ready to fly micro electric indoor helicopters diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 7ff9627..e570ff0 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz, switch_on_temp = 0; temperature_threshold = control_temp - switch_on_temp; + /* + * estimate_pid_constants() tries to find appropriate default + * values for thermal zones that don't provide them. If a + * system integrator has configured a thermal zone with two + * passive trip points at the same temperature, that person + * hasn't put any effort to set up the thermal zone properly + * so just give up. + */ + if (!temperature_threshold) + return; if (!tz->tzp->k_po || force) tz->tzp->k_po = int_to_frac(sustainable_power) / @@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) goto fallback; + sector = bh.b_blocknr << (blkbits - 9); + if (buffer_unwritten(&bh) || buffer_new(&bh)) { int i; + + length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, + bh.b_size); + if (length < 0) { + result = VM_FAULT_SIGBUS; + goto out; + } + if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) + goto fallback; + for (i = 0; i < PTRS_PER_PMD; i++) clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); wmb_pmem(); @@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, result = VM_FAULT_NOPAGE; spin_unlock(ptl); } else { - sector = bh.b_blocknr << (blkbits - 9); length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, bh.b_size); if (length < 0) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e6..6452ff4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6..80001de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index df0e09b..9057d7a 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -11,8 +11,6 @@ #include <linux/types.h> -#include <linux/compiler.h> - #define UFFD_API ((__u64)0xAA) /* * After implementing the respective features it will become: diff --git a/mm/dmapool.c b/mm/dmapool.c index 71a8998..312a716 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma) list_for_each_entry(page, &pool->page_list, page_list) { if (dma < page->dma) continue; - if (dma < (page->dma + pool->allocation)) + if ((dma - page->dma) < pool->allocation) return page; } return NULL; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 999fb0a..9cc7734 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, continue; /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these * areas. This is because a future no-page fault on this VMA diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ddaeba..1fedbde 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) } /* + * Return page count for single (non recursive) @memcg. + * * Implementation Note: reading percpu statistics for memcg. * * Both of vmstat[] and percpu_counter has threshold and do periodic * synchronization to implement "quick" read. There are trade-off between * reading cost and precision of value. Then, we may have a chance to implement - * a periodic synchronizion of counter in memcg's counter. + * a periodic synchronization of counter in memcg's counter. * * But this _read() function is used for user interface now. The user accounts * memory usage by memory cgroup and he _always_ requires exact value because @@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) * * If there are kernel internal actions which can make use of some not-exact * value, and reading all cpu value can be performance bottleneck in some - * common workload, threashold and synchonization as vmstat[] should be + * common workload, threshold and synchronization as vmstat[] should be * implemented. */ -static long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static unsigned long +mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { long val = 0; int cpu; + /* Per-cpu values can be negative, use a signed accumulator */ for_each_possible_cpu(cpu) val += per_cpu(memcg->stat->count[idx], cpu); + /* + * Summing races with updates, so val may be negative. Avoid exposing + * transient negative values. + */ + if (val < 0) + val = 0; return val; } @@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i], + pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], K(mem_cgroup_read_stat(iter, i))); } @@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; - long val = 0; + unsigned long val = 0; - /* Per-cpu values can be negative, use a signed accumulator */ for_each_mem_cgroup_tree(iter, memcg) val += mem_cgroup_read_stat(iter, idx); - if (val < 0) /* race ? */ - val = 0; return val; } @@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], + seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); } @@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v) (u64)memsw * PAGE_SIZE); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - long long val = 0; + unsigned long long val = 0; if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; for_each_mem_cgroup_tree(mi, memcg) val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; - seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); + seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val); } for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { @@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: diff --git a/mm/migrate.c b/mm/migrate.c index 7452a00..842ecd7 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + /* + * Indirectly called below, migrate_page_copy() copies PG_dirty and thus + * needs newpage's memcg set to transfer memcg dirty page accounting. + * So perform memcg migration in two steps: + * 1. set newpage->mem_cgroup (here) + * 2. clear page->mem_cgroup (below) + */ + set_page_memcg(newpage, page_memcg(page)); + mapping = page_mapping(page); if (!mapping) rc = migrate_page(mapping, newpage, page, mode); @@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page, rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc != MIGRATEPAGE_SUCCESS) { + set_page_memcg(newpage, NULL); newpage->mapping = NULL; } else { - mem_cgroup_migrate(page, newpage, false); + set_page_memcg(page, NULL); if (page_was_mapped) remove_migration_ptes(page, newpage); page->mapping = NULL; @@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. + */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c index 9119ac6..c285a3b 100644 --- a/samples/kprobes/jprobe_example.c +++ b/samples/kprobes/jprobe_example.c @@ -1,13 +1,13 @@ /* * Here's a sample kernel module showing the use of jprobes to dump - * the arguments of do_fork(). + * the arguments of _do_fork(). * * For more information on theory of operation of jprobes, see * Documentation/kprobes.txt * * Build and insert the kernel module as done in the kprobe example. * You will see the trace data in /var/log/messages and on the - * console whenever do_fork() is invoked to create a new process. + * console whenever _do_fork() is invoked to create a new process. * (Some messages may be suppressed if syslogd is configured to * eliminate duplicate messages.) */ @@ -17,13 +17,13 @@ #include <linux/kprobes.h> /* - * Jumper probe for do_fork. + * Jumper probe for _do_fork. * Mirror principle enables access to arguments of the probed routine * from the probe handler. */ -/* Proxy routine having the same arguments as actual do_fork() routine */ -static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, +/* Proxy routine having the same arguments as actual _do_fork() routine */ +static long j_do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { @@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, } static struct jprobe my_jprobe = { - .entry = jdo_fork, + .entry = j_do_fork, .kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }, }; diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 366db1a..727eb21 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -1,13 +1,13 @@ /* * NOTE: This example is works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a - * stack trace and selected registers when do_fork() is called. + * stack trace and selected registers when _do_fork() is called. * * For more information on theory of operation of kprobes, see * Documentation/kprobes.txt * * You will see the trace data in /var/log/messages and on the console - * whenever do_fork() is invoked to create a new process. + * whenever _do_fork() is invoked to create a new process. */ #include <linux/kernel.h> @@ -16,7 +16,7 @@ /* For each probe you need to allocate a kprobe structure */ static struct kprobe kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }; /* kprobe pre_handler: called just before the probed instruction is executed */ diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 1041b67..ebb1d1a 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -7,7 +7,7 @@ * * usage: insmod kretprobe_example.ko func=<func_name> * - * If no func_name is specified, do_fork is instrumented + * If no func_name is specified, _do_fork is instrumented * * For more information on theory of operation of kretprobes, see * Documentation/kprobes.txt @@ -25,7 +25,7 @@ #include <linux/limits.h> #include <linux/sched.h> -static char func_name[NAME_MAX] = "do_fork"; +static char func_name[NAME_MAX] = "_do_fork"; module_param_string(func, func_name, NAME_MAX, S_IRUGO); MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" " function's execution time"); |