summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-10-01 22:20:11 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-10-01 22:20:11 -0400
commitbde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957 (patch)
treed4597bfde4f5d4a3a5729e5534c44993c6216352
parent1bca1000fa71a1092947b4a51928abe80a3316d2 (diff)
parent676bd99178cd962ed24ffdad222b7069d330a969 (diff)
downloadop-kernel-dev-bde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957.zip
op-kernel-dev-bde17b90dd9712cb61a7ab0c1ccd0f7f6aa57957.tar.gz
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "12 fixes" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: dmapool: fix overflow condition in pool_find_page() thermal: avoid division by zero in power allocator memcg: remove pcp_counter_lock kprobes: use _do_fork() in samples to make them work again drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE memcg: make mem_cgroup_read_stat() unsigned memcg: fix dirty page migration dax: fix NULL pointer in __dax_pmd_fault() mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1) userfaultfd: remove kernel header include from uapi header arch/x86/include/asm/efi.h: fix build failure
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--drivers/input/joystick/Kconfig1
-rw-r--r--drivers/thermal/power_allocator.c10
-rw-r--r--fs/dax.c13
-rw-r--r--include/linux/memcontrol.h1
-rw-r--r--include/linux/mm.h21
-rw-r--r--include/uapi/linux/userfaultfd.h2
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/hugetlb.c8
-rw-r--r--mm/memcontrol.c31
-rw-r--r--mm/migrate.c12
-rw-r--r--mm/slab.c13
-rw-r--r--samples/kprobes/jprobe_example.c14
-rw-r--r--samples/kprobes/kprobe_example.c6
-rw-r--r--samples/kprobes/kretprobe_example.c4
15 files changed, 106 insertions, 34 deletions
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ab5f1d4..ae68be9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
+#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
* only in kernel binary. Since the EFI stub linked into a separate binary it
@@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
#undef memcpy
#undef memset
#undef memmove
+#endif
#endif /* CONFIG_X86_32 */
diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
index 56eb471..4215b53 100644
--- a/drivers/input/joystick/Kconfig
+++ b/drivers/input/joystick/Kconfig
@@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY
config JOYSTICK_ZHENHUA
tristate "5-byte Zhenhua RC transmitter"
select SERIO
+ select BITREVERSE
help
Say Y here if you have a Zhen Hua PPM-4CH transmitter which is
supplied with a ready to fly micro electric indoor helicopters
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index 7ff9627..e570ff0 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz,
switch_on_temp = 0;
temperature_threshold = control_temp - switch_on_temp;
+ /*
+ * estimate_pid_constants() tries to find appropriate default
+ * values for thermal zones that don't provide them. If a
+ * system integrator has configured a thermal zone with two
+ * passive trip points at the same temperature, that person
+ * hasn't put any effort to set up the thermal zone properly
+ * so just give up.
+ */
+ if (!temperature_threshold)
+ return;
if (!tz->tzp->k_po || force)
tz->tzp->k_po = int_to_frac(sustainable_power) /
diff --git a/fs/dax.c b/fs/dax.c
index 7ae6df7..bcfb14b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
goto fallback;
+ sector = bh.b_blocknr << (blkbits - 9);
+
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
int i;
+
+ length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
+ bh.b_size);
+ if (length < 0) {
+ result = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ goto fallback;
+
for (i = 0; i < PTRS_PER_PMD; i++)
clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
wmb_pmem();
@@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
- sector = bh.b_blocknr << (blkbits - 9);
length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
bh.b_size);
if (length < 0) {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ad800e6..6452ff4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -242,7 +242,6 @@ struct mem_cgroup {
* percpu counter.
*/
struct mem_cgroup_stat_cpu __percpu *stat;
- spinlock_t pcp_counter_lock;
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
struct cg_proto tcp_mem;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 91c08f6..80001de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ return page->mem_cgroup;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+ page->mem_cgroup = memcg;
+}
+#else
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ return NULL;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+}
+#endif
+
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index df0e09b..9057d7a 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -11,8 +11,6 @@
#include <linux/types.h>
-#include <linux/compiler.h>
-
#define UFFD_API ((__u64)0xAA)
/*
* After implementing the respective features it will become:
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 71a8998..312a716 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
- if (dma < (page->dma + pool->allocation))
+ if ((dma - page->dma) < pool->allocation)
return page;
}
return NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 999fb0a..9cc7734 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
/*
+ * Shared VMAs have their own reserves and do not affect
+ * MAP_PRIVATE accounting but it is possible that a shared
+ * VMA is using the same page so check and skip such VMAs.
+ */
+ if (iter_vma->vm_flags & VM_MAYSHARE)
+ continue;
+
+ /*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba..1fedbde 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
}
/*
+ * Return page count for single (non recursive) @memcg.
+ *
* Implementation Note: reading percpu statistics for memcg.
*
* Both of vmstat[] and percpu_counter has threshold and do periodic
* synchronization to implement "quick" read. There are trade-off between
* reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
*
* But this _read() function is used for user interface now. The user accounts
* memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
*
* If there are kernel internal actions which can make use of some not-exact
* value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
* implemented.
*/
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
{
long val = 0;
int cpu;
+ /* Per-cpu values can be negative, use a signed accumulator */
for_each_possible_cpu(cpu)
val += per_cpu(memcg->stat->count[idx], cpu);
+ /*
+ * Summing races with updates, so val may be negative. Avoid exposing
+ * transient negative values.
+ */
+ if (val < 0)
+ val = 0;
return val;
}
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+ pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
}
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
- long val = 0;
+ unsigned long val = 0;
- /* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
- if (val < 0) /* race ? */
- val = 0;
return val;
}
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+ seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
}
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
(u64)memsw * PAGE_SIZE);
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
- long long val = 0;
+ unsigned long long val = 0;
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
- seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
@@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto out_free_stat;
- spin_lock_init(&memcg->pcp_counter_lock);
return memcg;
out_free_stat:
diff --git a/mm/migrate.c b/mm/migrate.c
index 7452a00..842ecd7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (PageSwapBacked(page))
SetPageSwapBacked(newpage);
+ /*
+ * Indirectly called below, migrate_page_copy() copies PG_dirty and thus
+ * needs newpage's memcg set to transfer memcg dirty page accounting.
+ * So perform memcg migration in two steps:
+ * 1. set newpage->mem_cgroup (here)
+ * 2. clear page->mem_cgroup (below)
+ */
+ set_page_memcg(newpage, page_memcg(page));
+
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
@@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
+ set_page_memcg(newpage, NULL);
newpage->mapping = NULL;
} else {
- mem_cgroup_migrate(page, newpage, false);
+ set_page_memcg(page, NULL);
if (page_was_mapped)
remove_migration_ptes(page, newpage);
page->mapping = NULL;
diff --git a/mm/slab.c b/mm/slab.c
index c77ebe6..4fcc5dd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= kmalloc_size(INDEX_NODE + 1)
- && cachep->object_size > cache_line_size()
- && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ /*
+ * To activate debug pagealloc, off-slab management is necessary
+ * requirement. In early phase of initialization, small sized slab
+ * doesn't get initialized so it would not be possible. So, we need
+ * to check size >= 256. It guarantees that all necessary small
+ * sized slab is initialized in current slab initialization sequence.
+ */
+ if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ size >= 256 && cachep->object_size > cache_line_size() &&
+ ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c
index 9119ac6..c285a3b 100644
--- a/samples/kprobes/jprobe_example.c
+++ b/samples/kprobes/jprobe_example.c
@@ -1,13 +1,13 @@
/*
* Here's a sample kernel module showing the use of jprobes to dump
- * the arguments of do_fork().
+ * the arguments of _do_fork().
*
* For more information on theory of operation of jprobes, see
* Documentation/kprobes.txt
*
* Build and insert the kernel module as done in the kprobe example.
* You will see the trace data in /var/log/messages and on the
- * console whenever do_fork() is invoked to create a new process.
+ * console whenever _do_fork() is invoked to create a new process.
* (Some messages may be suppressed if syslogd is configured to
* eliminate duplicate messages.)
*/
@@ -17,13 +17,13 @@
#include <linux/kprobes.h>
/*
- * Jumper probe for do_fork.
+ * Jumper probe for _do_fork.
* Mirror principle enables access to arguments of the probed routine
* from the probe handler.
*/
-/* Proxy routine having the same arguments as actual do_fork() routine */
-static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
+/* Proxy routine having the same arguments as actual _do_fork() routine */
+static long j_do_fork(unsigned long clone_flags, unsigned long stack_start,
unsigned long stack_size, int __user *parent_tidptr,
int __user *child_tidptr)
{
@@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
}
static struct jprobe my_jprobe = {
- .entry = jdo_fork,
+ .entry = j_do_fork,
.kp = {
- .symbol_name = "do_fork",
+ .symbol_name = "_do_fork",
},
};
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index 366db1a..727eb21 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -1,13 +1,13 @@
/*
* NOTE: This example is works on x86 and powerpc.
* Here's a sample kernel module showing the use of kprobes to dump a
- * stack trace and selected registers when do_fork() is called.
+ * stack trace and selected registers when _do_fork() is called.
*
* For more information on theory of operation of kprobes, see
* Documentation/kprobes.txt
*
* You will see the trace data in /var/log/messages and on the console
- * whenever do_fork() is invoked to create a new process.
+ * whenever _do_fork() is invoked to create a new process.
*/
#include <linux/kernel.h>
@@ -16,7 +16,7 @@
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
- .symbol_name = "do_fork",
+ .symbol_name = "_do_fork",
};
/* kprobe pre_handler: called just before the probed instruction is executed */
diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c
index 1041b67..ebb1d1a 100644
--- a/samples/kprobes/kretprobe_example.c
+++ b/samples/kprobes/kretprobe_example.c
@@ -7,7 +7,7 @@
*
* usage: insmod kretprobe_example.ko func=<func_name>
*
- * If no func_name is specified, do_fork is instrumented
+ * If no func_name is specified, _do_fork is instrumented
*
* For more information on theory of operation of kretprobes, see
* Documentation/kprobes.txt
@@ -25,7 +25,7 @@
#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "do_fork";
+static char func_name[NAME_MAX] = "_do_fork";
module_param_string(func, func_name, NAME_MAX, S_IRUGO);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
OpenPOWER on IntegriCloud