summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c8
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/latencytop.c17
-rw-r--r--kernel/module.c12
-rw-r--r--kernel/perf_event.c42
-rw-r--r--kernel/printk.c21
-rw-r--r--kernel/range.c2
-rw-r--r--kernel/relay.c15
-rw-r--r--kernel/sysctl.c25
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/watchdog.c2
12 files changed, 120 insertions, 51 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index b194feb..21aa7b3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -96,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk)
sig->tty = NULL;
} else {
/*
+ * This can only happen if the caller is de_thread().
+ * FIXME: this is the temporary hack, we should teach
+ * posix-cpu-timers to handle this case correctly.
+ */
+ if (unlikely(has_group_leader_pid(tsk)))
+ posix_cpu_timers_exit_group(tsk);
+
+ /*
* If there is any task waiting for the group exit
* then notify it:
*/
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 644e8d5..5f92acc5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq)
if (!desc)
return;
+ if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
+ KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
+ return;
+
chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, false);
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb30..17110a4 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
account_global_scheduler_latency(tsk, &lat);
- /*
- * short term hack; if we're > 32 we stop; future we recycle:
- */
- tsk->latency_record_count++;
- if (tsk->latency_record_count >= LT_SAVECOUNT)
- goto out_unlock;
-
- for (i = 0; i < LT_SAVECOUNT; i++) {
+ for (i = 0; i < tsk->latency_record_count; i++) {
struct latency_record *mylat;
int same = 1;
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
}
}
+ /*
+ * short term hack; if we're > 32 we stop; future we recycle:
+ */
+ if (tsk->latency_record_count >= LT_SAVECOUNT)
+ goto out_unlock;
+
/* Allocated a new one: */
- i = tsk->latency_record_count;
+ i = tsk->latency_record_count++;
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
out_unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 437a74a..d190664 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info)
kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) *
mod->num_trace_events, GFP_KERNEL);
#endif
+#ifdef CONFIG_TRACING
+ mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
+ sizeof(*mod->trace_bprintk_fmt_start),
+ &mod->num_trace_bprintk_fmt);
+ /*
+ * This section contains pointers to allocated objects in the trace
+ * code and not scanning it leads to false positives.
+ */
+ kmemleak_scan_area(mod->trace_bprintk_fmt_start,
+ sizeof(*mod->trace_bprintk_fmt_start) *
+ mod->num_trace_bprintk_fmt, GFP_KERNEL);
+#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* sechdrs[0].sh_size is always zero */
mod->ftrace_callsites = section_objs(info, "__mcount_loc",
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ee1e903..40c3aab 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -674,6 +674,8 @@ event_sched_in(struct perf_event *event,
event->tstamp_running += ctx->time - event->tstamp_stopped;
+ event->shadow_ctx_time = ctx->time - ctx->timestamp;
+
if (!is_software_event(event))
cpuctx->active_oncpu++;
ctx->nr_active++;
@@ -3396,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
}
static void perf_output_read_one(struct perf_output_handle *handle,
- struct perf_event *event)
+ struct perf_event *event,
+ u64 enabled, u64 running)
{
u64 read_format = event->attr.read_format;
u64 values[4];
@@ -3404,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
values[n++] = perf_event_count(event);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- values[n++] = event->total_time_enabled +
+ values[n++] = enabled +
atomic64_read(&event->child_total_time_enabled);
}
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- values[n++] = event->total_time_running +
+ values[n++] = running +
atomic64_read(&event->child_total_time_running);
}
if (read_format & PERF_FORMAT_ID)
@@ -3421,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
* XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
*/
static void perf_output_read_group(struct perf_output_handle *handle,
- struct perf_event *event)
+ struct perf_event *event,
+ u64 enabled, u64 running)
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
@@ -3431,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- values[n++] = leader->total_time_enabled;
+ values[n++] = enabled;
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- values[n++] = leader->total_time_running;
+ values[n++] = running;
if (leader != event)
leader->pmu->read(leader);
@@ -3459,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
}
}
+#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
+ u64 enabled = 0, running = 0, now, ctx_time;
+ u64 read_format = event->attr.read_format;
+
+ /*
+ * compute total_time_enabled, total_time_running
+ * based on snapshot values taken when the event
+ * was last scheduled in.
+ *
+ * we cannot simply called update_context_time()
+ * because of locking issue as we are called in
+ * NMI context
+ */
+ if (read_format & PERF_FORMAT_TOTAL_TIMES) {
+ now = perf_clock();
+ ctx_time = event->shadow_ctx_time + now;
+ enabled = ctx_time - event->tstamp_enabled;
+ running = ctx_time - event->tstamp_running;
+ }
+
if (event->attr.read_format & PERF_FORMAT_GROUP)
- perf_output_read_group(handle, event);
+ perf_output_read_group(handle, event, enabled, running);
else
- perf_output_read_one(handle, event);
+ perf_output_read_one(handle, event, enabled, running);
}
void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee..9a2264f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)
}
#endif
+#ifdef CONFIG_SECURITY_DMESG_RESTRICT
+int dmesg_restrict = 1;
+#else
+int dmesg_restrict;
+#endif
+
int do_syslog(int type, char __user *buf, int len, bool from_file)
{
unsigned i, j, limit, count;
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
char c;
int error = 0;
- error = security_syslog(type, from_file);
+ /*
+ * If this is from /proc/kmsg we only do the capabilities checks
+ * at open time.
+ */
+ if (type == SYSLOG_ACTION_OPEN || !from_file) {
+ if (dmesg_restrict && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if ((type != SYSLOG_ACTION_READ_ALL &&
+ type != SYSLOG_ACTION_SIZE_BUFFER) &&
+ !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ }
+
+ error = security_syslog(type);
if (error)
return error;
diff --git a/kernel/range.c b/kernel/range.c
index 471b66a..37fa9b9 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)
int clean_sort_range(struct range *range, int az)
{
- int i, j, k = az - 1, nr_range = 0;
+ int i, j, k = az - 1, nr_range = az;
for (i = 0; i < k; i++) {
if (range[i].end)
diff --git a/kernel/relay.c b/kernel/relay.c
index c7cf397..859ea5a 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = {
*/
static struct page **relay_alloc_page_array(unsigned int n_pages)
{
- struct page **array;
- size_t pa_size = n_pages * sizeof(struct page *);
-
- if (pa_size > PAGE_SIZE) {
- array = vmalloc(pa_size);
- if (array)
- memset(array, 0, pa_size);
- } else {
- array = kzalloc(pa_size, GFP_KERNEL);
- }
- return array;
+ const size_t pa_size = n_pages * sizeof(struct page *);
+ if (pa_size > PAGE_SIZE)
+ return vzalloc(pa_size);
+ return kzalloc(pa_size, GFP_KERNEL);
}
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c33a1ed..ce33e2a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -704,6 +704,15 @@ static struct ctl_table kern_table[] = {
},
#endif
{
+ .procname = "dmesg_restrict",
+ .data = &dmesg_restrict,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {
.procname = "ngroups_max",
.data = &ngroups_max,
.maxlen = sizeof (int),
@@ -737,22 +746,6 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
- {
- .procname = "unknown_nmi_panic",
- .data = &unknown_nmi_panic,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "nmi_watchdog",
- .data = &nmi_watchdog_enabled,
- .maxlen = sizeof (int),
- .mode = 0644,
- .proc_handler = proc_nmi_enabled,
- },
-#endif
#if defined(CONFIG_X86)
{
.procname = "panic_on_unrecovered_nmi",
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc251ed..7b8ec02 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
BLK_TC_ACT(BLK_TC_WRITE) };
-#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
#define BLK_TC_RAHEAD BLK_TC_AHEAD
/* The ilog2() calls fall out because they're constant */
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
return;
what |= ddir_act[rw & WRITE];
- what |= MASK_TC_BIT(rw, HARDBARRIER);
what |= MASK_TC_BIT(rw, SYNC);
what |= MASK_TC_BIT(rw, RAHEAD);
what |= MASK_TC_BIT(rw, META);
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
if (rw & REQ_RAHEAD)
rwbs[i++] = 'A';
- if (rw & REQ_HARDBARRIER)
- rwbs[i++] = 'B';
if (rw & REQ_SYNC)
rwbs[i++] = 'S';
if (rw & REQ_META)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 82d9b81..ee6a7339 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1284,6 +1284,8 @@ void trace_dump_stack(void)
__ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count());
}
+static DEFINE_PER_CPU(int, user_stack_count);
+
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
@@ -1302,6 +1304,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
if (unlikely(in_nmi()))
return;
+ /*
+ * prevent recursion, since the user stack tracing may
+ * trigger other kernel events.
+ */
+ preempt_disable();
+ if (__this_cpu_read(user_stack_count))
+ goto out;
+
+ __this_cpu_inc(user_stack_count);
+
+
+
event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
sizeof(*entry), flags, pc);
if (!event)
@@ -1319,6 +1333,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
save_stack_trace_user(&trace);
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+
+ __this_cpu_dec(user_stack_count);
+
+ out:
+ preempt_enable();
}
#ifdef UNUSED
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index bafba68..6e3c41a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif
-static int __initdata no_watchdog;
+static int no_watchdog;
/* boot commands */
OpenPOWER on IntegriCloud