summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/locking/lockdep.c11
-rw-r--r--kernel/locking/test-ww_mutex.c6
-rw-r--r--kernel/sched/core.c11
-rw-r--r--kernel/sched/cpufreq_schedutil.c19
-rw-r--r--kernel/sched/fair.c2
-rw-r--r--kernel/sched/features.h5
-rw-r--r--kernel/sched/wait.c39
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/ftrace.c23
-rw-r--r--kernel/trace/trace.c10
-rw-r--r--kernel/trace/trace_probe.h4
-rw-r--r--kernel/trace/trace_stack.c2
-rw-r--r--kernel/ucount.c18
18 files changed, 119 insertions, 48 deletions
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0125589..4885132 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css)
*
* Returns 0 on success, -errno on failure. On failure, csses which have
* been processed already aren't cleaned up. The caller is responsible for
- * cleaning up with cgroup_apply_control_disble().
+ * cleaning up with cgroup_apply_control_disable().
*/
static int cgroup_apply_control_enable(struct cgroup *cgrp)
{
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6f41548f..a17ed56 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event,
*/
#define PERF_CPU_HRTIMER (1000 / HZ)
/*
- * function must be called with interrupts disbled
+ * function must be called with interrupts disabled
*/
static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
{
diff --git a/kernel/exit.c b/kernel/exit.c
index e126ebf..516acdb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -554,7 +554,6 @@ static void exit_mm(void)
enter_lazy_tlb(mm, current);
task_unlock(current);
mm_update_next_owner(mm);
- userfaultfd_exit(mm);
mmput(mm);
if (test_thread_flag(TIF_MEMDIE))
exit_oom_victim();
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 12e38c2..a95e5d1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3262,10 +3262,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (depth) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
- if (hlock->references)
+ if (hlock->references) {
+ /*
+ * Check: unsigned int references:12, overflow.
+ */
+ if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+ return 0;
+
hlock->references++;
- else
+ } else {
hlock->references = 2;
+ }
return 1;
}
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index da6c9a3..6b7abb3 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work)
if (mtx->flags & TEST_MTX_TRY) {
while (!ww_mutex_trylock(&mtx->mutex))
- cpu_relax();
+ cond_resched();
} else {
ww_mutex_lock(&mtx->mutex, NULL);
}
@@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags)
ret = -EINVAL;
break;
}
- cpu_relax();
+ cond_resched();
} while (time_before(jiffies, timeout));
} else {
ret = wait_for_completion_timeout(&mtx.done, TIMEOUT);
@@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void)
if (ret)
return ret;
- ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
+ ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL);
if (ret)
return ret;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9563838..3b31fc0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3287,10 +3287,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
struct task_struct *p;
/*
- * Optimization: we know that if all tasks are in
- * the fair class we can call that function directly:
+ * Optimization: we know that if all tasks are in the fair class we can
+ * call that function directly, but only if the @prev task wasn't of a
+ * higher scheduling class, because otherwise those loose the
+ * opportunity to pull in more work from other CPUs.
*/
- if (likely(rq->nr_running == rq->cfs.h_nr_running)) {
+ if (likely((prev->sched_class == &idle_sched_class ||
+ prev->sched_class == &fair_sched_class) &&
+ rq->nr_running == rq->cfs.h_nr_running)) {
+
p = fair_sched_class.pick_next_task(rq, prev, rf);
if (unlikely(p == RETRY_TASK))
goto again;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 8f8de3d..cd7cd48 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -36,6 +36,7 @@ struct sugov_policy {
u64 last_freq_update_time;
s64 freq_update_delay_ns;
unsigned int next_freq;
+ unsigned int cached_raw_freq;
/* The next fields are only needed if fast switch cannot be used. */
struct irq_work irq_work;
@@ -52,7 +53,6 @@ struct sugov_cpu {
struct update_util_data update_util;
struct sugov_policy *sg_policy;
- unsigned int cached_raw_freq;
unsigned long iowait_boost;
unsigned long iowait_boost_max;
u64 last_update;
@@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
/**
* get_next_freq - Compute a new frequency for a given cpufreq policy.
- * @sg_cpu: schedutil cpu object to compute the new frequency for.
+ * @sg_policy: schedutil policy object to compute the new frequency for.
* @util: Current CPU utilization.
* @max: CPU capacity.
*
@@ -136,19 +136,18 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
* next_freq (as calculated above) is returned, subject to policy min/max and
* cpufreq driver limitations.
*/
-static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
- unsigned long max)
+static unsigned int get_next_freq(struct sugov_policy *sg_policy,
+ unsigned long util, unsigned long max)
{
- struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
unsigned int freq = arch_scale_freq_invariant() ?
policy->cpuinfo.max_freq : policy->cur;
freq = (freq + (freq >> 2)) * util / max;
- if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
+ if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
return sg_policy->next_freq;
- sg_cpu->cached_raw_freq = freq;
+ sg_policy->cached_raw_freq = freq;
return cpufreq_driver_resolve_freq(policy, freq);
}
@@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
} else {
sugov_get_util(&util, &max);
sugov_iowait_boost(sg_cpu, &util, &max);
- next_f = get_next_freq(sg_cpu, util, max);
+ next_f = get_next_freq(sg_policy, util, max);
}
sugov_update_commit(sg_policy, time, next_f);
}
@@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
sugov_iowait_boost(j_sg_cpu, &util, &max);
}
- return get_next_freq(sg_cpu, util, max);
+ return get_next_freq(sg_policy, util, max);
}
static void sugov_update_shared(struct update_util_data *hook, u64 time,
@@ -580,6 +579,7 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->next_freq = UINT_MAX;
sg_policy->work_in_progress = false;
sg_policy->need_freq_update = false;
+ sg_policy->cached_raw_freq = 0;
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
@@ -590,7 +590,6 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->max = 0;
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
- sg_cpu->cached_raw_freq = 0;
sg_cpu->iowait_boost = 0;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3e88b35..dea1389 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5799,7 +5799,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
* Due to large variance we need a large fuzz factor; hackbench in
* particularly is sensitive here.
*/
- if ((avg_idle / 512) < avg_cost)
+ if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
return -1;
time = local_clock();
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 69631fa..1b3c818 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
*/
SCHED_FEAT(TTWU_QUEUE, true)
+/*
+ * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
+ */
+SCHED_FEAT(SIS_AVG_CPU, false)
+
#ifdef HAVE_RT_PUSH_IPI
/*
* In order to avoid a thundering herd attack of CPUs that are
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 4d2ea6f..b8c84c6 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -242,6 +242,45 @@ long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
}
EXPORT_SYMBOL(prepare_to_wait_event);
+/*
+ * Note! These two wait functions are entered with the
+ * wait-queue lock held (and interrupts off in the _irq
+ * case), so there is no race with testing the wakeup
+ * condition in the caller before they add the wait
+ * entry to the wake queue.
+ */
+int do_wait_intr(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+ if (likely(list_empty(&wait->task_list)))
+ __add_wait_queue_tail(wq, wait);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ spin_unlock(&wq->lock);
+ schedule();
+ spin_lock(&wq->lock);
+ return 0;
+}
+EXPORT_SYMBOL(do_wait_intr);
+
+int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_t *wait)
+{
+ if (likely(list_empty(&wait->task_list)))
+ __add_wait_queue_tail(wq, wait);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ spin_unlock_irq(&wq->lock);
+ schedule();
+ spin_lock_irq(&wq->lock);
+ return 0;
+}
+EXPORT_SYMBOL(do_wait_intr_irq);
+
/**
* finish_wait - clean up after waiting in a queue
* @q: waitqueue waited on
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 7906b3f..4977191 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second)
shift_hz += cycles_per_tick/2;
do_div(shift_hz, cycles_per_tick);
/* Calculate nsec_per_tick using shift_hz */
- nsec_per_tick = (u64)TICK_NSEC << 8;
+ nsec_per_tick = (u64)NSEC_PER_SEC << 8;
nsec_per_tick += (u32)shift_hz/2;
do_div(nsec_per_tick, (u32)shift_hz);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index d503800..d4a06e7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE
If unsure, say N.
-config KPROBE_EVENT
+config KPROBE_EVENTS
depends on KPROBES
depends on HAVE_REGS_AND_STACK_ACCESS_API
bool "Enable kprobes-based dynamic events"
@@ -447,7 +447,7 @@ config KPROBE_EVENT
This option is also required by perf-probe subcommand of perf tools.
If you want to use perf tools, this option is strongly recommended.
-config UPROBE_EVENT
+config UPROBE_EVENTS
bool "Enable uprobes-based dynamic events"
depends on ARCH_SUPPORTS_UPROBES
depends on MMU
@@ -466,7 +466,7 @@ config UPROBE_EVENT
config BPF_EVENTS
depends on BPF_SYSCALL
- depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS
+ depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS
bool
default y
help
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index e579808..90f2701 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
-obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_PM),y)
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
@@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
-obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
+obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0d1597c..b9691ee 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4416,16 +4416,24 @@ static int __init set_graph_notrace_function(char *str)
}
__setup("ftrace_graph_notrace=", set_graph_notrace_function);
+static int __init set_graph_max_depth_function(char *str)
+{
+ if (!str)
+ return 0;
+ fgraph_max_depth = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+__setup("ftrace_graph_max_depth=", set_graph_max_depth_function);
+
static void __init set_ftrace_early_graph(char *buf, int enable)
{
int ret;
char *func;
struct ftrace_hash *hash;
- if (enable)
- hash = ftrace_graph_hash;
- else
- hash = ftrace_graph_notrace_hash;
+ hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS);
+ if (WARN_ON(!hash))
+ return;
while (buf) {
func = strsep(&buf, ",");
@@ -4435,6 +4443,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable)
printk(KERN_DEBUG "ftrace: function %s not "
"traceable\n", func);
}
+
+ if (enable)
+ ftrace_graph_hash = hash;
+ else
+ ftrace_graph_notrace_hash = hash;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -5488,7 +5501,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
* Normally the mcount trampoline will call the ops->func, but there
* are times that it should not. For example, if the ops does not
* have its own recursion protection, then it should call the
- * ftrace_ops_recurs_func() instead.
+ * ftrace_ops_assist_func() instead.
*
* Returns the function that the trampoline should call for @ops.
*/
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 707445c..f351095 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4341,22 +4341,22 @@ static const char readme_msg[] =
"\t\t\t traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
" kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
" uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
"\t\t\t Write into this file to define/undefine new trace events.\n"
#endif
-#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
"\t accepts: event-definitions (one definition per line)\n"
"\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
"\t -:[<group>/]<event>\n"
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
"\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
#endif
-#ifdef CONFIG_UPROBE_EVENT
+#ifdef CONFIG_UPROBE_EVENTS
"\t place: <path>:<offset>\n"
#endif
"\t args: <name>=fetcharg[:type]\n"
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 0c0ae54..903273c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \
#define FETCH_TYPE_STRING 0
#define FETCH_TYPE_STRSIZE 1
-#ifdef CONFIG_KPROBE_EVENT
+#ifdef CONFIG_KPROBE_EVENTS
struct symbol_cache;
unsigned long update_symbol_cache(struct symbol_cache *sc);
void free_symbol_cache(struct symbol_cache *sc);
@@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset)
{
return NULL;
}
-#endif /* CONFIG_KPROBE_EVENT */
+#endif /* CONFIG_KPROBE_EVENTS */
struct probe_arg {
struct fetch_param fetch;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 1d68b5b..5fb1f2c 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -65,7 +65,7 @@ void stack_trace_print(void)
}
/*
- * When arch-specific code overides this function, the following
+ * When arch-specific code overrides this function, the following
* data should be filled up, assuming stack_trace_max_lock is held to
* prevent concurrent updates.
* stack_trace_index[]
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 62630a4..b4eeee0 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
new->ns = ns;
new->uid = uid;
- atomic_set(&new->count, 0);
+ new->count = 0;
spin_lock_irq(&ucounts_lock);
ucounts = find_ucounts(ns, uid, hashent);
@@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
ucounts = new;
}
}
- if (!atomic_add_unless(&ucounts->count, 1, INT_MAX))
+ if (ucounts->count == INT_MAX)
ucounts = NULL;
+ else
+ ucounts->count += 1;
spin_unlock_irq(&ucounts_lock);
return ucounts;
}
@@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts)
{
unsigned long flags;
- if (atomic_dec_and_test(&ucounts->count)) {
- spin_lock_irqsave(&ucounts_lock, flags);
+ spin_lock_irqsave(&ucounts_lock, flags);
+ ucounts->count -= 1;
+ if (!ucounts->count)
hlist_del_init(&ucounts->node);
- spin_unlock_irqrestore(&ucounts_lock, flags);
+ else
+ ucounts = NULL;
+ spin_unlock_irqrestore(&ucounts_lock, flags);
- kfree(ucounts);
- }
+ kfree(ucounts);
}
static inline bool atomic_inc_below(atomic_t *v, int u)
OpenPOWER on IntegriCloud