summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/checklist.txt17
-rw-r--r--Documentation/RCU/whatisRCU.txt4
-rw-r--r--arch/um/drivers/mconsole_kern.c2
-rw-r--r--arch/x86/kernel/ptrace.c7
-rw-r--r--include/linux/rculist.h17
-rw-r--r--include/linux/rcupdate.h27
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/srcu.h34
-rw-r--r--init/Kconfig19
-rw-r--r--kernel/ksysfs.c18
-rw-r--r--kernel/rcu.h2
-rw-r--r--kernel/rcupdate.c3
-rw-r--r--kernel/rcutiny.c2
-rw-r--r--kernel/rcutiny_plugin.h5
-rw-r--r--kernel/rcutorture.c54
-rw-r--r--kernel/rcutree.c222
-rw-r--r--kernel/rcutree.h19
-rw-r--r--kernel/rcutree_plugin.h18
-rw-r--r--kernel/rcutree_trace.c318
-rw-r--r--kernel/sched/core.c8
-rw-r--r--kernel/srcu.c16
-rw-r--r--lib/Kconfig.debug2
22 files changed, 507 insertions, 317 deletions
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index cdb20d4..31ef8fe 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -271,15 +271,14 @@ over a rather long period of time, but improvements are always welcome!
The same cautions apply to call_rcu_bh() and call_rcu_sched().
9. All RCU list-traversal primitives, which include
- rcu_dereference(), list_for_each_entry_rcu(),
- list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
- must be either within an RCU read-side critical section or
- must be protected by appropriate update-side locks. RCU
- read-side critical sections are delimited by rcu_read_lock()
- and rcu_read_unlock(), or by similar primitives such as
- rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case
- the matching rcu_dereference() primitive must be used in order
- to keep lockdep happy, in this case, rcu_dereference_bh().
+ rcu_dereference(), list_for_each_entry_rcu(), and
+ list_for_each_safe_rcu(), must be either within an RCU read-side
+ critical section or must be protected by appropriate update-side
+ locks. RCU read-side critical sections are delimited by
+ rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+ such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+ case the matching rcu_dereference() primitive must be used in
+ order to keep lockdep happy, in this case, rcu_dereference_bh().
The reason that it is permissible to use RCU list-traversal
primitives when the update-side lock is held is that doing so
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index 160ac55..0cc7820 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -797,9 +797,7 @@ RCU list traversal:
list_for_each_entry_rcu
hlist_for_each_entry_rcu
hlist_nulls_for_each_entry_rcu
-
- list_for_each_continue_rcu (to be deprecated in favor of new
- list_for_each_entry_continue_rcu)
+ list_for_each_entry_continue_rcu
RCU pointer/list update:
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 79ccfe6..49e3b49 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -648,7 +648,7 @@ static void stack_proc(void *arg)
struct task_struct *from = current, *to = arg;
to->thread.saved_task = from;
- rcu_switch(from, to);
+ rcu_user_hooks_switch(from, to);
switch_to(from, to, from);
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b00b33a..eff5b8c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1511,6 +1511,13 @@ void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
+ /*
+ * We may come here right after calling schedule_user()
+ * or do_notify_resume(), in which case we can be in RCU
+ * user mode.
+ */
+ rcu_user_exit();
+
audit_syscall_exit(regs);
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e0f0fab..c92dd28 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -286,23 +286,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
&pos->member != (head); \
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
-
-/**
- * list_for_each_continue_rcu
- * @pos: the &struct list_head to use as a loop cursor.
- * @head: the head for your list.
- *
- * Iterate over an rcu-protected list, continuing after current point.
- *
- * This list-traversal primitive may safely run concurrently with
- * the _rcu list-mutation primitives such as list_add_rcu()
- * as long as the traversal is guarded by rcu_read_lock().
- */
-#define list_for_each_continue_rcu(pos, head) \
- for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
- (pos) != (head); \
- (pos) = rcu_dereference_raw(list_next_rcu(pos)))
-
/**
* list_for_each_entry_continue_rcu - continue iteration over list of given type
* @pos: the type * to use as a loop cursor.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 7c968e4f..8fe7c18 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -90,6 +90,25 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
* that started after call_rcu() was invoked. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section. On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu(). It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section. Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
*/
extern void call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *head));
@@ -118,6 +137,9 @@ extern void call_rcu(struct rcu_head *head,
* OR
* - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
* These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
*/
extern void call_rcu_bh(struct rcu_head *head,
void (*func)(struct rcu_head *head));
@@ -137,6 +159,9 @@ extern void call_rcu_bh(struct rcu_head *head,
* OR
* anything that disables preemption.
* These may be nested.
+ *
+ * See the description of call_rcu() for more detailed information on
+ * memory ordering guarantees.
*/
extern void call_rcu_sched(struct rcu_head *head,
void (*func)(struct rcu_head *rcu));
@@ -204,6 +229,8 @@ static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
static inline void rcu_user_enter_after_irq(void) { }
static inline void rcu_user_exit_after_irq(void) { }
+static inline void rcu_user_hooks_switch(struct task_struct *prev,
+ struct task_struct *next) { }
#endif /* CONFIG_RCU_USER_QS */
extern void exit_rcu(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0dd42a0..530c52e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -109,6 +109,8 @@ extern void update_cpu_load_nohz(void);
extern unsigned long get_parent_ip(unsigned long addr);
+extern void dump_cpu_task(int cpu);
+
struct seq_file;
struct cfs_rq;
struct task_group;
@@ -1844,14 +1846,6 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif
-static inline void rcu_switch(struct task_struct *prev,
- struct task_struct *next)
-{
-#ifdef CONFIG_RCU_USER_QS
- rcu_user_hooks_switch(prev, next);
-#endif
-}
-
static inline void tsk_restore_flags(struct task_struct *task,
unsigned long orig_flags, unsigned long flags)
{
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 55a5c52..6eb691b 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -16,8 +16,10 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
*
* Author: Paul McKenney <paulmck@us.ibm.com>
+ * Lai Jiangshan <laijs@cn.fujitsu.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU/ *.txt
@@ -40,6 +42,8 @@ struct rcu_batch {
struct rcu_head *head, **tail;
};
+#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
+
struct srcu_struct {
unsigned completed;
struct srcu_struct_array __percpu *per_cpu_ref;
@@ -70,12 +74,42 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
__init_srcu_struct((sp), #sp, &__srcu_key); \
})
+#define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name },
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
int init_srcu_struct(struct srcu_struct *sp);
+#define __SRCU_DEP_MAP_INIT(srcu_name)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+void process_srcu(struct work_struct *work);
+
+#define __SRCU_STRUCT_INIT(name) \
+ { \
+ .completed = -300, \
+ .per_cpu_ref = &name##_srcu_array, \
+ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
+ .running = false, \
+ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
+ .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \
+ .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \
+ .batch_done = RCU_BATCH_INIT(name.batch_done), \
+ .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
+ __SRCU_DEP_MAP_INIT(name) \
+ }
+
+/*
+ * define and init a srcu struct at build time.
+ * dont't call init_srcu_struct() nor cleanup_srcu_struct() on it.
+ */
+#define DEFINE_SRCU(name) \
+ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+ struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
+#define DEFINE_STATIC_SRCU(name) \
+ static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\
+ static struct srcu_struct name = __SRCU_STRUCT_INIT(name);
+
/**
* call_srcu() - Queue a callback for invocation after an SRCU grace period
* @sp: srcu_struct in queue the callback
diff --git a/init/Kconfig b/init/Kconfig
index 6fdd6e3..ec62139 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -494,11 +494,11 @@ config RCU_USER_QS
puts RCU in extended quiescent state when the CPU runs in
userspace. It means that when a CPU runs in userspace, it is
excluded from the global RCU state machine and thus doesn't
- to keep the timer tick on for RCU.
+ try to keep the timer tick on for RCU.
Unless you want to hack and help the development of the full
- tickless feature, you shouldn't enable this option. It adds
- unnecessary overhead.
+ tickless feature, you shouldn't enable this option. It also
+ adds unnecessary overhead.
If unsure say N
@@ -582,14 +582,13 @@ config RCU_FAST_NO_HZ
depends on NO_HZ && SMP
default n
help
- This option causes RCU to attempt to accelerate grace periods
- in order to allow CPUs to enter dynticks-idle state more
- quickly. On the other hand, this option increases the overhead
- of the dynticks-idle checking, particularly on systems with
- large numbers of CPUs.
+ This option causes RCU to attempt to accelerate grace periods in
+ order to allow CPUs to enter dynticks-idle state more quickly.
+ On the other hand, this option increases the overhead of the
+ dynticks-idle checking, thus degrading scheduling latency.
- Say Y if energy efficiency is critically important, particularly
- if you have relatively few CPUs.
+ Say Y if energy efficiency is critically important, and you don't
+ care about real-time response.
Say N if you are unsure.
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 4e316e1..8715a79 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -141,6 +141,23 @@ static ssize_t fscaps_show(struct kobject *kobj,
}
KERNEL_ATTR_RO(fscaps);
+int rcu_expedited;
+static ssize_t rcu_expedited_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", rcu_expedited);
+}
+static ssize_t rcu_expedited_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ if (kstrtoint(buf, 0, &rcu_expedited))
+ return -EINVAL;
+
+ return count;
+}
+KERNEL_ATTR_RW(rcu_expedited);
+
/*
* Make /sys/kernel/notes give the raw contents of our kernel .notes section.
*/
@@ -182,6 +199,7 @@ static struct attribute * kernel_attrs[] = {
&kexec_crash_size_attr.attr,
&vmcoreinfo_attr.attr,
#endif
+ &rcu_expedited_attr.attr,
NULL
};
diff --git a/kernel/rcu.h b/kernel/rcu.h
index 8ba99cd..20dfba5 100644
--- a/kernel/rcu.h
+++ b/kernel/rcu.h
@@ -109,4 +109,6 @@ static inline bool __rcu_reclaim(char *rn, struct rcu_head *head)
}
}
+extern int rcu_expedited;
+
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 29ca1c6..a2cf761 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -46,12 +46,15 @@
#include <linux/export.h>
#include <linux/hardirq.h>
#include <linux/delay.h>
+#include <linux/module.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rcu.h>
#include "rcu.h"
+module_param(rcu_expedited, int, 0);
+
#ifdef CONFIG_PREEMPT_RCU
/*
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index e4c6a59..e7dce58 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -195,7 +195,7 @@ EXPORT_SYMBOL(rcu_is_cpu_idle);
*/
int rcu_is_cpu_rrupt_from_idle(void)
{
- return rcu_dynticks_nesting <= 0;
+ return rcu_dynticks_nesting <= 1;
}
/*
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 3d01902..f85016a 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -706,7 +706,10 @@ void synchronize_rcu(void)
return;
/* Once we get past the fastpath checks, same code as rcu_barrier(). */
- rcu_barrier();
+ if (rcu_expedited)
+ synchronize_rcu_expedited();
+ else
+ rcu_barrier();
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index aaa7b9f..31dea01 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -339,7 +339,6 @@ rcu_stutter_wait(char *title)
struct rcu_torture_ops {
void (*init)(void);
- void (*cleanup)(void);
int (*readlock)(void);
void (*read_delay)(struct rcu_random_state *rrsp);
void (*readunlock)(int idx);
@@ -431,7 +430,6 @@ static void rcu_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops rcu_ops = {
.init = NULL,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
@@ -475,7 +473,6 @@ static void rcu_sync_torture_init(void)
static struct rcu_torture_ops rcu_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay,
.readunlock = rcu_torture_read_unlock,
@@ -493,7 +490,6 @@ static struct rcu_torture_ops rcu_sync_ops = {
static struct rcu_torture_ops rcu_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_torture_read_unlock,
@@ -536,7 +532,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops rcu_bh_ops = {
.init = NULL,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -553,7 +548,6 @@ static struct rcu_torture_ops rcu_bh_ops = {
static struct rcu_torture_ops rcu_bh_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -570,7 +564,6 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
static struct rcu_torture_ops rcu_bh_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = rcu_bh_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = rcu_bh_torture_read_unlock,
@@ -589,19 +582,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = {
* Definitions for srcu torture testing.
*/
-static struct srcu_struct srcu_ctl;
-
-static void srcu_torture_init(void)
-{
- init_srcu_struct(&srcu_ctl);
- rcu_sync_torture_init();
-}
-
-static void srcu_torture_cleanup(void)
-{
- synchronize_srcu(&srcu_ctl);
- cleanup_srcu_struct(&srcu_ctl);
-}
+DEFINE_STATIC_SRCU(srcu_ctl);
static int srcu_torture_read_lock(void) __acquires(&srcu_ctl)
{
@@ -672,8 +653,7 @@ static int srcu_torture_stats(char *page)
}
static struct rcu_torture_ops srcu_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -687,8 +667,7 @@ static struct rcu_torture_ops srcu_ops = {
};
static struct rcu_torture_ops srcu_sync_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -712,8 +691,7 @@ static void srcu_torture_read_unlock_raw(int idx) __releases(&srcu_ctl)
}
static struct rcu_torture_ops srcu_raw_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
@@ -727,8 +705,7 @@ static struct rcu_torture_ops srcu_raw_ops = {
};
static struct rcu_torture_ops srcu_raw_sync_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock_raw,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock_raw,
@@ -747,8 +724,7 @@ static void srcu_torture_synchronize_expedited(void)
}
static struct rcu_torture_ops srcu_expedited_ops = {
- .init = srcu_torture_init,
- .cleanup = srcu_torture_cleanup,
+ .init = rcu_sync_torture_init,
.readlock = srcu_torture_read_lock,
.read_delay = srcu_read_delay,
.readunlock = srcu_torture_read_unlock,
@@ -783,7 +759,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
static struct rcu_torture_ops sched_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -799,7 +774,6 @@ static struct rcu_torture_ops sched_ops = {
static struct rcu_torture_ops sched_sync_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -814,7 +788,6 @@ static struct rcu_torture_ops sched_sync_ops = {
static struct rcu_torture_ops sched_expedited_ops = {
.init = rcu_sync_torture_init,
- .cleanup = NULL,
.readlock = sched_torture_read_lock,
.read_delay = rcu_read_delay, /* just reuse rcu's version. */
.readunlock = sched_torture_read_unlock,
@@ -1396,12 +1369,16 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag)
"fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d "
"test_boost=%d/%d test_boost_interval=%d "
"test_boost_duration=%d shutdown_secs=%d "
+ "stall_cpu=%d stall_cpu_holdoff=%d "
+ "n_barrier_cbs=%d "
"onoff_interval=%d onoff_holdoff=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter,
test_boost, cur_ops->can_boost,
test_boost_interval, test_boost_duration, shutdown_secs,
+ stall_cpu, stall_cpu_holdoff,
+ n_barrier_cbs,
onoff_interval, onoff_holdoff);
}
@@ -1502,6 +1479,7 @@ rcu_torture_onoff(void *arg)
unsigned long delta;
int maxcpu = -1;
DEFINE_RCU_RANDOM(rand);
+ int ret;
unsigned long starttime;
VERBOSE_PRINTK_STRING("rcu_torture_onoff task started");
@@ -1522,7 +1500,13 @@ rcu_torture_onoff(void *arg)
torture_type, cpu);
starttime = jiffies;
n_offline_attempts++;
- if (cpu_down(cpu) == 0) {
+ ret = cpu_down(cpu);
+ if (ret) {
+ if (verbose)
+ pr_alert("%s" TORTURE_FLAG
+ "rcu_torture_onoff task: offline %d failed: errno %d\n",
+ torture_type, cpu, ret);
+ } else {
if (verbose)
pr_alert("%s" TORTURE_FLAG
"rcu_torture_onoff task: offlined %d\n",
@@ -1936,8 +1920,6 @@ rcu_torture_cleanup(void)
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
- if (cur_ops->cleanup)
- cur_ops->cleanup();
if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
else if (n_online_successes != n_online_attempts ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 74df86b..5ffadcc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -68,9 +68,9 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
.level = { &sname##_state.node[0] }, \
.call = cr, \
.fqs_state = RCU_GP_IDLE, \
- .gpnum = -300, \
- .completed = -300, \
- .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+ .gpnum = 0UL - 300UL, \
+ .completed = 0UL - 300UL, \
+ .orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
.orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
@@ -212,13 +212,13 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
#endif
};
-static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
-static int qhimark = 10000; /* If this many pending, ignore blimit. */
-static int qlowmark = 100; /* Once only this many pending, use blimit. */
+static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
+static long qhimark = 10000; /* If this many pending, ignore blimit. */
+static long qlowmark = 100; /* Once only this many pending, use blimit. */
-module_param(blimit, int, 0444);
-module_param(qhimark, int, 0444);
-module_param(qlowmark, int, 0444);
+module_param(blimit, long, 0444);
+module_param(qhimark, long, 0444);
+module_param(qlowmark, long, 0444);
int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
@@ -313,7 +313,7 @@ static int
cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
{
return *rdp->nxttail[RCU_DONE_TAIL +
- ACCESS_ONCE(rsp->completed) != rdp->completed] &&
+ (ACCESS_ONCE(rsp->completed) != rdp->completed)] &&
!rcu_gp_in_progress(rsp);
}
@@ -873,6 +873,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
}
+/*
+ * Dump stacks of all tasks running on stalled CPUs. This is a fallback
+ * for architectures that do not implement trigger_all_cpu_backtrace().
+ * The NMI-triggered stack traces are more accurate because they are
+ * printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+ int cpu;
+ unsigned long flags;
+ struct rcu_node *rnp;
+
+ rcu_for_each_leaf_node(rsp, rnp) {
+ raw_spin_lock_irqsave(&rnp->lock, flags);
+ if (rnp->qsmask != 0) {
+ for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+ if (rnp->qsmask & (1UL << cpu))
+ dump_cpu_task(rnp->grplo + cpu);
+ }
+ raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ }
+}
+
static void print_other_cpu_stall(struct rcu_state *rsp)
{
int cpu;
@@ -880,6 +903,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
unsigned long flags;
int ndetected = 0;
struct rcu_node *rnp = rcu_get_root(rsp);
+ long totqlen = 0;
/* Only let one CPU complain about others per time interval. */
@@ -924,12 +948,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
raw_spin_unlock_irqrestore(&rnp->lock, flags);
print_cpu_stall_info_end();
- printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
- smp_processor_id(), (long)(jiffies - rsp->gp_start));
+ for_each_possible_cpu(cpu)
+ totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+ pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
+ smp_processor_id(), (long)(jiffies - rsp->gp_start),
+ rsp->gpnum, rsp->completed, totqlen);
if (ndetected == 0)
printk(KERN_ERR "INFO: Stall ended before state dump start\n");
else if (!trigger_all_cpu_backtrace())
- dump_stack();
+ rcu_dump_cpu_stacks(rsp);
/* Complain about tasks blocking the grace period. */
@@ -940,8 +967,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
static void print_cpu_stall(struct rcu_state *rsp)
{
+ int cpu;
unsigned long flags;
struct rcu_node *rnp = rcu_get_root(rsp);
+ long totqlen = 0;
/*
* OK, time to rat on ourselves...
@@ -952,7 +981,10 @@ static void print_cpu_stall(struct rcu_state *rsp)
print_cpu_stall_info_begin();
print_cpu_stall_info(rsp, smp_processor_id());
print_cpu_stall_info_end();
- printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
+ for_each_possible_cpu(cpu)
+ totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+ pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
+ jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
if (!trigger_all_cpu_backtrace())
dump_stack();
@@ -1404,15 +1436,37 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
!cpu_needs_another_gp(rsp, rdp)) {
/*
* Either we have not yet spawned the grace-period
- * task or this CPU does not need another grace period.
+ * task, this CPU does not need another grace period,
+ * or a grace period is already in progress.
* Either way, don't start a new grace period.
*/
raw_spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
+ /*
+ * Because there is no grace period in progress right now,
+ * any callbacks we have up to this point will be satisfied
+ * by the next grace period. So promote all callbacks to be
+ * handled after the end of the next grace period. If the
+ * CPU is not yet aware of the end of the previous grace period,
+ * we need to allow for the callback advancement that will
+ * occur when it does become aware. Deadlock prevents us from
+ * making it aware at this point: We cannot acquire a leaf
+ * rcu_node ->lock while holding the root rcu_node ->lock.
+ */
+ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+ if (rdp->completed == rsp->completed)
+ rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
rsp->gp_flags = RCU_GP_FLAG_INIT;
- raw_spin_unlock_irqrestore(&rnp->lock, flags);
+ raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+
+ /* Ensure that CPU is aware of completion of last grace period. */
+ rcu_process_gp_end(rsp, rdp);
+ local_irq_restore(flags);
+
+ /* Wake up rcu_gp_kthread() to start the grace period. */
wake_up(&rsp->gp_wq);
}
@@ -1573,7 +1627,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
/*
* Send the specified CPU's RCU callbacks to the orphanage. The
* specified CPU must be offline, and the caller must hold the
- * ->onofflock.
+ * ->orphan_lock.
*/
static void
rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
@@ -1581,8 +1635,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
{
/*
* Orphan the callbacks. First adjust the counts. This is safe
- * because ->onofflock excludes _rcu_barrier()'s adoption of
- * the callbacks, thus no memory barrier is required.
+ * because _rcu_barrier() excludes CPU-hotplug operations, so it
+ * cannot be running now. Thus no memory barrier is required.
*/
if (rdp->nxtlist != NULL) {
rsp->qlen_lazy += rdp->qlen_lazy;
@@ -1623,7 +1677,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
/*
* Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage. The caller must hold the ->onofflock.
+ * orphanage. The caller must hold the ->orphan_lock.
*/
static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
{
@@ -1702,7 +1756,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/* Exclude any attempts to start a new grace period. */
mutex_lock(&rsp->onoff_mutex);
- raw_spin_lock_irqsave(&rsp->onofflock, flags);
+ raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
@@ -1729,10 +1783,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/*
* We still hold the leaf rcu_node structure lock here, and
* irqs are still disabled. The reason for this subterfuge is
- * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+ * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
* held leads to deadlock.
*/
- raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+ raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
rnp = rdp->mynode;
if (need_report & RCU_OFL_TASKS_NORM_GP)
rcu_report_unblock_qs_rnp(rnp, flags);
@@ -1769,7 +1823,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_head *next, *list, **tail;
- int bl, count, count_lazy, i;
+ long bl, count, count_lazy;
+ int i;
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -2205,10 +2260,28 @@ static inline int rcu_blocking_is_gp(void)
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns. However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns. However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched(). In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section. Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
*
* This primitive provides the guarantees made by the (now removed)
* synchronize_kernel() API. In contrast, synchronize_rcu() only
@@ -2224,7 +2297,10 @@ void synchronize_sched(void)
"Illegal synchronize_sched() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
- wait_rcu_gp(call_rcu_sched);
+ if (rcu_expedited)
+ synchronize_sched_expedited();
+ else
+ wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
@@ -2236,6 +2312,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
*/
void synchronize_rcu_bh(void)
{
@@ -2245,13 +2324,13 @@ void synchronize_rcu_bh(void)
"Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
if (rcu_blocking_is_gp())
return;
- wait_rcu_gp(call_rcu_bh);
+ if (rcu_expedited)
+ synchronize_rcu_bh_expedited();
+ else
+ wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
-static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
-
static int synchronize_sched_expedited_cpu_stop(void *data)
{
/*
@@ -2308,10 +2387,32 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
*/
void synchronize_sched_expedited(void)
{
- int firstsnap, s, snap, trycount = 0;
+ long firstsnap, s, snap;
+ int trycount = 0;
+ struct rcu_state *rsp = &rcu_sched_state;
+
+ /*
+ * If we are in danger of counter wrap, just do synchronize_sched().
+ * By allowing sync_sched_expedited_started to advance no more than
+ * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
+ * that more than 3.5 billion CPUs would be required to force a
+ * counter wrap on a 32-bit system. Quite a few more CPUs would of
+ * course be required on a 64-bit system.
+ */
+ if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
+ (ulong)atomic_long_read(&rsp->expedited_done) +
+ ULONG_MAX / 8)) {
+ synchronize_sched();
+ atomic_long_inc(&rsp->expedited_wrap);
+ return;
+ }
- /* Note that atomic_inc_return() implies full memory barrier. */
- firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+ /*
+ * Take a ticket. Note that atomic_inc_return() implies a
+ * full memory barrier.
+ */
+ snap = atomic_long_inc_return(&rsp->expedited_start);
+ firstsnap = snap;
get_online_cpus();
WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
@@ -2323,48 +2424,65 @@ void synchronize_sched_expedited(void)
synchronize_sched_expedited_cpu_stop,
NULL) == -EAGAIN) {
put_online_cpus();
+ atomic_long_inc(&rsp->expedited_tryfail);
+
+ /* Check to see if someone else did our work for us. */
+ s = atomic_long_read(&rsp->expedited_done);
+ if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+ /* ensure test happens before caller kfree */
+ smp_mb__before_atomic_inc(); /* ^^^ */
+ atomic_long_inc(&rsp->expedited_workdone1);
+ return;
+ }
/* No joy, try again later. Or just synchronize_sched(). */
if (trycount++ < 10) {
udelay(trycount * num_online_cpus());
} else {
- synchronize_sched();
+ wait_rcu_gp(call_rcu_sched);
+ atomic_long_inc(&rsp->expedited_normal);
return;
}
- /* Check to see if someone else did our work for us. */
- s = atomic_read(&sync_sched_expedited_done);
- if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
- smp_mb(); /* ensure test happens before caller kfree */
+ /* Recheck to see if someone else did our work for us. */
+ s = atomic_long_read(&rsp->expedited_done);
+ if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+ /* ensure test happens before caller kfree */
+ smp_mb__before_atomic_inc(); /* ^^^ */
+ atomic_long_inc(&rsp->expedited_workdone2);
return;
}
/*
* Refetching sync_sched_expedited_started allows later
- * callers to piggyback on our grace period. We subtract
- * 1 to get the same token that the last incrementer got.
- * We retry after they started, so our grace period works
- * for them, and they started after our first try, so their
- * grace period works for us.
+ * callers to piggyback on our grace period. We retry
+ * after they started, so our grace period works for them,
+ * and they started after our first try, so their grace
+ * period works for us.
*/
get_online_cpus();
- snap = atomic_read(&sync_sched_expedited_started);
+ snap = atomic_long_read(&rsp->expedited_start);
smp_mb(); /* ensure read is before try_stop_cpus(). */
}
+ atomic_long_inc(&rsp->expedited_stoppedcpus);
/*
* Everyone up to our most recent fetch is covered by our grace
* period. Update the counter, but only if our work is still
* relevant -- which it won't be if someone who started later
- * than we did beat us to the punch.
+ * than we did already did their update.
*/
do {
- s = atomic_read(&sync_sched_expedited_done);
- if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
- smp_mb(); /* ensure test happens before caller kfree */
+ atomic_long_inc(&rsp->expedited_done_tries);
+ s = atomic_long_read(&rsp->expedited_done);
+ if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
+ /* ensure test happens before caller kfree */
+ smp_mb__before_atomic_inc(); /* ^^^ */
+ atomic_long_inc(&rsp->expedited_done_lost);
break;
}
- } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+ } while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
+ atomic_long_inc(&rsp->expedited_done_exit);
put_online_cpus();
}
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index a240f03..d274af3 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -383,9 +383,8 @@ struct rcu_state {
/* End of fields guarded by root rcu_node's lock. */
- raw_spinlock_t onofflock ____cacheline_internodealigned_in_smp;
- /* exclude on/offline and */
- /* starting new GP. */
+ raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp;
+ /* Protect following fields. */
struct rcu_head *orphan_nxtlist; /* Orphaned callbacks that */
/* need a grace period. */
struct rcu_head **orphan_nxttail; /* Tail of above. */
@@ -394,7 +393,7 @@ struct rcu_state {
struct rcu_head **orphan_donetail; /* Tail of above. */
long qlen_lazy; /* Number of lazy callbacks. */
long qlen; /* Total number of callbacks. */
- /* End of fields guarded by onofflock. */
+ /* End of fields guarded by orphan_lock. */
struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
@@ -405,6 +404,18 @@ struct rcu_state {
/* _rcu_barrier(). */
/* End of fields guarded by barrier_mutex. */
+ atomic_long_t expedited_start; /* Starting ticket. */
+ atomic_long_t expedited_done; /* Done ticket. */
+ atomic_long_t expedited_wrap; /* # near-wrap incidents. */
+ atomic_long_t expedited_tryfail; /* # acquisition failures. */
+ atomic_long_t expedited_workdone1; /* # done by others #1. */
+ atomic_long_t expedited_workdone2; /* # done by others #2. */
+ atomic_long_t expedited_normal; /* # fallbacks to normal. */
+ atomic_long_t expedited_stoppedcpus; /* # successful stop_cpus. */
+ atomic_long_t expedited_done_tries; /* # tries to update _done. */
+ atomic_long_t expedited_done_lost; /* # times beaten to _done. */
+ atomic_long_t expedited_done_exit; /* # times exited _done loop. */
+
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index f921154..5ce3352 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -670,6 +670,9 @@ EXPORT_SYMBOL_GPL(kfree_call_rcu);
* concurrently with new RCU read-side critical sections that began while
* synchronize_rcu() was waiting. RCU read-side critical sections are
* delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
*/
void synchronize_rcu(void)
{
@@ -679,7 +682,10 @@ void synchronize_rcu(void)
"Illegal synchronize_rcu() in RCU read-side critical section");
if (!rcu_scheduler_active)
return;
- wait_rcu_gp(call_rcu);
+ if (rcu_expedited)
+ synchronize_rcu_expedited();
+ else
+ wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
@@ -757,7 +763,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
* grace period for the specified rcu_node structure. If there are no such
* tasks, report it up the rcu_node hierarchy.
*
- * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
+ * CPU hotplug operations.
*/
static void
sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
@@ -831,7 +838,7 @@ void synchronize_rcu_expedited(void)
udelay(trycount * num_online_cpus());
} else {
put_online_cpus();
- synchronize_rcu();
+ wait_rcu_gp(call_rcu);
return;
}
}
@@ -875,6 +882,11 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete. For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
*/
void rcu_barrier(void)
{
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 693513b..f951268 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,29 +46,58 @@
#define RCU_TREE_NONCORE
#include "rcutree.h"
-static int show_rcubarrier(struct seq_file *m, void *unused)
+#define ulong2long(a) (*(long *)(&(a)))
+
+static int r_open(struct inode *inode, struct file *file,
+ const struct seq_operations *op)
{
- struct rcu_state *rsp;
+ int ret = seq_open(file, op);
+ if (!ret) {
+ struct seq_file *m = (struct seq_file *)file->private_data;
+ m->private = inode->i_private;
+ }
+ return ret;
+}
+
+static void *r_start(struct seq_file *m, loff_t *pos)
+{
+ struct rcu_state *rsp = (struct rcu_state *)m->private;
+ *pos = cpumask_next(*pos - 1, cpu_possible_mask);
+ if ((*pos) < nr_cpu_ids)
+ return per_cpu_ptr(rsp->rda, *pos);
+ return NULL;
+}
- for_each_rcu_flavor(rsp)
- seq_printf(m, "%s: bcc: %d nbd: %lu\n",
- rsp->name,
- atomic_read(&rsp->barrier_cpu_count),
- rsp->n_barrier_done);
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return r_start(m, pos);
+}
+
+static void r_stop(struct seq_file *m, void *v)
+{
+}
+
+static int show_rcubarrier(struct seq_file *m, void *v)
+{
+ struct rcu_state *rsp = (struct rcu_state *)m->private;
+ seq_printf(m, "bcc: %d nbd: %lu\n",
+ atomic_read(&rsp->barrier_cpu_count),
+ rsp->n_barrier_done);
return 0;
}
static int rcubarrier_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcubarrier, NULL);
+ return single_open(file, show_rcubarrier, inode->i_private);
}
static const struct file_operations rcubarrier_fops = {
.owner = THIS_MODULE,
.open = rcubarrier_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
#ifdef CONFIG_RCU_BOOST
@@ -86,10 +115,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
{
if (!rdp->beenonline)
return;
- seq_printf(m, "%3d%cc=%lu g=%lu pq=%d qp=%d",
+ seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
- rdp->completed, rdp->gpnum,
+ ulong2long(rdp->completed), ulong2long(rdp->gpnum),
rdp->passed_quiesce, rdp->qs_pending);
seq_printf(m, " dt=%d/%llx/%d df=%lu",
atomic_read(&rdp->dynticks->dynticks),
@@ -118,97 +147,62 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
}
-static int show_rcudata(struct seq_file *m, void *unused)
+static int show_rcudata(struct seq_file *m, void *v)
{
- int cpu;
- struct rcu_state *rsp;
-
- for_each_rcu_flavor(rsp) {
- seq_printf(m, "%s:\n", rsp->name);
- for_each_possible_cpu(cpu)
- print_one_rcu_data(m, per_cpu_ptr(rsp->rda, cpu));
- }
+ print_one_rcu_data(m, (struct rcu_data *)v);
return 0;
}
+static const struct seq_operations rcudate_op = {
+ .start = r_start,
+ .next = r_next,
+ .stop = r_stop,
+ .show = show_rcudata,
+};
+
static int rcudata_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcudata, NULL);
+ return r_open(inode, file, &rcudate_op);
}
static const struct file_operations rcudata_fops = {
.owner = THIS_MODULE,
.open = rcudata_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
-static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
-{
- if (!rdp->beenonline)
- return;
- seq_printf(m, "%d,%s,%lu,%lu,%d,%d",
- rdp->cpu,
- cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
- rdp->completed, rdp->gpnum,
- rdp->passed_quiesce, rdp->qs_pending);
- seq_printf(m, ",%d,%llx,%d,%lu",
- atomic_read(&rdp->dynticks->dynticks),
- rdp->dynticks->dynticks_nesting,
- rdp->dynticks->dynticks_nmi_nesting,
- rdp->dynticks_fqs);
- seq_printf(m, ",%lu", rdp->offline_fqs);
- seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen,
- ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
- rdp->nxttail[RCU_NEXT_TAIL]],
- ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=
- rdp->nxttail[RCU_NEXT_READY_TAIL]],
- ".W"[rdp->nxttail[RCU_DONE_TAIL] !=
- rdp->nxttail[RCU_WAIT_TAIL]],
- ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]);
-#ifdef CONFIG_RCU_BOOST
- seq_printf(m, ",%d,\"%c\"",
- per_cpu(rcu_cpu_has_work, rdp->cpu),
- convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
- rdp->cpu)));
-#endif /* #ifdef CONFIG_RCU_BOOST */
- seq_printf(m, ",%ld", rdp->blimit);
- seq_printf(m, ",%lu,%lu,%lu\n",
- rdp->n_cbs_invoked, rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
-}
-
-static int show_rcudata_csv(struct seq_file *m, void *unused)
+static int show_rcuexp(struct seq_file *m, void *v)
{
- int cpu;
- struct rcu_state *rsp;
-
- seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pq\",");
- seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
- seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\"");
-#ifdef CONFIG_RCU_BOOST
- seq_puts(m, "\"kt\",\"ktl\"");
-#endif /* #ifdef CONFIG_RCU_BOOST */
- seq_puts(m, ",\"b\",\"ci\",\"co\",\"ca\"\n");
- for_each_rcu_flavor(rsp) {
- seq_printf(m, "\"%s:\"\n", rsp->name);
- for_each_possible_cpu(cpu)
- print_one_rcu_data_csv(m, per_cpu_ptr(rsp->rda, cpu));
- }
+ struct rcu_state *rsp = (struct rcu_state *)m->private;
+
+ seq_printf(m, "s=%lu d=%lu w=%lu tf=%lu wd1=%lu wd2=%lu n=%lu sc=%lu dt=%lu dl=%lu dx=%lu\n",
+ atomic_long_read(&rsp->expedited_start),
+ atomic_long_read(&rsp->expedited_done),
+ atomic_long_read(&rsp->expedited_wrap),
+ atomic_long_read(&rsp->expedited_tryfail),
+ atomic_long_read(&rsp->expedited_workdone1),
+ atomic_long_read(&rsp->expedited_workdone2),
+ atomic_long_read(&rsp->expedited_normal),
+ atomic_long_read(&rsp->expedited_stoppedcpus),
+ atomic_long_read(&rsp->expedited_done_tries),
+ atomic_long_read(&rsp->expedited_done_lost),
+ atomic_long_read(&rsp->expedited_done_exit));
return 0;
}
-static int rcudata_csv_open(struct inode *inode, struct file *file)
+static int rcuexp_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcudata_csv, NULL);
+ return single_open(file, show_rcuexp, inode->i_private);
}
-static const struct file_operations rcudata_csv_fops = {
+static const struct file_operations rcuexp_fops = {
.owner = THIS_MODULE,
- .open = rcudata_csv_open,
+ .open = rcuexp_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
#ifdef CONFIG_RCU_BOOST
@@ -254,27 +248,11 @@ static const struct file_operations rcu_node_boost_fops = {
.owner = THIS_MODULE,
.open = rcu_node_boost_open,
.read = seq_read,
- .llseek = seq_lseek,
+ .llseek = no_llseek,
.release = single_release,
};
-/*
- * Create the rcuboost debugfs entry. Standard error return.
- */
-static int rcu_boost_trace_create_file(struct dentry *rcudir)
-{
- return !debugfs_create_file("rcuboost", 0444, rcudir, NULL,
- &rcu_node_boost_fops);
-}
-
-#else /* #ifdef CONFIG_RCU_BOOST */
-
-static int rcu_boost_trace_create_file(struct dentry *rcudir)
-{
- return 0; /* There cannot be an error if we didn't create it! */
-}
-
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
+#endif /* #ifdef CONFIG_RCU_BOOST */
static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
{
@@ -283,8 +261,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
struct rcu_node *rnp;
gpnum = rsp->gpnum;
- seq_printf(m, "%s: c=%lu g=%lu s=%d jfq=%ld j=%x ",
- rsp->name, rsp->completed, gpnum, rsp->fqs_state,
+ seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
+ ulong2long(rsp->completed), ulong2long(gpnum),
+ rsp->fqs_state,
(long)(rsp->jiffies_force_qs - jiffies),
(int)(jiffies & 0xffff));
seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
@@ -306,26 +285,24 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
seq_puts(m, "\n");
}
-static int show_rcuhier(struct seq_file *m, void *unused)
+static int show_rcuhier(struct seq_file *m, void *v)
{
- struct rcu_state *rsp;
-
- for_each_rcu_flavor(rsp)
- print_one_rcu_state(m, rsp);
+ struct rcu_state *rsp = (struct rcu_state *)m->private;
+ print_one_rcu_state(m, rsp);
return 0;
}
static int rcuhier_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcuhier, NULL);
+ return single_open(file, show_rcuhier, inode->i_private);
}
static const struct file_operations rcuhier_fops = {
.owner = THIS_MODULE,
.open = rcuhier_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
@@ -338,42 +315,42 @@ static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
struct rcu_node *rnp = &rsp->node[0];
raw_spin_lock_irqsave(&rnp->lock, flags);
- completed = rsp->completed;
- gpnum = rsp->gpnum;
- if (rsp->completed == rsp->gpnum)
+ completed = ACCESS_ONCE(rsp->completed);
+ gpnum = ACCESS_ONCE(rsp->gpnum);
+ if (completed == gpnum)
gpage = 0;
else
gpage = jiffies - rsp->gp_start;
gpmax = rsp->gp_max;
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- seq_printf(m, "%s: completed=%ld gpnum=%lu age=%ld max=%ld\n",
- rsp->name, completed, gpnum, gpage, gpmax);
+ seq_printf(m, "completed=%ld gpnum=%ld age=%ld max=%ld\n",
+ ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
}
-static int show_rcugp(struct seq_file *m, void *unused)
+static int show_rcugp(struct seq_file *m, void *v)
{
- struct rcu_state *rsp;
-
- for_each_rcu_flavor(rsp)
- show_one_rcugp(m, rsp);
+ struct rcu_state *rsp = (struct rcu_state *)m->private;
+ show_one_rcugp(m, rsp);
return 0;
}
static int rcugp_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcugp, NULL);
+ return single_open(file, show_rcugp, inode->i_private);
}
static const struct file_operations rcugp_fops = {
.owner = THIS_MODULE,
.open = rcugp_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
{
+ if (!rdp->beenonline)
+ return;
seq_printf(m, "%3d%cnp=%ld ",
rdp->cpu,
cpu_is_offline(rdp->cpu) ? '!' : ' ',
@@ -389,34 +366,30 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
rdp->n_rp_need_nothing);
}
-static int show_rcu_pending(struct seq_file *m, void *unused)
+static int show_rcu_pending(struct seq_file *m, void *v)
{
- int cpu;
- struct rcu_data *rdp;
- struct rcu_state *rsp;
-
- for_each_rcu_flavor(rsp) {
- seq_printf(m, "%s:\n", rsp->name);
- for_each_possible_cpu(cpu) {
- rdp = per_cpu_ptr(rsp->rda, cpu);
- if (rdp->beenonline)
- print_one_rcu_pending(m, rdp);
- }
- }
+ print_one_rcu_pending(m, (struct rcu_data *)v);
return 0;
}
+static const struct seq_operations rcu_pending_op = {
+ .start = r_start,
+ .next = r_next,
+ .stop = r_stop,
+ .show = show_rcu_pending,
+};
+
static int rcu_pending_open(struct inode *inode, struct file *file)
{
- return single_open(file, show_rcu_pending, NULL);
+ return r_open(inode, file, &rcu_pending_op);
}
static const struct file_operations rcu_pending_fops = {
.owner = THIS_MODULE,
.open = rcu_pending_open,
.read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+ .llseek = no_llseek,
+ .release = seq_release,
};
static int show_rcutorture(struct seq_file *m, void *unused)
@@ -446,43 +419,58 @@ static struct dentry *rcudir;
static int __init rcutree_trace_init(void)
{
+ struct rcu_state *rsp;
struct dentry *retval;
+ struct dentry *rspdir;
rcudir = debugfs_create_dir("rcu", NULL);
if (!rcudir)
goto free_out;
- retval = debugfs_create_file("rcubarrier", 0444, rcudir,
- NULL, &rcubarrier_fops);
- if (!retval)
- goto free_out;
-
- retval = debugfs_create_file("rcudata", 0444, rcudir,
- NULL, &rcudata_fops);
- if (!retval)
- goto free_out;
-
- retval = debugfs_create_file("rcudata.csv", 0444, rcudir,
- NULL, &rcudata_csv_fops);
- if (!retval)
- goto free_out;
-
- if (rcu_boost_trace_create_file(rcudir))
- goto free_out;
+ for_each_rcu_flavor(rsp) {
+ rspdir = debugfs_create_dir(rsp->name, rcudir);
+ if (!rspdir)
+ goto free_out;
+
+ retval = debugfs_create_file("rcudata", 0444,
+ rspdir, rsp, &rcudata_fops);
+ if (!retval)
+ goto free_out;
+
+ retval = debugfs_create_file("rcuexp", 0444,
+ rspdir, rsp, &rcuexp_fops);
+ if (!retval)
+ goto free_out;
+
+ retval = debugfs_create_file("rcu_pending", 0444,
+ rspdir, rsp, &rcu_pending_fops);
+ if (!retval)
+ goto free_out;
+
+ retval = debugfs_create_file("rcubarrier", 0444,
+ rspdir, rsp, &rcubarrier_fops);
+ if (!retval)
+ goto free_out;
- retval = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops);
- if (!retval)
- goto free_out;
+#ifdef CONFIG_RCU_BOOST
+ if (rsp == &rcu_preempt_state) {
+ retval = debugfs_create_file("rcuboost", 0444,
+ rspdir, NULL, &rcu_node_boost_fops);
+ if (!retval)
+ goto free_out;
+ }
+#endif
- retval = debugfs_create_file("rcuhier", 0444, rcudir,
- NULL, &rcuhier_fops);
- if (!retval)
- goto free_out;
+ retval = debugfs_create_file("rcugp", 0444,
+ rspdir, rsp, &rcugp_fops);
+ if (!retval)
+ goto free_out;
- retval = debugfs_create_file("rcu_pending", 0444, rcudir,
- NULL, &rcu_pending_fops);
- if (!retval)
- goto free_out;
+ retval = debugfs_create_file("rcuhier", 0444,
+ rspdir, rsp, &rcuhier_fops);
+ if (!retval)
+ goto free_out;
+ }
retval = debugfs_create_file("rcutorture", 0444, rcudir,
NULL, &rcutorture_fops);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927f..6d4569e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1887,7 +1887,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
#endif
/* Here we just switch the register state and the stack. */
- rcu_switch(prev, next);
+ rcu_user_hooks_switch(prev, next);
switch_to(prev, next, prev);
barrier();
@@ -8076,3 +8076,9 @@ struct cgroup_subsys cpuacct_subsys = {
.base_cftypes = files,
};
#endif /* CONFIG_CGROUP_CPUACCT */
+
+void dump_cpu_task(int cpu)
+{
+ pr_info("Task dump for CPU %d:\n", cpu);
+ sched_show_task(cpu_curr(cpu));
+}
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 97c465e..2b85982 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -16,8 +16,10 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2006
+ * Copyright (C) Fujitsu, 2012
*
* Author: Paul McKenney <paulmck@us.ibm.com>
+ * Lai Jiangshan <laijs@cn.fujitsu.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU/ *.txt
@@ -34,6 +36,10 @@
#include <linux/delay.h>
#include <linux/srcu.h>
+#include <trace/events/rcu.h>
+
+#include "rcu.h"
+
/*
* Initialize an rcu_batch structure to empty.
*/
@@ -92,9 +98,6 @@ static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
}
}
-/* single-thread state-machine */
-static void process_srcu(struct work_struct *work);
-
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
sp->completed = 0;
@@ -464,7 +467,9 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
*/
void synchronize_srcu(struct srcu_struct *sp)
{
- __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT);
+ __synchronize_srcu(sp, rcu_expedited
+ ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
+ : SYNCHRONIZE_SRCU_TRYCOUNT);
}
EXPORT_SYMBOL_GPL(synchronize_srcu);
@@ -637,7 +642,7 @@ static void srcu_reschedule(struct srcu_struct *sp)
/*
* This is the work-queue function that handles SRCU grace periods.
*/
-static void process_srcu(struct work_struct *work)
+void process_srcu(struct work_struct *work)
{
struct srcu_struct *sp;
@@ -648,3 +653,4 @@ static void process_srcu(struct work_struct *work)
srcu_invoke_callbacks(sp);
srcu_reschedule(sp);
}
+EXPORT_SYMBOL_GPL(process_srcu);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28e9d6c9..41faf0b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -972,7 +972,7 @@ config RCU_CPU_STALL_TIMEOUT
int "RCU CPU stall timeout in seconds"
depends on TREE_RCU || TREE_PREEMPT_RCU
range 3 300
- default 60
+ default 21
help
If a given RCU grace period extends more than the specified
number of seconds, a CPU stall warning is printed. If the
OpenPOWER on IntegriCloud