summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2018-04-21 20:44:11 -0700
committerPaul E. McKenney <paulmck@linux.vnet.ibm.com>2018-07-12 14:24:42 -0700
commit26d950a9451336a6b5abc1c8ca6c21df58e8d89f (patch)
treef68a88af62c5dda366ccc760afd57b223f8d5b1a
parent8c42b1f39fdf9fde7cfc4024397255f31a860db6 (diff)
downloadop-kernel-dev-26d950a9451336a6b5abc1c8ca6c21df58e8d89f.zip
op-kernel-dev-26d950a9451336a6b5abc1c8ca6c21df58e8d89f.tar.gz
rcu: Diagnostics for grace-period startup hangs
This commit causes a splat if RCU is idle and a request for a new grace period is ignored for more than one second. This splat normally indicates that some code path asked for a new grace period, but failed to wake up the RCU grace-period kthread. Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> [ paulmck: Fix bug located by Dan Carpenter and his static checker. ] [ paulmck: Fix self-deadlock bug located by 0day test robot. ] [ paulmck: Disable unless CONFIG_PROVE_RCU=y. ]
-rw-r--r--kernel/rcu/tree.c66
-rw-r--r--kernel/rcu/tree.h2
2 files changed, 66 insertions, 2 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index b1fffa2..6ce82c0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1681,6 +1681,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
}
trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
+ rsp->gp_req_activity = jiffies;
if (!rsp->gp_kthread) {
trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
goto unlock_out;
@@ -2113,6 +2114,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
/* Advance CBs to reduce false positives below. */
if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
+ rsp->gp_req_activity = jiffies;
trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
TPS("newreq"));
}
@@ -2745,6 +2747,65 @@ static void force_quiescent_state(struct rcu_state *rsp)
}
/*
+ * This function checks for grace-period requests that fail to motivate
+ * RCU to come out of its idle mode.
+ *
+ * Does nothing unless CONFIG_PROVE_RCU is enabled.  Otherwise, if no
+ * grace period is in progress, the root rcu_node still needs a future
+ * grace period, and neither ->gp_req_activity nor ->gp_activity has
+ * advanced within the last HZ jiffies, print the grace-period state and
+ * WARN.  The conditions are first checked locklessly, then rechecked
+ * while holding @rnp's lock, and then rechecked once more while also
+ * holding the root rcu_node's lock.  The function-static "warned" flag
+ * is set on the first report and never cleared, so at most one report
+ * is issued.  The @rdp argument is currently unused.
+ */
+static void
+rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
+			 struct rcu_data *rdp)
+{
+	unsigned long flags;
+	unsigned long j;
+	struct rcu_node *rnp_root = rcu_get_root(rsp);
+	static atomic_t warned = ATOMIC_INIT(0);
+
+	if (!IS_ENABLED(CONFIG_PROVE_RCU) ||
+	    rcu_gp_in_progress(rsp) || !need_any_future_gp(rnp_root))
+		return;
+	j = jiffies; /* Expensive access, and in common case don't get here. */
+	if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
+	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+	    atomic_read(&warned))
+		return;
+
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	j = jiffies;
+	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rnp_root) ||
+	    time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) ||
+	    time_before(j, READ_ONCE(rsp->gp_activity) + HZ) ||
+	    atomic_read(&warned)) {
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		return;
+	}
+	/* Hold onto the leaf lock to make others see warned==1. */
+
+	if (rnp_root != rnp)
+		raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+	j = jiffies;
+	if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rnp_root) ||
+	    time_before(j, rsp->gp_req_activity + HZ) ||
+	    time_before(j, rsp->gp_activity + HZ) ||
+	    atomic_xchg(&warned, 1)) {
+		/*
+		 * The root lock was acquired separately only when the leaf
+		 * is not the root, so release it only in that case.
+		 * Otherwise the unlock of rnp below would be a second
+		 * release of the same lock.
+		 */
+		if (rnp_root != rnp)
+			raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		return;
+	}
+	pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n",
+		 __func__, READ_ONCE(rsp->gpnum),
+		 need_future_gp_element(rnp_root, 0),
+		 need_future_gp_element(rnp_root, 1),
+		 need_future_gp_element(rnp_root, 2),
+		 need_future_gp_element(rnp_root, 3),
+		 j - rsp->gp_req_activity, j - rsp->gp_activity,
+		 rsp->gp_flags, rsp->name,
+		 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL);
+	WARN_ON(1);
+	if (rnp_root != rnp)
+		raw_spin_unlock_rcu_node(rnp_root);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+}
+
+/*
* This does the RCU core processing work for the specified rcu_state
* and rcu_data structures. This may be called only from the CPU to
* whom the rdp belongs.
@@ -2755,7 +2816,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
unsigned long flags;
bool needwake;
struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
- struct rcu_node *rnp;
+ struct rcu_node *rnp = rdp->mynode;
WARN_ON_ONCE(!rdp->beenonline);
@@ -2769,7 +2830,6 @@ __rcu_process_callbacks(struct rcu_state *rsp)
if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
local_irq_restore(flags);
} else {
- rnp = rdp->mynode;
raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2778,6 +2838,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
}
}
+ rcu_check_gp_start_stall(rsp, rnp, rdp);
+
/* If there are callbacks ready, invoke them. */
if (rcu_segcblist_ready_cbs(&rdp->cblist))
invoke_rcu_callbacks(rsp, rdp);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7365ac5..3c19421 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -374,6 +374,8 @@ struct rcu_state {
/* but in jiffies. */
unsigned long gp_activity; /* Time of last GP kthread */
/* activity in jiffies. */
+ unsigned long gp_req_activity; /* Time of last GP request */
+ /* in jiffies. */
unsigned long jiffies_stall; /* Time at which to check */
/* for CPU stalls. */
unsigned long jiffies_resched; /* Time at which to resched */
OpenPOWER on IntegriCloud