diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-04-21 20:44:11 -0700 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2018-07-12 14:24:42 -0700 |
commit | 26d950a9451336a6b5abc1c8ca6c21df58e8d89f (patch) | |
tree | f68a88af62c5dda366ccc760afd57b223f8d5b1a | |
parent | 8c42b1f39fdf9fde7cfc4024397255f31a860db6 (diff) | |
download | op-kernel-dev-26d950a9451336a6b5abc1c8ca6c21df58e8d89f.zip op-kernel-dev-26d950a9451336a6b5abc1c8ca6c21df58e8d89f.tar.gz |
rcu: Diagnostics for grace-period startup hangs
This commit causes a splat if RCU is idle and a request for a new grace
period is ignored for more than one second. This splat normally indicates
that some code path asked for a new grace period, but failed to wake up
the RCU grace-period kthread.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix bug located by Dan Carpenter and his static checker. ]
[ paulmck: Fix self-deadlock bug located by the 0day test robot. ]
[ paulmck: Disable unless CONFIG_PROVE_RCU=y. ]
-rw-r--r-- | kernel/rcu/tree.c | 66 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 2 |
2 files changed, 66 insertions, 2 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b1fffa2..6ce82c0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1681,6 +1681,7 @@ static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, } trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot")); WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT); + rsp->gp_req_activity = jiffies; if (!rsp->gp_kthread) { trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread")); goto unlock_out; @@ -2113,6 +2114,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) /* Advance CBs to reduce false positives below. */ if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) { WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); + rsp->gp_req_activity = jiffies; trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq")); } @@ -2745,6 +2747,65 @@ static void force_quiescent_state(struct rcu_state *rsp) } /* + * This function checks for grace-period requests that fail to motivate + * RCU to come out of its idle mode. + */ +static void +rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, + struct rcu_data *rdp) +{ + unsigned long flags; + unsigned long j; + struct rcu_node *rnp_root = rcu_get_root(rsp); + static atomic_t warned = ATOMIC_INIT(0); + + if (!IS_ENABLED(CONFIG_PROVE_RCU) || + rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp))) + return; + j = jiffies; /* Expensive access, and in common case don't get here. */ + if (time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) || + time_before(j, READ_ONCE(rsp->gp_activity) + HZ) || + atomic_read(&warned)) + return; + + raw_spin_lock_irqsave_rcu_node(rnp, flags); + j = jiffies; + if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) || + time_before(j, READ_ONCE(rsp->gp_req_activity) + HZ) || + time_before(j, READ_ONCE(rsp->gp_activity) + HZ) || + atomic_read(&warned)) { + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + /* Hold onto the leaf lock to make others see warned==1. 
*/ + + if (rnp_root != rnp) + raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ + j = jiffies; + if (rcu_gp_in_progress(rsp) || !need_any_future_gp(rcu_get_root(rsp)) || + time_before(j, rsp->gp_req_activity + HZ) || + time_before(j, rsp->gp_activity + HZ) || + atomic_xchg(&warned, 1)) { + raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); + return; + } + pr_alert("%s: g%lu %d%d%d%d gar:%lu ga:%lu f%#x %s->state:%#lx\n", + __func__, READ_ONCE(rsp->gpnum), + need_future_gp_element(rcu_get_root(rsp), 0), + need_future_gp_element(rcu_get_root(rsp), 1), + need_future_gp_element(rcu_get_root(rsp), 2), + need_future_gp_element(rcu_get_root(rsp), 3), + j - rsp->gp_req_activity, j - rsp->gp_activity, + rsp->gp_flags, rsp->name, + rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL); + WARN_ON(1); + if (rnp_root != rnp) + raw_spin_unlock_rcu_node(rnp_root); + raw_spin_unlock_irqrestore_rcu_node(rnp, flags); +} + +/* * This does the RCU core processing work for the specified rcu_state * and rcu_data structures. This may be called only from the CPU to * whom the rdp belongs. @@ -2755,7 +2816,7 @@ __rcu_process_callbacks(struct rcu_state *rsp) unsigned long flags; bool needwake; struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); - struct rcu_node *rnp; + struct rcu_node *rnp = rdp->mynode; WARN_ON_ONCE(!rdp->beenonline); @@ -2769,7 +2830,6 @@ __rcu_process_callbacks(struct rcu_state *rsp) if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) { local_irq_restore(flags); } else { - rnp = rdp->mynode; raw_spin_lock_rcu_node(rnp); /* irqs disabled. */ needwake = rcu_accelerate_cbs(rsp, rnp, rdp); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -2778,6 +2838,8 @@ __rcu_process_callbacks(struct rcu_state *rsp) } } + rcu_check_gp_start_stall(rsp, rnp, rdp); + /* If there are callbacks ready, invoke them. 
*/ if (rcu_segcblist_ready_cbs(&rdp->cblist)) invoke_rcu_callbacks(rsp, rdp); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 7365ac5..3c19421 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -374,6 +374,8 @@ struct rcu_state { /* but in jiffies. */ unsigned long gp_activity; /* Time of last GP kthread */ /* activity in jiffies. */ + unsigned long gp_req_activity; /* Time of last GP request */ + /* in jiffies. */ unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ unsigned long jiffies_resched; /* Time at which to resched */ |