From 2036d94a7b61ca5032ce90f2bda06afec0fe713e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 30 Jan 2012 17:02:47 -0800 Subject: rcu: Rework detection of use of RCU by offline CPUs Because newly offlined CPUs continue executing after completing the CPU_DYING notifiers, they legitimately enter the scheduler and use RCU while appearing to be offline. This calls for a more sophisticated approach as follows: 1. RCU marks the CPU online during the CPU_UP_PREPARE phase. 2. RCU marks the CPU offline during the CPU_DEAD phase. 3. Diagnostics regarding use of read-side RCU by offline CPUs use RCU's accounting rather than the cpu_online_map. (Note that __call_rcu() still uses cpu_online_map to detect illegal invocations within CPU_DYING notifiers.) 4. Offline CPUs are prevented from hanging the system by force_quiescent_state(), which pays attention to cpu_online_map. Some additional work (in a later commit) will be needed to guarantee that force_quiescent_state() waits a full jiffy before assuming that a CPU is offline, for example, when called from idle entry. (This commit also makes the one-jiffy wait explicit, since the old-style implicit wait can now be defeated by RCU_FAST_NO_HZ and by rcutorture.) This approach avoids the false positives encountered when attempting to use more exact classification of CPU online/offline state. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 113 ++++++++++++++++++++++++++++-------------------- kernel/rcutree.h | 1 - kernel/rcutree_plugin.h | 2 +- kernel/rcutree_trace.c | 6 +-- 4 files changed, 71 insertions(+), 51 deletions(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 05470d4..708469a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -320,25 +320,18 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) static int rcu_implicit_offline_qs(struct rcu_data *rdp) { /* - * If the CPU is offline, it is in a quiescent state. We can - * trust its state not to change because interrupts are disabled. + * If the CPU is offline for more than a jiffy, it is in a quiescent + * state. We can trust its state not to change because interrupts + * are disabled. The reason for the jiffy's worth of slack is to + * handle CPUs initializing on the way up and finding their way + * to the idle loop on the way down. */ - if (cpu_is_offline(rdp->cpu)) { + if (cpu_is_offline(rdp->cpu) && + ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); rdp->offline_fqs++; return 1; } - - /* - * The CPU is online, so send it a reschedule IPI. This forces - * it through the scheduler, and (inefficiently) also handles cases - * where idle loops fail to inform RCU about the CPU being idle. - */ - if (rdp->cpu != smp_processor_id()) - smp_send_reschedule(rdp->cpu); - else - set_need_resched(); - rdp->resched_ipi++; return 0; } @@ -601,19 +594,33 @@ EXPORT_SYMBOL(rcu_is_cpu_idle); * this task being preempted, its old CPU being taken offline, resuming * on some other CPU, then determining that its old CPU is now offline. * It is OK to use RCU on an offline processor during initial boot, hence - * the check for rcu_scheduler_fully_active. + * the check for rcu_scheduler_fully_active. Note also that it is OK + * for a CPU coming online to use RCU for one jiffy prior to marking itself + * online in the cpu_online_mask. Similarly, it is OK for a CPU going + * offline to continue to use RCU for one jiffy after marking itself + * offline in the cpu_online_mask. This leniency is necessary given the + * non-atomic nature of the online and offline processing, for example, + * the fact that a CPU enters the scheduler after completing the CPU_DYING + * notifiers. + * + * This is also why RCU internally marks CPUs online during the + * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase. * * Disable checking if in an NMI handler because we cannot safely report * errors from NMI handlers anyway. */ bool rcu_lockdep_current_cpu_online(void) { + struct rcu_data *rdp; + struct rcu_node *rnp; bool ret; if (in_nmi()) return 1; preempt_disable(); - ret = cpu_online(smp_processor_id()) || + rdp = &__get_cpu_var(rcu_sched_data); + rnp = rdp->mynode; + ret = (rdp->grpmask & rnp->qsmaskinit) || !rcu_scheduler_fully_active; preempt_enable(); return ret; @@ -1308,14 +1315,12 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) { - unsigned long flags; int i; unsigned long mask; - int need_report; int receive_cpu = cpumask_any(cpu_online_mask); struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); - struct rcu_node *rnp = rdp->mynode; /* For dying CPU. */ + RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */ /* First, adjust the counts. */ if (rdp->nxtlist != NULL) { @@ -1381,32 +1386,6 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) "cpuofl"); rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum); /* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */ - - /* - * Remove the dying CPU from the bitmasks in the rcu_node - * hierarchy. Because we are in stop_machine() context, we - * automatically exclude ->onofflock critical sections. - */ - do { - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->qsmaskinit &= ~mask; - if (rnp->qsmaskinit != 0) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - break; - } - if (rnp == rdp->mynode) { - need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); - if (need_report & RCU_OFL_TASKS_NORM_GP) - rcu_report_unblock_qs_rnp(rnp, flags); - else - raw_spin_unlock_irqrestore(&rnp->lock, flags); - if (need_report & RCU_OFL_TASKS_EXP_GP) - rcu_report_exp_rnp(rsp, rnp, true); - } else - raw_spin_unlock_irqrestore(&rnp->lock, flags); - mask = rnp->grpmask; - rnp = rnp->parent; - } while (rnp != NULL); } /* @@ -1417,11 +1396,53 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) */ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { + unsigned long flags; + unsigned long mask; + int need_report = 0; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); - struct rcu_node *rnp = rdp->mynode; + struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */ + /* Adjust any no-longer-needed kthreads. */ rcu_stop_cpu_kthread(cpu); rcu_node_kthread_setaffinity(rnp, -1); + + /* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */ + + /* Exclude any attempts to start a new grace period. */ + raw_spin_lock_irqsave(&rsp->onofflock, flags); + + /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ + mask = rdp->grpmask; /* rnp->grplo is constant. */ + do { + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->qsmaskinit &= ~mask; + if (rnp->qsmaskinit != 0) { + if (rnp != rdp->mynode) + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + break; + } + if (rnp == rdp->mynode) + need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); + else + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + mask = rnp->grpmask; + rnp = rnp->parent; + } while (rnp != NULL); + + /* + * We still hold the leaf rcu_node structure lock here, and + * irqs are still disabled. The reason for this subterfuge is + * because invoking rcu_report_unblock_qs_rnp() with ->onofflock + * held leads to deadlock. + */ + raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + rnp = rdp->mynode; + if (need_report & RCU_OFL_TASKS_NORM_GP) + rcu_report_unblock_qs_rnp(rnp, flags); + else + raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (need_report & RCU_OFL_TASKS_EXP_GP) + rcu_report_exp_rnp(rsp, rnp, true); } #else /* #ifdef CONFIG_HOTPLUG_CPU */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index e2ac8ee..cdd1be0 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -289,7 +289,6 @@ struct rcu_data { /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ unsigned long offline_fqs; /* Kicked due to being offline. */ - unsigned long resched_ipi; /* Sent a resched IPI. */ /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5e25ee3..07f8804 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -610,7 +610,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * absolutely necessary, but this is a good performance/complexity * tradeoff. */ - if (rcu_preempt_blocked_readers_cgp(rnp)) + if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0) retval |= RCU_OFL_TASKS_NORM_GP; if (rcu_preempted_readers_exp(rnp)) retval |= RCU_OFL_TASKS_EXP_GP; diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index db0987c..ed459ed 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -72,7 +72,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks->dynticks_nesting, rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); - seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, " of=%lu", rdp->offline_fqs); seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", rdp->qlen_lazy, rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != @@ -144,7 +144,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks->dynticks_nesting, rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); - seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, ",%lu", rdp->offline_fqs); seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], @@ -168,7 +168,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) { seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); - seq_puts(m, "\"of\",\"ri\",\"qll\",\"ql\",\"qs\""); + seq_puts(m, "\"of\",\"qll\",\"ql\",\"qs\""); #ifdef CONFIG_RCU_BOOST seq_puts(m, "\"kt\",\"ktl\""); #endif /* #ifdef CONFIG_RCU_BOOST */ -- cgit v1.1