From 28728dd310d48834cd486dac3cac9ae96b9deb96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jan 2016 08:33:37 -0800 Subject: rcu: Make expedited RCU-sched grace period immediately detect idle Currently, sync_sched_exp_handler() will force a reschedule unless this CPU has already checked in or unless a reschedule has already been called for. This is clearly wasteful if sync_sched_exp_handler() interrupted an idle CPU, so this commit immediately reports the quiescent state in that case. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 531a328076bd..5f4336fadc28 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3649,6 +3649,11 @@ static void sync_sched_exp_handler(void *data) if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)) return; + if (rcu_is_cpu_rrupt_from_idle()) { + rcu_report_exp_rdp(&rcu_sched_state, + this_cpu_ptr(&rcu_sched_data), true); + return; + } __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); resched_cpu(smp_processor_id()); } -- cgit v1.2.3 From 251c617c75f48e03523c43c4ce1dff44bc3ae2bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 13 Jan 2016 10:52:35 -0800 Subject: rcu: Make expedited RCU-preempt stall warnings count accurately Currently, synchronize_sched_expedited_wait() simply sets the ndetected variable to the rcu_print_task_exp_stall() return value. This means that if the last rcu_node structure has no stalled tasks, record of any stalled tasks in previous rcu_node structures is lost, which can in turn result in failure to dump out the blocking rcu_node structures. Or could, had the test been correct. This commit therefore adds the return value of rcu_print_task_exp_stall() to ndetected and corrects the later test for ndetected. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5f4336fadc28..687d8a5f35c7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3778,7 +3778,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) rsp->name); ndetected = 0; rcu_for_each_leaf_node(rsp, rnp) { - ndetected = rcu_print_task_exp_stall(rnp); + ndetected += rcu_print_task_exp_stall(rnp); mask = 1; for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) { struct rcu_data *rdp; @@ -3797,7 +3797,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", jiffies - jiffies_start, rsp->expedited_sequence, rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); - if (!ndetected) { + if (ndetected) { pr_err("blocking rcu_node structures:"); rcu_for_each_node_breadth_first(rsp, rnp) { if (rnp == rnp_root) -- cgit v1.2.3 From a1e1224849d9610b50fd1dd7d6f44308a59e46af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 13 Jan 2016 13:57:54 -0800 Subject: rcu: Make cond_resched_rcu_qs() supply RCU-sched expedited QS Although cond_resched_rcu_qs() supplies quiescent states to all flavors of normal RCU grace periods, it does nothing for expedited RCU-sched grace periods. This commit therefore adds a check for a need for a quiescent state from the current CPU by an expedited RCU-sched grace period, and invokes rcu_sched_qs() to supply that quiescent state if so. Note that the check is racy in that we might be migrated to some other CPU just after checking the per-CPU variable. This is OK because the act of migration will do a context switch, which will supply the needed quiescent state. The only downside is that we might do an unnecessary call to rcu_sched_qs(), but the probability is low and the overhead is small. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 687d8a5f35c7..178575c01d09 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -370,6 +370,21 @@ void rcu_all_qs(void) rcu_momentary_dyntick_idle(); local_irq_restore(flags); } + if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))) { + /* + * Yes, we just checked a per-CPU variable with preemption + * enabled, so we might be migrated to some other CPU at + * this point. That is OK because in that case, the + * migration will supply the needed quiescent state. + * We might end up needlessly disabling preemption and + * invoking rcu_sched_qs() on the destination CPU, but + * the probability and cost are both quite low, so this + * should not be a problem in practice. + */ + preempt_disable(); + rcu_sched_qs(); + preempt_enable(); + } this_cpu_inc(rcu_qs_ctr); barrier(); /* Avoid RCU read-side critical sections leaking up. */ } -- cgit v1.2.3 From bea2de44ae647698dc848a671fdee6e53c192423 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jan 2016 20:30:06 -0800 Subject: rcu: Add funnel-locking tracing for expedited grace periods Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 31 +++++++++++++++++++++++++++---- kernel/rcu/tree_plugin.h | 3 +++ 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 178575c01d09..79e9206a7b11 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3584,10 +3584,18 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, atomic_long_t *stat, unsigned long s) { if (rcu_exp_gp_seq_done(rsp, s)) { - if (rnp) + if (rnp) { mutex_unlock(&rnp->exp_funnel_mutex); - else if (rdp) + trace_rcu_exp_funnel_lock(rsp->name, rnp->level, + rnp->grplo, rnp->grphi, + TPS("rel")); + } else if (rdp) { mutex_unlock(&rdp->exp_funnel_mutex); + trace_rcu_exp_funnel_lock(rsp->name, + rdp->mynode->level + 1, + rdp->cpu, rdp->cpu, + TPS("rel")); + } /* Ensure test happens before caller kfree(). */ smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(stat); @@ -3619,6 +3627,9 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (sync_exp_work_done(rsp, rnp0, NULL, &rdp->expedited_workdone0, s)) return NULL; + trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, + rnp0->grplo, rnp0->grphi, + TPS("acq")); return rnp0; } } @@ -3634,16 +3645,28 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s)) return NULL; mutex_lock(&rdp->exp_funnel_mutex); + trace_rcu_exp_funnel_lock(rsp->name, rdp->mynode->level + 1, + rdp->cpu, rdp->cpu, TPS("acq")); rnp0 = rdp->mynode; for (; rnp0 != NULL; rnp0 = rnp0->parent) { if (sync_exp_work_done(rsp, rnp1, rdp, &rdp->expedited_workdone2, s)) return NULL; mutex_lock(&rnp0->exp_funnel_mutex); - if (rnp1) + trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, + rnp0->grplo, rnp0->grphi, TPS("acq")); + if (rnp1) { mutex_unlock(&rnp1->exp_funnel_mutex); - else + trace_rcu_exp_funnel_lock(rsp->name, rnp1->level, + rnp1->grplo, rnp1->grphi, + TPS("rel")); + } else { mutex_unlock(&rdp->exp_funnel_mutex); + trace_rcu_exp_funnel_lock(rsp->name, + rdp->mynode->level + 1, + rdp->cpu, rdp->cpu, + TPS("rel")); + } rnp1 = rnp0; } if (sync_exp_work_done(rsp, rnp1, rdp, diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a2ac2628ef8e..cd2dae43ff48 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -767,6 +767,9 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); mutex_unlock(&rnp_unlock->exp_funnel_mutex); + trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, + rnp_unlock->grplo, rnp_unlock->grphi, + TPS("rel")); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 4f41530245c7fd4837152e264d120d05ae940eb0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 28 Jan 2016 20:49:49 -0800 Subject: rcu: Add expedited-grace-period event tracing Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 20 +++++++++++++------- kernel/rcu/tree_plugin.h | 3 +++ 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 79e9206a7b11..524026fd9dd7 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3584,17 +3584,18 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, atomic_long_t *stat, unsigned long s) { if (rcu_exp_gp_seq_done(rsp, s)) { + trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); if (rnp) { - mutex_unlock(&rnp->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, rnp->grphi, TPS("rel")); + mutex_unlock(&rnp->exp_funnel_mutex); } else if (rdp) { - mutex_unlock(&rdp->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rdp->mynode->level + 1, rdp->cpu, rdp->cpu, TPS("rel")); + mutex_unlock(&rdp->exp_funnel_mutex); } /* Ensure test happens before caller kfree(). */ smp_mb__before_atomic(); /* ^^^ */ @@ -3624,12 +3625,12 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) rnp0 = rcu_get_root(rsp); if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) { if (mutex_trylock(&rnp0->exp_funnel_mutex)) { - if (sync_exp_work_done(rsp, rnp0, NULL, - &rdp->expedited_workdone0, s)) - return NULL; trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, rnp0->grplo, rnp0->grphi, TPS("acq")); + if (sync_exp_work_done(rsp, rnp0, NULL, + &rdp->expedited_workdone0, s)) + return NULL; return rnp0; } } @@ -3656,16 +3657,16 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, rnp0->grplo, rnp0->grphi, TPS("acq")); if (rnp1) { - mutex_unlock(&rnp1->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rnp1->level, rnp1->grplo, rnp1->grphi, TPS("rel")); + mutex_unlock(&rnp1->exp_funnel_mutex); } else { - mutex_unlock(&rdp->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rdp->mynode->level + 1, rdp->cpu, rdp->cpu, TPS("rel")); + mutex_unlock(&rdp->exp_funnel_mutex); } rnp1 = rnp0; } @@ -3895,16 +3896,21 @@ void synchronize_sched_expedited(void) /* Take a snapshot of the sequence number. */ s = rcu_exp_gp_seq_snap(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); rnp = exp_funnel_lock(rsp, s); if (rnp == NULL) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); synchronize_sched_expedited_wait(rsp); rcu_exp_gp_seq_end(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); + trace_rcu_exp_funnel_lock(rsp->name, rnp->level, + rnp->grplo, rnp->grphi, TPS("rel")); mutex_unlock(&rnp->exp_funnel_mutex); } EXPORT_SYMBOL_GPL(synchronize_sched_expedited); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index cd2dae43ff48..36e94aed38a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -750,12 +750,14 @@ void synchronize_rcu_expedited(void) } s = rcu_exp_gp_seq_snap(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); rnp_unlock = exp_funnel_lock(rsp, s); if (rnp_unlock == NULL) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); @@ -766,6 +768,7 @@ void synchronize_rcu_expedited(void) /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); mutex_unlock(&rnp_unlock->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, rnp_unlock->grplo, rnp_unlock->grphi, -- cgit v1.2.3 From e2fd9d35847d1936398d44c4df68dceb3d7f64e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 30 Jan 2016 17:23:19 -0800 Subject: rcu: Remove expedited GP funnel-lock bypass Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking") turns out to be a pessimization at high load because it forces a tree full of tasks to wait for an expedited grace period that they probably do not need. This commit therefore removes this optimization. Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 10 +++++----- kernel/rcu/tree.c | 19 ------------------- kernel/rcu/tree.h | 1 - kernel/rcu/tree_trace.c | 7 +++---- 4 files changed, 8 insertions(+), 29 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index ec6998b1b6d0..00a3a38b375a 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of The output of "cat rcu/rcu_preempt/rcuexp" looks as follows: -s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872 +s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872 These fields are as follows: o "s" is the sequence number, with an odd number indicating that an expedited grace period is in progress. -o "wd0", "wd1", "wd2", and "wd3" are the number of times that an - attempt to start an expedited grace period found that someone - else had completed an expedited grace period that satisfies the - attempted request. "Our work is done." +o "wd1", "wd2", and "wd3" are the number of times that an attempt + to start an expedited grace period found that someone else had + completed an expedited grace period that satisfies the attempted + request. "Our work is done." o "n" is number of times that a concurrent CPU-hotplug operation forced a fallback to a normal grace period. diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 524026fd9dd7..62e73e0a929f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) struct rcu_node *rnp0; struct rcu_node *rnp1 = NULL; - /* - * First try directly acquiring the root lock in order to reduce - * latency in the common case where expedited grace periods are - * rare. We check mutex_is_locked() to avoid pathological levels of - * memory contention on ->exp_funnel_mutex in the heavy-load case. - */ - rnp0 = rcu_get_root(rsp); - if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) { - if (mutex_trylock(&rnp0->exp_funnel_mutex)) { - trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, - rnp0->grplo, rnp0->grphi, - TPS("acq")); - if (sync_exp_work_done(rsp, rnp0, NULL, - &rdp->expedited_workdone0, s)) - return NULL; - return rnp0; - } - } - /* * Each pass through the following loop works its way * up the rcu_node tree, returning if others have done the diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index df668c0f9e64..ac9a7b0c36ae 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -388,7 +388,6 @@ struct rcu_data { struct rcu_head oom_head; #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ struct mutex exp_funnel_mutex; - atomic_long_t expedited_workdone0; /* # done by others #0. */ atomic_long_t expedited_workdone1; /* # done by others #1. */ atomic_long_t expedited_workdone2; /* # done by others #2. */ atomic_long_t expedited_workdone3; /* # done by others #3. */ diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 1088e64f01ad..d149c412a4e5 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v) int cpu; struct rcu_state *rsp = (struct rcu_state *)m->private; struct rcu_data *rdp; - unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0; + unsigned long s1 = 0, s2 = 0, s3 = 0; for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(rsp->rda, cpu); - s0 += atomic_long_read(&rdp->expedited_workdone0); s1 += atomic_long_read(&rdp->expedited_workdone1); s2 += atomic_long_read(&rdp->expedited_workdone2); s3 += atomic_long_read(&rdp->expedited_workdone3); } - seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", - rsp->expedited_sequence, s0, s1, s2, s3, + seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", + rsp->expedited_sequence, s1, s2, s3, atomic_long_read(&rsp->expedited_normal), atomic_read(&rsp->expedited_need_qs), rsp->expedited_sequence / 2); -- cgit v1.2.3 From ec3833ed02ae6ef2a933ece9de7cbab0c64c699e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 11 Jan 2016 16:29:29 -0800 Subject: rcu: Force boolean subscript for expedited stall warnings The cpu_online() function can return values other than 0 and 1, which can result in subscript overflow when applied to a two-element array. This commit allows for this behavior by using "!!" on the return value from cpu_online() when used as a subscript. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 62e73e0a929f..64c2e3288551 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3808,7 +3808,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) ndetected++; rdp = per_cpu_ptr(rsp->rda, cpu); pr_cont(" %d-%c%c%c", cpu, - "O."[cpu_online(cpu)], + "O."[!!cpu_online(cpu)], "o."[!!(rdp->grpmask & rnp->expmaskinit)], "N."[!!(rdp->grpmask & rnp->expmaskinitnext)]); } -- cgit v1.2.3 From d40a4f09a448382961fa9b1a2f7d4f34813f0273 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 Mar 2016 14:43:44 -0800 Subject: rcu: Shorten expedited_workdone* to exp_workdone* Just a name change to save a few lines and a bit of typing. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 +++----- kernel/rcu/tree.h | 6 +++--- kernel/rcu/tree_trace.c | 6 +++--- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 64c2e3288551..89f028767765 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3624,15 +3624,14 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) * can be inexact, as it is just promoting locality and is not * strictly needed for correctness. */ - if (sync_exp_work_done(rsp, NULL, NULL, &rdp->expedited_workdone1, s)) + if (sync_exp_work_done(rsp, NULL, NULL, &rdp->exp_workdone1, s)) return NULL; mutex_lock(&rdp->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rdp->mynode->level + 1, rdp->cpu, rdp->cpu, TPS("acq")); rnp0 = rdp->mynode; for (; rnp0 != NULL; rnp0 = rnp0->parent) { - if (sync_exp_work_done(rsp, rnp1, rdp, - &rdp->expedited_workdone2, s)) + if (sync_exp_work_done(rsp, rnp1, rdp, &rdp->exp_workdone2, s)) return NULL; mutex_lock(&rnp0->exp_funnel_mutex); trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, @@ -3651,8 +3650,7 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) } rnp1 = rnp0; } - if (sync_exp_work_done(rsp, rnp1, rdp, - &rdp->expedited_workdone3, s)) + if (sync_exp_work_done(rsp, rnp1, rdp, &rdp->exp_workdone3, s)) return NULL; return rnp1; } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index ac9a7b0c36ae..6a8f09446924 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -388,9 +388,9 @@ struct rcu_data { struct rcu_head oom_head; #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ struct mutex exp_funnel_mutex; - atomic_long_t expedited_workdone1; /* # done by others #1. */ - atomic_long_t expedited_workdone2; /* # done by others #2. */ - atomic_long_t expedited_workdone3; /* # done by others #3. */ + atomic_long_t exp_workdone1; /* # done by others #1. */ + atomic_long_t exp_workdone2; /* # done by others #2. */ + atomic_long_t exp_workdone3; /* # done by others #3. */ /* 7) Callback offloading. */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index d149c412a4e5..86782f9a4604 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -189,9 +189,9 @@ static int show_rcuexp(struct seq_file *m, void *v) for_each_possible_cpu(cpu) { rdp = per_cpu_ptr(rsp->rda, cpu); - s1 += atomic_long_read(&rdp->expedited_workdone1); - s2 += atomic_long_read(&rdp->expedited_workdone2); - s3 += atomic_long_read(&rdp->expedited_workdone3); + s1 += atomic_long_read(&rdp->exp_workdone1); + s2 += atomic_long_read(&rdp->exp_workdone2); + s3 += atomic_long_read(&rdp->exp_workdone3); } seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n", rsp->expedited_sequence, s1, s2, s3, -- cgit v1.2.3 From f6a12f34a448cc8a624070fd365c29c890138a48 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 30 Jan 2016 17:57:35 -0800 Subject: rcu: Enforce expedited-GP fairness via funnel wait queue The current mutex-based funnel-locking approach used by expedited grace periods is subject to severe unfairness. The problem arises when a few tasks, making a path from leaves to root, all wake up before other tasks do. A new task can then follow this path all the way to the root, which needlessly delays tasks whose grace period is done, but who do not happen to acquire the lock quickly enough. This commit avoids this problem by maintaining per-rcu_node wait queues, along with a per-rcu_node counter that tracks the latest grace period sought by an earlier task to visit this node. If that grace period would satisfy the current task, instead of proceeding up the tree, it waits on the current rcu_node structure using a pair of wait queues provided for that purpose. This decouples awakening of old tasks from the arrival of new tasks. If the wakeups prove to be a bottleneck, additional kthreads can be brought to bear for that purpose. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 5 +- kernel/rcu/tree.c | 155 +++++++++++++++++++++++---------------------- kernel/rcu/tree.h | 10 ++- kernel/rcu/tree_plugin.h | 16 ++--- 4 files changed, 93 insertions(+), 93 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index aacc172eba7e..d3e756539d44 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -179,6 +179,7 @@ TRACE_EVENT(rcu_grace_period_init, * "snap": Captured snapshot of expedited grace period sequence number. * "start": Started a real expedited grace period. * "end": Ended a real expedited grace period. + * "endwake": Woke piggybackers up. * "done": Someone else did the expedited grace period for us. */ TRACE_EVENT(rcu_exp_grace_period, @@ -210,8 +211,8 @@ TRACE_EVENT(rcu_exp_grace_period, * and highest-numbered CPU associated with the current rcu_node structure, * and a string. identifying the grace-period-related event as follows: * - * "acq": Acquired a level of funnel lock - * "rel": Released a level of funnel lock + * "nxtlvl": Advance to next level of rcu_node funnel + * "wait": Wait for someone else to do expedited GP */ TRACE_EVENT(rcu_exp_funnel_lock, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 89f028767765..bd2658edce00 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -102,6 +102,7 @@ struct rcu_state sname##_state = { \ .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ .name = RCU_STATE_NAME(sname), \ .abbr = sabbr, \ + .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ } RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); @@ -3484,7 +3485,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) * for the current expedited grace period. Works only for preemptible * RCU -- other RCU implementation use other means. * - * Caller must hold the root rcu_node's exp_funnel_mutex. + * Caller must hold the rcu_state's exp_mutex. */ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) { @@ -3500,8 +3501,8 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) * recursively up the tree. (Calm down, calm down, we do the recursion * iteratively!) * - * Caller must hold the root rcu_node's exp_funnel_mutex and the - * specified rcu_node structure's ->lock. + * Caller must hold the rcu_state's exp_mutex and the specified rcu_node + * structure's ->lock. */ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake, unsigned long flags) @@ -3538,7 +3539,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, * Report expedited quiescent state for specified node. This is a * lock-acquisition wrapper function for __rcu_report_exp_rnp(). * - * Caller must hold the root rcu_node's exp_funnel_mutex. + * Caller must hold the rcu_state's exp_mutex. */ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, bool wake) @@ -3551,8 +3552,8 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, /* * Report expedited quiescent state for multiple CPUs, all covered by the - * specified leaf rcu_node structure. Caller must hold the root - * rcu_node's exp_funnel_mutex. + * specified leaf rcu_node structure. Caller must hold the rcu_state's + * exp_mutex. */ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, unsigned long mask, bool wake) @@ -3570,7 +3571,6 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, /* * Report expedited quiescent state for specified rcu_data (CPU). - * Caller must hold the root rcu_node's exp_funnel_mutex. */ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, bool wake) @@ -3579,24 +3579,11 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, } /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ -static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, - struct rcu_data *rdp, - atomic_long_t *stat, unsigned long s) +static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, + unsigned long s) { if (rcu_exp_gp_seq_done(rsp, s)) { trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); - if (rnp) { - trace_rcu_exp_funnel_lock(rsp->name, rnp->level, - rnp->grplo, rnp->grphi, - TPS("rel")); - mutex_unlock(&rnp->exp_funnel_mutex); - } else if (rdp) { - trace_rcu_exp_funnel_lock(rsp->name, - rdp->mynode->level + 1, - rdp->cpu, rdp->cpu, - TPS("rel")); - mutex_unlock(&rdp->exp_funnel_mutex); - } /* Ensure test happens before caller kfree(). */ smp_mb__before_atomic(); /* ^^^ */ atomic_long_inc(stat); @@ -3606,53 +3593,53 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp, } /* - * Funnel-lock acquisition for expedited grace periods. Returns a - * pointer to the root rcu_node structure, or NULL if some other - * task did the expedited grace period for us. + * Funnel-lock acquisition for expedited grace periods. Returns true + * if some other task completed an expedited grace period that this task + * can piggy-back on, and with no mutex held. Otherwise, returns false + * with the mutex held, indicating that the caller must actually do the + * expedited grace period. */ -static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s) +static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) { struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); - struct rcu_node *rnp0; - struct rcu_node *rnp1 = NULL; + struct rcu_node *rnp = rdp->mynode; /* - * Each pass through the following loop works its way - * up the rcu_node tree, returning if others have done the - * work or otherwise falls through holding the root rnp's - * ->exp_funnel_mutex. The mapping from CPU to rcu_node structure - * can be inexact, as it is just promoting locality and is not - * strictly needed for correctness. + * Each pass through the following loop works its way up + * the rcu_node tree, returning if others have done the work or + * otherwise falls through to acquire rsp->exp_mutex. The mapping + * from CPU to rcu_node structure can be inexact, as it is just + * promoting locality and is not strictly needed for correctness. */ - if (sync_exp_work_done(rsp, NULL, NULL, &rdp->exp_workdone1, s)) - return NULL; - mutex_lock(&rdp->exp_funnel_mutex); - trace_rcu_exp_funnel_lock(rsp->name, rdp->mynode->level + 1, - rdp->cpu, rdp->cpu, TPS("acq")); - rnp0 = rdp->mynode; - for (; rnp0 != NULL; rnp0 = rnp0->parent) { - if (sync_exp_work_done(rsp, rnp1, rdp, &rdp->exp_workdone2, s)) - return NULL; - mutex_lock(&rnp0->exp_funnel_mutex); - trace_rcu_exp_funnel_lock(rsp->name, rnp0->level, - rnp0->grplo, rnp0->grphi, TPS("acq")); - if (rnp1) { - trace_rcu_exp_funnel_lock(rsp->name, rnp1->level, - rnp1->grplo, rnp1->grphi, - TPS("rel")); - mutex_unlock(&rnp1->exp_funnel_mutex); - } else { - trace_rcu_exp_funnel_lock(rsp->name, - rdp->mynode->level + 1, - rdp->cpu, rdp->cpu, - TPS("rel")); - mutex_unlock(&rdp->exp_funnel_mutex); + for (; rnp != NULL; rnp = rnp->parent) { + if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) + return true; + + /* Work not done, either wait here or go up. */ + spin_lock(&rnp->exp_lock); + if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) { + + /* Someone else doing GP, so wait for them. */ + spin_unlock(&rnp->exp_lock); + trace_rcu_exp_funnel_lock(rsp->name, rnp->level, + rnp->grplo, rnp->grphi, + TPS("wait")); + wait_event(rnp->exp_wq[(s >> 1) & 0x1], + sync_exp_work_done(rsp, + &rdp->exp_workdone2, s)); + return true; } - rnp1 = rnp0; + rnp->exp_seq_rq = s; /* Followers can wait on us. */ + spin_unlock(&rnp->exp_lock); + trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, + rnp->grphi, TPS("nxtlvl")); } - if (sync_exp_work_done(rsp, rnp1, rdp, &rdp->exp_workdone3, s)) - return NULL; - return rnp1; + mutex_lock(&rsp->exp_mutex); + if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { + mutex_unlock(&rsp->exp_mutex); + return true; + } + return false; } /* Invoked on each online non-idle CPU for expedited quiescent state. */ @@ -3841,6 +3828,27 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) } } +/* + * Wake up everyone who piggybacked on the just-completed expedited + * grace period. Also update all the ->exp_seq_rq counters as needed + * in order to avoid counter-wrap problems. + */ +static void rcu_exp_wake(struct rcu_state *rsp, unsigned long s) +{ + struct rcu_node *rnp; + + rcu_for_each_node_breadth_first(rsp, rnp) { + if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { + spin_lock(&rnp->exp_lock); + /* Recheck, avoid hang in case someone just arrived. */ + if (ULONG_CMP_LT(rnp->exp_seq_rq, s)) + rnp->exp_seq_rq = s; + spin_unlock(&rnp->exp_lock); + } + wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x1]); + } +} + /** * synchronize_sched_expedited - Brute-force RCU-sched grace period * @@ -3860,7 +3868,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) void synchronize_sched_expedited(void) { unsigned long s; - struct rcu_node *rnp; struct rcu_state *rsp = &rcu_sched_state; /* If only one CPU, this is automatically a grace period. */ @@ -3877,20 +3884,23 @@ void synchronize_sched_expedited(void) s = rcu_exp_gp_seq_snap(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - rnp = exp_funnel_lock(rsp, s); - if (rnp == NULL) + if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); + + /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); - synchronize_sched_expedited_wait(rsp); + /* Wait and clean up, including waking everyone. */ + synchronize_sched_expedited_wait(rsp); rcu_exp_gp_seq_end(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - trace_rcu_exp_funnel_lock(rsp->name, rnp->level, - rnp->grplo, rnp->grphi, TPS("rel")); - mutex_unlock(&rnp->exp_funnel_mutex); + rcu_exp_wake(rsp, s); + + trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); + mutex_unlock(&rsp->exp_mutex); } EXPORT_SYMBOL_GPL(synchronize_sched_expedited); @@ -4190,7 +4200,6 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); rdp->cpu = cpu; rdp->rsp = rsp; - mutex_init(&rdp->exp_funnel_mutex); rcu_boot_init_nocb_percpu_data(rdp); raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } @@ -4448,10 +4457,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) { static const char * const buf[] = RCU_NODE_NAME_INIT; static const char * const fqs[] = RCU_FQS_NAME_INIT; - static const char * const exp[] = RCU_EXP_NAME_INIT; static struct lock_class_key rcu_node_class[RCU_NUM_LVLS]; static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS]; - static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS]; static u8 fl_mask = 0x1; int levelcnt[RCU_NUM_LVLS]; /* # nodes in each level. */ @@ -4510,9 +4517,9 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp->level = i; INIT_LIST_HEAD(&rnp->blkd_tasks); rcu_init_one_nocb(rnp); - mutex_init(&rnp->exp_funnel_mutex); - lockdep_set_class_and_name(&rnp->exp_funnel_mutex, - &rcu_exp_class[i], exp[i]); + init_waitqueue_head(&rnp->exp_wq[0]); + init_waitqueue_head(&rnp->exp_wq[1]); + spin_lock_init(&rnp->exp_lock); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a8f09446924..f9d4fbb1e014 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -70,7 +70,6 @@ # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 } # define RCU_NODE_NAME_INIT { "rcu_node_0" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" } -# define RCU_EXP_NAME_INIT { "rcu_node_exp_0" } #elif NR_CPUS <= RCU_FANOUT_2 # define RCU_NUM_LVLS 2 # define NUM_RCU_LVL_0 1 @@ -79,7 +78,6 @@ # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 } # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" } -# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" } #elif NR_CPUS <= RCU_FANOUT_3 # define RCU_NUM_LVLS 3 # define NUM_RCU_LVL_0 1 @@ -89,7 +87,6 @@ # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } -# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" } #elif NR_CPUS <= RCU_FANOUT_4 # define RCU_NUM_LVLS 4 # define NUM_RCU_LVL_0 1 @@ -100,7 +97,6 @@ # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } -# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" } #else # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ @@ -252,7 +248,9 @@ struct rcu_node { /* Counts of upcoming no-CB GP requests. */ raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; - struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp; + spinlock_t exp_lock ____cacheline_internodealigned_in_smp; + unsigned long exp_seq_rq; + wait_queue_head_t exp_wq[2]; } ____cacheline_internodealigned_in_smp; /* @@ -387,7 +385,6 @@ struct rcu_data { #ifdef CONFIG_RCU_FAST_NO_HZ struct rcu_head oom_head; #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ - struct mutex exp_funnel_mutex; atomic_long_t exp_workdone1; /* # done by others #1. */ atomic_long_t exp_workdone2; /* # done by others #2. */ atomic_long_t exp_workdone3; /* # done by others #3. */ @@ -504,6 +501,7 @@ struct rcu_state { /* _rcu_barrier(). */ /* End of fields guarded by barrier_mutex. */ + struct mutex exp_mutex; /* Serialize expedited GP. */ unsigned long expedited_sequence; /* Take a ticket. */ atomic_long_t expedited_normal; /* # fallbacks to normal. */ atomic_t expedited_need_qs; /* # CPUs left to check in. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 36e94aed38a7..c82c3640493f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -738,8 +738,6 @@ static void sync_rcu_exp_handler(void *info) */ void synchronize_rcu_expedited(void) { - struct rcu_node *rnp; - struct rcu_node *rnp_unlock; struct rcu_state *rsp = rcu_state_p; unsigned long s; @@ -752,8 +750,7 @@ void synchronize_rcu_expedited(void) s = rcu_exp_gp_seq_snap(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - rnp_unlock = exp_funnel_lock(rsp, s); - if (rnp_unlock == NULL) + if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ rcu_exp_gp_seq_start(rsp); @@ -763,16 +760,13 @@ void synchronize_rcu_expedited(void) sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); /* Wait for snapshotted ->blkd_tasks lists to drain. */ - rnp = rcu_get_root(rsp); synchronize_sched_expedited_wait(rsp); - - /* Clean up and exit. */ rcu_exp_gp_seq_end(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - mutex_unlock(&rnp_unlock->exp_funnel_mutex); - trace_rcu_exp_funnel_lock(rsp->name, rnp_unlock->level, - rnp_unlock->grplo, rnp_unlock->grphi, - TPS("rel")); + rcu_exp_wake(rsp, s); + + trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); + mutex_unlock(&rsp->exp_mutex); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 356051e1de3cf65575da4ee92d1f5cee86677ee2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 13:22:53 -0700 Subject: rcu: Add exp_funnel_lock() fastpath This commit speeds up the low-contention case, especially for systems with large rcu_node trees, by attempting to directly acquire the ->exp_mutex. This fastpath checks the leaves and root first in order to avoid excessive memory contention on the mutex itself. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index bd2658edce00..892a140ae7b6 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3603,6 +3603,15 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) { struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); struct rcu_node *rnp = rdp->mynode; + struct rcu_node *rnp_root = rcu_get_root(rsp); + + /* Low-contention fastpath. */ + if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && + (rnp == rnp_root || + ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && + !mutex_is_locked(&rsp->exp_mutex) && + mutex_trylock(&rsp->exp_mutex)) + goto fastpath; /* * Each pass through the following loop works its way up @@ -3635,6 +3644,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) rnp->grphi, TPS("nxtlvl")); } mutex_lock(&rsp->exp_mutex); +fastpath: if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { mutex_unlock(&rsp->exp_mutex); return true; -- cgit v1.2.3 From 4ea3e85b113ab37a2d55cfabf0d709ddec088bb3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:22:25 -0700 Subject: rcu: Consolidate expedited GP code into rcu_exp_wait_wake() Currently, synchronize_rcu_expedited() and rcu_sched_expedited() have significant duplicate code. This commit therefore consolidates some of this code into rcu_exp_wake(), which is now renamed to rcu_exp_wait_wake() in recognition of its added responsibilities. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 18 +++++++++--------- kernel/rcu/tree_plugin.h | 10 ++-------- 2 files changed, 11 insertions(+), 17 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 892a140ae7b6..fd86eca9478e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3839,14 +3839,18 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) } /* - * Wake up everyone who piggybacked on the just-completed expedited + * Wait for the current expedited grace period to complete, and then + * wake up everyone who piggybacked on the just-completed expedited * grace period. Also update all the ->exp_seq_rq counters as needed * in order to avoid counter-wrap problems. */ -static void rcu_exp_wake(struct rcu_state *rsp, unsigned long s) +static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) { struct rcu_node *rnp; + synchronize_sched_expedited_wait(rsp); + rcu_exp_gp_seq_end(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); rcu_for_each_node_breadth_first(rsp, rnp) { if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { spin_lock(&rnp->exp_lock); @@ -3857,6 +3861,8 @@ static void rcu_exp_wake(struct rcu_state *rsp, unsigned long s) } wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x1]); } + trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); + mutex_unlock(&rsp->exp_mutex); } /** @@ -3904,13 +3910,7 @@ void synchronize_sched_expedited(void) sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); /* Wait and clean up, including waking everyone. */ - synchronize_sched_expedited_wait(rsp); - rcu_exp_gp_seq_end(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - rcu_exp_wake(rsp, s); - - trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); - mutex_unlock(&rsp->exp_mutex); + rcu_exp_wait_wake(rsp, s); } EXPORT_SYMBOL_GPL(synchronize_sched_expedited); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c82c3640493f..b6d5dde6eab9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -759,14 +759,8 @@ void synchronize_rcu_expedited(void) /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); - /* Wait for snapshotted ->blkd_tasks lists to drain. */ - synchronize_sched_expedited_wait(rsp); - rcu_exp_gp_seq_end(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); - rcu_exp_wake(rsp, s); - - trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); - mutex_unlock(&rsp->exp_mutex); + /* Wait for ->blkd_tasks lists to drain, then wake everyone up. */ + rcu_exp_wait_wake(rsp, s); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); -- cgit v1.2.3 From 179e5dcd1e5bdfac1128431d131b31322aedd2bc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:27:44 -0700 Subject: rcu: Consolidate expedited GP tracing into rcu_exp_gp_seq_snap() This commit moves some duplicate code from synchronize_rcu_expedited() and synchronize_sched_expedited() into rcu_exp_gp_seq_snap(). This doesn't save lines of code, but does eliminate a "tell me twice" issue. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 8 +++++--- kernel/rcu/tree_plugin.h | 2 -- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index fd86eca9478e..5b1c8fd89af0 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3392,8 +3392,12 @@ static void rcu_exp_gp_seq_end(struct rcu_state *rsp) } static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp) { + unsigned long s; + smp_mb(); /* Caller's modifications seen first by other CPUs. */ - return rcu_seq_snap(&rsp->expedited_sequence); + s = rcu_seq_snap(&rsp->expedited_sequence); + trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); + return s; } static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) { @@ -3898,8 +3902,6 @@ void synchronize_sched_expedited(void) /* Take a snapshot of the sequence number. */ s = rcu_exp_gp_seq_snap(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index b6d5dde6eab9..529a44085a63 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -748,8 +748,6 @@ void synchronize_rcu_expedited(void) } s = rcu_exp_gp_seq_snap(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); - if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ -- cgit v1.2.3 From aff12cdf86e6fa891d1c30c0fad112d138bd7b10 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:32:24 -0700 Subject: rcu: Consolidate expedited GP code into exp_funnel_lock() This commit pulls the grace-period-start counter adjustment and tracing from synchronize_rcu_expedited() and synchronize_sched_expedited() into exp_funnel_lock(), thus eliminating some code duplication. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 ++--- kernel/rcu/tree_plugin.h | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 5b1c8fd89af0..e8fff14e417b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3653,6 +3653,8 @@ fastpath: mutex_unlock(&rsp->exp_mutex); return true; } + rcu_exp_gp_seq_start(rsp); + trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); return false; } @@ -3905,9 +3907,6 @@ void synchronize_sched_expedited(void) if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - rcu_exp_gp_seq_start(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); - /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 529a44085a63..ff1cd4e1188d 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -751,9 +751,6 @@ void synchronize_rcu_expedited(void) if (exp_funnel_lock(rsp, s)) return; /* Someone else did our work for us. */ - rcu_exp_gp_seq_start(rsp); - trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); - /* Initialize the rcu_node tree in preparation for the wait. */ sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler); -- cgit v1.2.3 From 3b5f668e715bc19610ad967ef97a7e8c55a186ec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Mar 2016 16:47:55 -0700 Subject: rcu: Overlap wakeups with next expedited grace period The current expedited grace-period implementation makes subsequent grace periods wait on wakeups for the prior grace period. This does not fit the dictionary definition of "expedited", so this commit allows these two phases to overlap. Doing this requires four waitqueues rather than two because tasks can now be waiting on the previous, current, and next grace periods. The fourth waitqueue makes the bit masking work out nicely. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 17 ++++++++++++++--- kernel/rcu/tree.h | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e8fff14e417b..1df100cb7a62 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -103,6 +103,7 @@ struct rcu_state sname##_state = { \ .name = RCU_STATE_NAME(sname), \ .abbr = sabbr, \ .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ + .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \ } RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); @@ -3637,7 +3638,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, rnp->grphi, TPS("wait")); - wait_event(rnp->exp_wq[(s >> 1) & 0x1], + wait_event(rnp->exp_wq[(s >> 1) & 0x3], sync_exp_work_done(rsp, &rdp->exp_workdone2, s)); return true; @@ -3857,6 +3858,14 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) synchronize_sched_expedited_wait(rsp); rcu_exp_gp_seq_end(rsp); trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); + + /* + * Switch over to wakeup mode, allowing the next GP, but -only- the + * next GP, to proceed. + */ + mutex_lock(&rsp->exp_wake_mutex); + mutex_unlock(&rsp->exp_mutex); + rcu_for_each_node_breadth_first(rsp, rnp) { if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { spin_lock(&rnp->exp_lock); @@ -3865,10 +3874,10 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) rnp->exp_seq_rq = s; spin_unlock(&rnp->exp_lock); } - wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x1]); + wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]); } trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); - mutex_unlock(&rsp->exp_mutex); + mutex_unlock(&rsp->exp_wake_mutex); } /** @@ -4530,6 +4539,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) rcu_init_one_nocb(rnp); init_waitqueue_head(&rnp->exp_wq[0]); init_waitqueue_head(&rnp->exp_wq[1]); + init_waitqueue_head(&rnp->exp_wq[2]); + init_waitqueue_head(&rnp->exp_wq[3]); spin_lock_init(&rnp->exp_lock); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index f9d4fbb1e014..1194ab0da56a 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -250,7 +250,7 @@ struct rcu_node { spinlock_t exp_lock ____cacheline_internodealigned_in_smp; unsigned long exp_seq_rq; - wait_queue_head_t exp_wq[2]; + wait_queue_head_t exp_wq[4]; } ____cacheline_internodealigned_in_smp; /* @@ -502,6 +502,7 @@ struct rcu_state { /* End of fields guarded by barrier_mutex. */ struct mutex exp_mutex; /* Serialize expedited GP. */ + struct mutex exp_wake_mutex; /* Serialize wakeup. */ unsigned long expedited_sequence; /* Take a ticket. */ atomic_long_t expedited_normal; /* # fallbacks to normal. */ atomic_t expedited_need_qs; /* # CPUs left to check in. */ -- cgit v1.2.3 From 86057b80ae31d37fcbdb5f57d15aaf1148c69f96 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 31 Dec 2015 08:48:36 -0800 Subject: rcu: Awaken grace-period kthread when stalled Recent kernels can fail to awaken the grace-period kthread for quiescent-state forcing. This commit is a crude hack that does a wakeup any time a stall is detected. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 531a328076bd..a327a253c178 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1224,8 +1224,10 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) rsp->gp_flags, gp_state_getname(rsp->gp_state), rsp->gp_state, rsp->gp_kthread ? rsp->gp_kthread->state : ~0); - if (rsp->gp_kthread) + if (rsp->gp_kthread) { sched_show_task(rsp->gp_kthread); + wake_up_process(rsp->gp_kthread); + } } } -- cgit v1.2.3 From fcfd0a237bfcf0c314005007e9d76e55a25e2bad Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 3 Jan 2016 16:42:18 -0800 Subject: rcu: Make FQS schedule advance only if FQS happened Currently, the force-quiescent-state (FQS) code in rcu_gp_kthread() can advance the next FQS even if one was not executed last time. This can happen due timeout-duration uncertainty. This commit therefore avoids advancing the FQS schedule unless an FQS was just executed. In the corner case where an FQS was not executed, but is due now, the code does a one-jiffy wait. This change prepares for kthread kicking. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a327a253c178..6116cfad18ff 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2146,6 +2146,15 @@ static int __noreturn rcu_gp_kthread(void *arg) TPS("fqsend")); cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); + ret = 0; /* Force full wait till next FQS. */ + j = jiffies_till_next_fqs; + if (j > HZ) { + j = HZ; + jiffies_till_next_fqs = HZ; + } else if (j < 1) { + j = 1; + jiffies_till_next_fqs = 1; + } } else { /* Deal with stray signal. */ cond_resched_rcu_qs(); @@ -2154,14 +2163,12 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("fqswaitsig")); - } - j = jiffies_till_next_fqs; - if (j > HZ) { - j = HZ; - jiffies_till_next_fqs = HZ; - } else if (j < 1) { - j = 1; - jiffies_till_next_fqs = 1; + ret = 1; /* Keep old FQS timing. */ + j = jiffies; + if (time_after(jiffies, rsp->jiffies_force_qs)) + j = 1; + else + j = rsp->jiffies_force_qs - j; } } -- cgit v1.2.3 From 8c7c4829a81c1838f18c12ce5a3a5c29a08bf0a8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 3 Jan 2016 20:29:57 -0800 Subject: rcu: Awaken grace-period kthread if too long since FQS Recent kernels can fail to awaken the grace-period kthread for quiescent-state forcing. This commit is a crude hack that does a wakeup if a scheduling-clock interrupt sees that it has been too long since force-quiescent-state (FQS) processing. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 39 +++++++++++++++++++++++++++++++++++++-- kernel/rcu/tree.h | 2 ++ 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 6116cfad18ff..a739292be605 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -385,9 +385,11 @@ module_param(qlowmark, long, 0444); static ulong jiffies_till_first_fqs = ULONG_MAX; static ulong jiffies_till_next_fqs = ULONG_MAX; +static bool rcu_kick_kthreads; module_param(jiffies_till_first_fqs, ulong, 0644); module_param(jiffies_till_next_fqs, ulong, 0644); +module_param(rcu_kick_kthreads, bool, 0644); /* * How long the grace period must be before we start recruiting @@ -1251,6 +1253,24 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp) } } +/* + * If too much time has passed in the current grace period, and if + * so configured, go kick the relevant kthreads. + */ +static void rcu_stall_kick_kthreads(struct rcu_state *rsp) +{ + unsigned long j; + + if (!rcu_kick_kthreads) + return; + j = READ_ONCE(rsp->jiffies_kick_kthreads); + if (time_after(jiffies, j) && rsp->gp_kthread) { + WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name); + wake_up_process(rsp->gp_kthread); + WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ); + } +} + static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) { int cpu; @@ -1262,6 +1282,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum) struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(rsp); + if (rcu_cpu_stall_suppress) + return; + /* Only let one CPU complain about others per time interval. */ raw_spin_lock_irqsave_rcu_node(rnp, flags); @@ -1335,6 +1360,11 @@ static void print_cpu_stall(struct rcu_state *rsp) struct rcu_node *rnp = rcu_get_root(rsp); long totqlen = 0; + /* Kick and suppress, if so configured. */ + rcu_stall_kick_kthreads(rsp); + if (rcu_cpu_stall_suppress) + return; + /* * OK, time to rat on ourselves... * See Documentation/RCU/stallwarn.txt for info on how to debug @@ -1379,8 +1409,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) unsigned long js; struct rcu_node *rnp; - if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) + if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || + !rcu_gp_in_progress(rsp)) return; + rcu_stall_kick_kthreads(rsp); j = jiffies; /* @@ -2119,8 +2151,11 @@ static int __noreturn rcu_gp_kthread(void *arg) } ret = 0; for (;;) { - if (!ret) + if (!ret) { rsp->jiffies_force_qs = jiffies + j; + WRITE_ONCE(rsp->jiffies_kick_kthreads, + jiffies + 3 * j); + } trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("fqswait")); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index df668c0f9e64..34d3973f7223 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -513,6 +513,8 @@ struct rcu_state { unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ + unsigned long jiffies_kick_kthreads; /* Time at which to kick */ + /* kthreads, if configured. */ unsigned long n_force_qs; /* Number of calls to */ /* force_quiescent_state(). */ unsigned long n_force_qs_lh; /* ~Number of calls leaving */ -- cgit v1.2.3 From 5dffed1e5721f6deae4fd67d32386ef037c5fc56 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 17 Feb 2016 11:54:28 -0800 Subject: rcu: Dump ftrace buffer when kicking grace-period kthread If it is necessary to kick the grace-period kthread, that is a good time to dump the trace buffer in order to learn why kicking was needed. This commit therefore does the dump. Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/rcu/tree.c') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a739292be605..86edb92276d3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1266,6 +1266,7 @@ static void rcu_stall_kick_kthreads(struct rcu_state *rsp) j = READ_ONCE(rsp->jiffies_kick_kthreads); if (time_after(jiffies, j) && rsp->gp_kthread) { WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name); + rcu_ftrace_dump(DUMP_ALL); wake_up_process(rsp->gp_kthread); WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ); } -- cgit v1.2.3 From 291783b8ad77a83a6fdf91d55eee7f1ad72ed4d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Jan 2016 13:43:30 -0800 Subject: rcutorture: Expedited-GP batch progress access to torturing This commit provides rcu_exp_batches_completed() and rcu_exp_batches_completed_sched() functions to allow torture-test modules to check how many expedited grace period batches have completed. These are analogous to the existing rcu_batches_completed(), rcu_batches_completed_bh(), and rcu_batches_completed_sched() functions. Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 16 ++++++++++++++++ include/linux/rcutree.h | 2 ++ kernel/rcu/tree.c | 22 ++++++++++++++++++++++ 3 files changed, 40 insertions(+) (limited to 'kernel/rcu/tree.c') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 64809aea661c..93aea75029fb 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void) return 0; } +/* + * Return the number of expedited grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of expedited sched grace periods completed. + */ +static inline unsigned long rcu_exp_batches_completed_sched(void) +{ + return 0; +} + static inline void rcu_force_quiescent_state(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index ad1eda9fa4da..5043cb823fb2 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void); unsigned long rcu_batches_completed(void); unsigned long rcu_batches_completed_bh(void); unsigned long rcu_batches_completed_sched(void); +unsigned long rcu_exp_batches_completed(void); +unsigned long rcu_exp_batches_completed_sched(void); void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 531a328076bd..88df64087dfe 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -459,6 +459,28 @@ unsigned long rcu_batches_completed_bh(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); +/* + * Return the number of RCU expedited batches completed thus far for + * debug & stats. Odd numbers mean that a batch is in progress, even + * numbers mean idle. The value returned will thus be roughly double + * the cumulative batches since boot. + */ +unsigned long rcu_exp_batches_completed(void) +{ + return rcu_state_p->expedited_sequence; +} +EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); + +/* + * Return the number of RCU-sched expedited batches completed thus far + * for debug & stats. Similar to rcu_exp_batches_completed(). + */ +unsigned long rcu_exp_batches_completed_sched(void) +{ + return rcu_sched_state.expedited_sequence; +} +EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); + /* * Force a quiescent state. */ -- cgit v1.2.3