From fe15d706cfc1cb321dbe2329b04b5ca185edff60 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Jan 2012 13:30:33 -0800 Subject: rcu: Add lockdep-RCU checks for simple self-deadlock It is illegal to have a grace period within a same-flavor RCU read-side critical section, so this commit adds lockdep-RCU checks to splat when such abuse is encountered. This commit does not detect more elaborate RCU deadlock situations. These situations might be a job for lockdep enhancements. Signed-off-by: Paul E. McKenney --- kernel/rcutiny.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 977296dca0a4..8e00d461911e 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -319,6 +319,10 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ void synchronize_sched(void) { + rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && + !lock_is_held(&rcu_lock_map) && + !lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_sched() in RCU read-side critical section"); cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); -- cgit v1.2.3 From 486e259340fc4c60474f2c14703e3b3634bb58ca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 6 Jan 2012 14:11:30 -0800 Subject: rcu: Avoid waking up CPUs having only kfree_rcu() callbacks When CONFIG_RCU_FAST_NO_HZ is enabled, RCU will allow a given CPU to enter dyntick-idle mode even if it still has RCU callbacks queued. RCU avoids system hangs in this case by scheduling a timer for several jiffies in the future. However, if all of the callbacks on that CPU are from kfree_rcu(), there is no reason to wake the CPU up, as it is not a problem to defer freeing of memory. This commit therefore tracks the number of callbacks on a given CPU that are from kfree_rcu(), and avoids scheduling the timer if all of a given CPU's callbacks are from kfree_rcu(). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- include/linux/rcutiny.h | 6 ++++ include/linux/rcutree.h | 2 ++ include/trace/events/rcu.h | 63 ++++++++++++++++++++++-------------- kernel/rcu.h | 4 ++- kernel/rcutiny.c | 4 +-- kernel/rcutree.c | 29 ++++++++++------- kernel/rcutree.h | 3 +- kernel/rcutree_plugin.h | 79 ++++++++++++++++++++++++++++++++++++++++++++-- kernel/rcutree_trace.c | 8 ++--- 10 files changed, 153 insertions(+), 47 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 81c04f4348ec..a67d5f1072ea 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -841,7 +841,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) /* See the kfree_rcu() header comment. */ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); - call_rcu(head, (rcu_callback)offset); + kfree_call_rcu(head, (rcu_callback)offset); } /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 00b7a5e493d2..51bf29c81485 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,6 +83,12 @@ static inline void synchronize_sched_expedited(void) synchronize_sched(); } +static inline void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + call_rcu(head, func); +} + #ifdef CONFIG_TINY_RCU static inline void rcu_preempt_note_context_switch(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 73e7195f9997..73892483fd05 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -61,6 +61,8 @@ extern void synchronize_rcu_bh(void); extern void synchronize_sched_expedited(void); extern void synchronize_rcu_expedited(void); +void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); + static inline void synchronize_rcu_bh_expedited(void) { synchronize_sched_expedited(); diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index d2d88bed891b..337099783f37 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -313,19 +313,22 @@ TRACE_EVENT(rcu_prep_idle, /* * Tracepoint for the registration of a single RCU callback function. * The first argument is the type of RCU, the second argument is - * a pointer to the RCU callback itself, and the third element is the - * new RCU callback queue length for the current CPU. + * a pointer to the RCU callback itself, the third element is the + * number of lazy callbacks queued, and the fourth element is the + * total number of callbacks queued. */ TRACE_EVENT(rcu_callback, - TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy, + long qlen), - TP_ARGS(rcuname, rhp, qlen), + TP_ARGS(rcuname, rhp, qlen_lazy, qlen), TP_STRUCT__entry( __field(char *, rcuname) __field(void *, rhp) __field(void *, func) + __field(long, qlen_lazy) __field(long, qlen) ), @@ -333,11 +336,13 @@ TRACE_EVENT(rcu_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%pf %ld", - __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) + TP_printk("%s rhp=%p func=%pf %ld/%ld", + __entry->rcuname, __entry->rhp, __entry->func, + __entry->qlen_lazy, __entry->qlen) ); /* @@ -345,20 +350,21 @@ TRACE_EVENT(rcu_callback, * kfree() form. The first argument is the RCU type, the second argument * is a pointer to the RCU callback, the third argument is the offset * of the callback within the enclosing RCU-protected data structure, - * and the fourth argument is the new RCU callback queue length for the - * current CPU. + * the fourth argument is the number of lazy callbacks queued, and the + * fifth argument is the total number of callbacks queued. */ TRACE_EVENT(rcu_kfree_callback, TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, - long qlen), + long qlen_lazy, long qlen), - TP_ARGS(rcuname, rhp, offset, qlen), + TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen), TP_STRUCT__entry( __field(char *, rcuname) __field(void *, rhp) __field(unsigned long, offset) + __field(long, qlen_lazy) __field(long, qlen) ), @@ -366,41 +372,45 @@ TRACE_EVENT(rcu_kfree_callback, __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; ), - TP_printk("%s rhp=%p func=%ld %ld", + TP_printk("%s rhp=%p func=%ld %ld/%ld", __entry->rcuname, __entry->rhp, __entry->offset, - __entry->qlen) + __entry->qlen_lazy, __entry->qlen) ); /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, - * the second is the total number of callbacks (including those that - * are not yet ready to be invoked), and the third argument is the - * current RCU-callback batch limit. + * the second is the number of lazy callbacks queued, the third is + * the total number of callbacks queued, and the fourth argument is + * the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(char *rcuname, long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit), - TP_ARGS(rcuname, qlen, blimit), + TP_ARGS(rcuname, qlen_lazy, qlen, blimit), TP_STRUCT__entry( __field(char *, rcuname) + __field(long, qlen_lazy) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( __entry->rcuname = rcuname; + __entry->qlen_lazy = qlen_lazy; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("%s CBs=%ld bl=%d", - __entry->rcuname, __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld/%ld bl=%d", + __entry->rcuname, __entry->qlen_lazy, __entry->qlen, + __entry->blimit) ); /* @@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read, #else /* #ifdef CONFIG_RCU_TRACE */ #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) -#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \ + qsmask) do { } while (0) #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) -#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \ + grplo, grphi, gp_tasks) do { } \ + while (0) #define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) #define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0) #define trace_rcu_prep_idle(reason) do { } while (0) -#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) -#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) -#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \ + do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \ + do { } while (0) #define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) #define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) #define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \ diff --git a/kernel/rcu.h b/kernel/rcu.h index aa88baab5f78..a074b0b43fc2 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -76,16 +76,18 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) extern void kfree(const void *); -static inline void __rcu_reclaim(char *rn, struct rcu_head *head) +static inline bool __rcu_reclaim(char *rn, struct rcu_head *head) { unsigned long offset = (unsigned long)head->func; if (__is_kfree_rcu_offset(offset)) { RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); kfree((void *)head - offset); + return 1; } else { RCU_TRACE(trace_rcu_invoke_callback(rn, head)); head->func(head); + return 0; } } diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 8e00d461911e..4eb34fcc2a75 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -258,7 +258,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) { - RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, ACCESS_ONCE(rcp->rcucblist), need_resched(), @@ -269,7 +269,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); - RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 570f7530f4b3..acf2d67ad2f4 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1261,6 +1261,7 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp) *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + receive_rdp->qlen_lazy += rdp->qlen_lazy; receive_rdp->qlen += rdp->qlen; receive_rdp->n_cbs_adopted += rdp->qlen; rdp->n_cbs_orphaned += rdp->qlen; @@ -1268,6 +1269,7 @@ static void rcu_send_cbs_to_online(struct rcu_state *rsp) rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; + rdp->qlen_lazy = 0; rdp->qlen = 0; } @@ -1368,11 +1370,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; struct rcu_head *next, *list, **tail; - int bl, count; + int bl, count, count_lazy; /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { - trace_rcu_batch_start(rsp->name, 0, 0); + trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0); trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist), need_resched(), is_idle_task(current), rcu_is_callbacks_kthread()); @@ -1385,7 +1387,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ local_irq_save(flags); bl = rdp->blimit; - trace_rcu_batch_start(rsp->name, rdp->qlen, bl); + trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl); list = rdp->nxtlist; rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; @@ -1396,12 +1398,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_restore(flags); /* Invoke callbacks. */ - count = 0; + count = count_lazy = 0; while (list) { next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - __rcu_reclaim(rsp->name, list); + if (__rcu_reclaim(rsp->name, list)) + count_lazy++; list = next; /* Stop only if limit reached and CPU has something to do. */ if (++count >= bl && @@ -1416,6 +1419,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) rcu_is_callbacks_kthread()); /* Update count, and requeue any remaining callbacks. */ + rdp->qlen_lazy -= count_lazy; rdp->qlen -= count; rdp->n_cbs_invoked += count; if (list != NULL) { @@ -1702,7 +1706,7 @@ static void invoke_rcu_core(void) static void __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), - struct rcu_state *rsp) + struct rcu_state *rsp, bool lazy) { unsigned long flags; struct rcu_data *rdp; @@ -1727,12 +1731,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), *rdp->nxttail[RCU_NEXT_TAIL] = head; rdp->nxttail[RCU_NEXT_TAIL] = &head->next; rdp->qlen++; + if (lazy) + rdp->qlen_lazy++; if (__is_kfree_rcu_offset((unsigned long)func)) trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, - rdp->qlen); + rdp->qlen_lazy, rdp->qlen); else - trace_rcu_callback(rsp->name, head, rdp->qlen); + trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen); /* If interrupts were disabled, don't dive into RCU core. */ if (irqs_disabled_flags(flags)) { @@ -1779,16 +1785,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), */ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_sched_state); + __call_rcu(head, func, &rcu_sched_state, 0); } EXPORT_SYMBOL_GPL(call_rcu_sched); /* - * Queue an RCU for invocation after a quicker grace period. + * Queue an RCU callback for invocation after a quicker grace period. */ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_bh_state); + __call_rcu(head, func, &rcu_bh_state, 0); } EXPORT_SYMBOL_GPL(call_rcu_bh); @@ -2036,6 +2042,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->nxtlist = NULL; for (i = 0; i < RCU_NEXT_SIZE; i++) rdp->nxttail[i] = &rdp->nxtlist; + rdp->qlen_lazy = 0; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index fddff92d6676..af2af3cc5e65 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -265,7 +265,8 @@ struct rcu_data { */ struct rcu_head *nxtlist; struct rcu_head **nxttail[RCU_NEXT_SIZE]; - long qlen; /* # of queued callbacks */ + long qlen_lazy; /* # of lazy queued callbacks */ + long qlen; /* # of queued callbacks, incl lazy */ long qlen_last_fqs_check; /* qlen at last check for QS forcing */ unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3680b6b35bf3..7adf232bb66b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -671,10 +671,24 @@ static void rcu_preempt_do_callbacks(void) */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_preempt_state); + __call_rcu(head, func, &rcu_preempt_state, 0); } EXPORT_SYMBOL_GPL(call_rcu); +/* + * Queue an RCU callback for lazy invocation after a grace period. + * This will likely be later named something like "call_rcu_lazy()", + * but this change will require some way of tagging the lazy RCU + * callbacks in the list of pending callbacks. Until then, this + * function may only be called from __kfree_rcu(). + */ +void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_preempt_state, 1); +} +EXPORT_SYMBOL_GPL(kfree_call_rcu); + /** * synchronize_rcu - wait until a grace period has elapsed. * @@ -1064,6 +1078,22 @@ static void rcu_preempt_process_callbacks(void) { } +/* + * Queue an RCU callback for lazy invocation after a grace period. + * This will likely be later named something like "call_rcu_lazy()", + * but this change will require some way of tagging the lazy RCU + * callbacks in the list of pending callbacks. Until then, this + * function may only be called from __kfree_rcu(). + * + * Because there is no preemptible RCU, we use RCU-sched instead. + */ +void kfree_call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_sched_state, 1); +} +EXPORT_SYMBOL_GPL(kfree_call_rcu); + /* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptible RCU does not exist, map to rcu-sched. @@ -2051,6 +2081,48 @@ int rcu_needs_cpu(int cpu) return per_cpu(rcu_dyntick_holdoff, cpu) == jiffies; } +/* + * Does the specified flavor of RCU have non-lazy callbacks pending on + * the specified CPU? Both RCU flavor and CPU are specified by the + * rcu_data structure. + */ +static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) +{ + return rdp->qlen != rdp->qlen_lazy; +} + +#ifdef CONFIG_TREE_PREEMPT_RCU + +/* + * Are there non-lazy RCU-preempt callbacks? (There cannot be if there + * is no RCU-preempt in the kernel.) + */ +static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + + return __rcu_cpu_has_nonlazy_callbacks(rdp); +} + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) +{ + return 0; +} + +#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ + +/* + * Does any flavor of RCU have non-lazy callbacks on the specified CPU? + */ +static bool rcu_cpu_has_nonlazy_callbacks(int cpu) +{ + return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || + __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || + rcu_preempt_cpu_has_nonlazy_callbacks(cpu); +} + /* * Timer handler used to force CPU to start pushing its remaining RCU * callbacks in the case where it entered dyntick-idle mode with callbacks @@ -2149,8 +2221,9 @@ static void rcu_prepare_for_idle(int cpu) trace_rcu_prep_idle("Dyntick with callbacks"); per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; - hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), - rcu_idle_gp_wait, HRTIMER_MODE_REL); + if (rcu_cpu_has_nonlazy_callbacks(cpu)) + hrtimer_start(&per_cpu(rcu_idle_gp_timer, cpu), + rcu_idle_gp_wait, HRTIMER_MODE_REL); return; /* Nothing more to do immediately. */ } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 654cfe67f0d1..db0987c1e1bd 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -73,8 +73,8 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, " ql=%ld qs=%c%c%c%c", - rdp->qlen, + seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", + rdp->qlen_lazy, rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], ".R"[rdp->nxttail[RCU_WAIT_TAIL] != @@ -145,7 +145,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); - seq_printf(m, ",%ld,\"%c%c%c%c\"", rdp->qlen, + seq_printf(m, ",%ld,%ld,\"%c%c%c%c\"", rdp->qlen_lazy, rdp->qlen, ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]], ".R"[rdp->nxttail[RCU_WAIT_TAIL] != @@ -168,7 +168,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) { seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); - seq_puts(m, "\"of\",\"ri\",\"ql\",\"qs\""); + seq_puts(m, "\"of\",\"ri\",\"qll\",\"ql\",\"qs\""); #ifdef CONFIG_RCU_BOOST seq_puts(m, "\"kt\",\"ktl\""); #endif /* #ifdef CONFIG_RCU_BOOST */ -- cgit v1.2.3 From 29e37d814188ac8d60f2120583704d3ef6d634b4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 17 Nov 2011 16:55:56 -0800 Subject: rcu: Allow nesting of rcu_idle_enter() and rcu_idle_exit() Use of RCU in the idle loop is incorrect, quite a few instances of just that have made their way into mainline, primarily event tracing. The problem with RCU read-side critical sections on CPUs that RCU believes to be idle is that RCU is completely ignoring the CPU, along with any attempts and RCU read-side critical sections. The approaches of eliminating the offending uses and of pushing the definition of idle down beyond the offending uses have both proved impractical. The new approach is to encapsulate offending uses of RCU with rcu_idle_exit() and rcu_idle_enter(), but this requires nesting for code that is invoked both during idle and and during normal execution. Therefore, this commit modifies rcu_idle_enter() and rcu_idle_exit() to permit nesting. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Deepthi Dharwar --- kernel/rcu.h | 21 ++++++++++++++++++++- kernel/rcutiny.c | 16 ++++++++++++---- kernel/rcutree.c | 21 ++++++++++++++------- 3 files changed, 46 insertions(+), 12 deletions(-) (limited to 'kernel/rcutiny.c') diff --git a/kernel/rcu.h b/kernel/rcu.h index 30876f4063b6..8ba99cdc6515 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -33,8 +33,27 @@ * Process-level increment to ->dynticks_nesting field. This allows for * architectures that use half-interrupts and half-exceptions from * process context. + * + * DYNTICK_TASK_NEST_MASK defines a field of width DYNTICK_TASK_NEST_WIDTH + * that counts the number of process-based reasons why RCU cannot + * consider the corresponding CPU to be idle, and DYNTICK_TASK_NEST_VALUE + * is the value used to increment or decrement this field. + * + * The rest of the bits could in principle be used to count interrupts, + * but this would mean that a negative-one value in the interrupt + * field could incorrectly zero out the DYNTICK_TASK_NEST_MASK field. + * We therefore provide a two-bit guard field defined by DYNTICK_TASK_MASK + * that is set to DYNTICK_TASK_FLAG upon initial exit from idle. + * The DYNTICK_TASK_EXIT_IDLE value is thus the combined value used upon + * initial exit from idle. */ -#define DYNTICK_TASK_NESTING (LLONG_MAX / 2 - 1) +#define DYNTICK_TASK_NEST_WIDTH 7 +#define DYNTICK_TASK_NEST_VALUE ((LLONG_MAX >> DYNTICK_TASK_NEST_WIDTH) + 1) +#define DYNTICK_TASK_NEST_MASK (LLONG_MAX - DYNTICK_TASK_NEST_VALUE + 1) +#define DYNTICK_TASK_FLAG ((DYNTICK_TASK_NEST_VALUE / 8) * 2) +#define DYNTICK_TASK_MASK ((DYNTICK_TASK_NEST_VALUE / 8) * 3) +#define DYNTICK_TASK_EXIT_IDLE (DYNTICK_TASK_NEST_VALUE + \ + DYNTICK_TASK_FLAG) /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 4eb34fcc2a75..c8b0e1582c04 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -53,7 +53,7 @@ static void __call_rcu(struct rcu_head *head, #include "rcutiny_plugin.h" -static long long rcu_dynticks_nesting = DYNTICK_TASK_NESTING; +static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long oldval) @@ -88,7 +88,12 @@ void rcu_idle_enter(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; - rcu_dynticks_nesting = 0; + WARN_ON_ONCE((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == 0); + if ((rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) == + DYNTICK_TASK_NEST_VALUE) + rcu_dynticks_nesting = 0; + else + rcu_dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; rcu_idle_enter_common(oldval); local_irq_restore(flags); } @@ -140,8 +145,11 @@ void rcu_idle_exit(void) local_irq_save(flags); oldval = rcu_dynticks_nesting; - WARN_ON_ONCE(oldval != 0); - rcu_dynticks_nesting = DYNTICK_TASK_NESTING; + WARN_ON_ONCE(rcu_dynticks_nesting < 0); + if (rcu_dynticks_nesting & DYNTICK_TASK_NEST_MASK) + rcu_dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + else + rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_idle_exit_common(oldval); local_irq_restore(flags); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index df0e3c1bb68e..92b47760edf3 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -198,7 +198,7 @@ void rcu_note_context_switch(int cpu) EXPORT_SYMBOL_GPL(rcu_note_context_switch); DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { - .dynticks_nesting = DYNTICK_TASK_NESTING, + .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), }; @@ -394,7 +394,11 @@ void rcu_idle_enter(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; - rdtp->dynticks_nesting = 0; + WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) + rdtp->dynticks_nesting = 0; + else + rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } @@ -467,7 +471,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval) * Exit idle mode, in other words, -enter- the mode in which RCU * read-side critical sections can occur. * - * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to + * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to * allow for the possibility of usermode upcalls messing up our count * of interrupt nesting level during the busy period that is just * now starting. @@ -481,8 +485,11 @@ void rcu_idle_exit(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval != 0); - rdtp->dynticks_nesting = DYNTICK_TASK_NESTING; + WARN_ON_ONCE(oldval < 0); + if (oldval & DYNTICK_TASK_NEST_MASK) + rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; + else + rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } @@ -2253,7 +2260,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->qlen_lazy = 0; rdp->qlen = 0; rdp->dynticks = &per_cpu(rcu_dynticks, cpu); - WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING); + WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE); WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1); rdp->cpu = cpu; rdp->rsp = rsp; @@ -2281,7 +2288,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; - rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING; + rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; atomic_set(&rdp->dynticks->dynticks, (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); rcu_prepare_for_idle_init(cpu); -- cgit v1.2.3 From 8a2ecf474d3ee8dd5d001490349e422cec52f39f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Feb 2012 15:42:04 -0800 Subject: rcu: Add RCU_NONIDLE() for idle-loop RCU read-side critical sections RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden in the inner idle loop, that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU will happily ignore any such read-side critical sections. However, things like powertop need tracepoints in the inner idle loop. This commit therefore provides an RCU_NONIDLE() macro that can be used to wrap code in the idle loop that requires RCU read-side critical sections. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Deepthi Dharwar --- include/linux/rcupdate.h | 27 +++++++++++++++++++++++++++ kernel/rcutiny.c | 2 ++ kernel/rcutree.c | 2 ++ 3 files changed, 31 insertions(+) (limited to 'kernel/rcutiny.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6ee663c8745a..937217425c47 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -190,6 +190,33 @@ extern void rcu_idle_exit(void); extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +/** + * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers + * @a: Code that RCU needs to pay attention to. + * + * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden + * in the inner idle loop, that is, between the rcu_idle_enter() and + * the rcu_idle_exit() -- RCU will happily ignore any such read-side + * critical sections. However, things like powertop need tracepoints + * in the inner idle loop. + * + * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU()) + * will tell RCU that it needs to pay attending, invoke its argument + * (in this example, a call to the do_something_with_RCU() function), + * and then tell RCU to go back to ignoring this CPU. It is permissible + * to nest RCU_NONIDLE() wrappers, but the nesting level is currently + * quite limited. If deeper nesting is required, it will be necessary + * to adjust DYNTICK_TASK_NESTING_VALUE accordingly. + * + * This macro may be used from process-level code only. + */ +#define RCU_NONIDLE(a) \ + do { \ + rcu_idle_exit(); \ + do { a; } while (0); \ + rcu_idle_enter(); \ + } while (0) + /* * Infrastructure to implement the synchronize_() primitives in * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index c8b0e1582c04..37a5444204d2 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -97,6 +97,7 @@ void rcu_idle_enter(void) rcu_idle_enter_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); /* * Exit an interrupt handler towards idle. @@ -153,6 +154,7 @@ void rcu_idle_exit(void) rcu_idle_exit_common(oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); /* * Enter an interrupt handler, moving away from idle. diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 92b47760edf3..eacc10b03531 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -402,6 +402,7 @@ void rcu_idle_enter(void) rcu_idle_enter_common(rdtp, oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle @@ -493,6 +494,7 @@ void rcu_idle_exit(void) rcu_idle_exit_common(rdtp, oldval); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle -- cgit v1.2.3