From 2c42818e962e2858334bf45bfc56662b3752df34 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 26 May 2011 22:14:36 -0700 Subject: rcu: Abstract common code for RCU grace-period-wait primitives Pull the code that waits for an RCU grace period into a single function, which is then called by synchronize_rcu() and friends in the case of TREE_RCU and TREE_PREEMPT_RCU, and from rcu_barrier() and friends in the case of TINY_RCU and TINY_PREEMPT_RCU. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ba06207b1dd3..a7c6bce1af83 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1613,18 +1613,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); */ void synchronize_sched(void) { - struct rcu_synchronize rcu; - if (rcu_blocking_is_gp()) return; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + wait_rcu_gp(call_rcu_sched); } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -1639,18 +1630,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched); */ void synchronize_rcu_bh(void) { - struct rcu_synchronize rcu; - if (rcu_blocking_is_gp()) return; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + wait_rcu_gp(call_rcu_bh); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); -- cgit v1.2.3 From 29c00b4a1d9e277786120032aa8364631820d863 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Fri, 17 Jun 2011 15:53:19 -0700 Subject: rcu: Add event-tracing for RCU callback invocation There was recently some controversy about the overhead of invoking RCU callbacks. Add TRACE_EVENT()s to obtain fine-grained timings for the start and stop of a batch of callbacks and also for each callback invoked. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 50 ----------------------- include/trace/events/rcu.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcu.h | 79 +++++++++++++++++++++++++++++++++++++ kernel/rcupdate.c | 5 +++ kernel/rcutiny.c | 26 +++++++++++- kernel/rcutree.c | 15 +++++-- 6 files changed, 219 insertions(+), 54 deletions(-) create mode 100644 include/trace/events/rcu.h create mode 100644 kernel/rcu.h (limited to 'kernel/rcutree.c') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ae5327de41aa..dd2bc2c6a285 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -794,44 +794,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -/* - * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally - * by call_rcu() and rcu callback execution, and are therefore not part of the - * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 
- */ - -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -# define STATE_RCU_HEAD_READY 0 -# define STATE_RCU_HEAD_QUEUED 1 - -extern struct debug_obj_descr rcuhead_debug_descr; - -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ - WARN_ON_ONCE((unsigned long)head & 0x3); - debug_object_activate(head, &rcuhead_debug_descr); - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_READY, - STATE_RCU_HEAD_QUEUED); -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_QUEUED, - STATE_RCU_HEAD_READY); - debug_object_deactivate(head, &rcuhead_debug_descr); -} -#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ -} -#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) { return offset < 4096; @@ -850,18 +812,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) call_rcu(head, (rcu_callback)offset); } -extern void kfree(const void *); - -static inline void __rcu_reclaim(struct rcu_head *head) -{ - unsigned long offset = (unsigned long)head->func; - - if (__is_kfree_rcu_offset(offset)) - kfree((void *)head - offset); - else - head->func(head); -} - /** * kfree_rcu() - kfree an object after a grace period. 
* @ptr: pointer to kfree diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..db3f6e9e63e6 --- /dev/null +++ b/include/trace/events/rcu.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rcu + +#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RCU_H + +#include + +/* + * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + */ +TRACE_EVENT(rcu_batch_start, + + TP_PROTO(long callbacks_ready, int blimit), + + TP_ARGS(callbacks_ready, blimit), + + TP_STRUCT__entry( + __field( long, callbacks_ready ) + __field( int, blimit ) + ), + + TP_fast_assign( + __entry->callbacks_ready = callbacks_ready; + __entry->blimit = blimit; + ), + + TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) +); + +/* + * Tracepoint for the invocation of a single RCU callback + */ +TRACE_EVENT(rcu_invoke_callback, + + TP_PROTO(struct rcu_head *rhp), + + TP_ARGS(rhp), + + TP_STRUCT__entry( + __field( void *, rhp ) + __field( void *, func ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->func = rhp->func; + ), + + TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) +); + +/* + * Tracepoint for the invocation of a single RCU kfree callback + */ +TRACE_EVENT(rcu_invoke_kfree_callback, + + TP_PROTO(struct rcu_head *rhp, unsigned long offset), + + TP_ARGS(rhp, offset), + + TP_STRUCT__entry( + __field(void *, rhp ) + __field(unsigned long, offset ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->offset = offset; + ), + + TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) +); + +/* + * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + */ +TRACE_EVENT(rcu_batch_end, + + TP_PROTO(int callbacks_invoked), + + TP_ARGS(callbacks_invoked), + + TP_STRUCT__entry( + __field( int, callbacks_invoked ) + ), + + TP_fast_assign( + __entry->callbacks_invoked = callbacks_invoked; + ), + + TP_printk("CBs-invoked=%d", 
__entry->callbacks_invoked) +); + +#endif /* _TRACE_RCU_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/rcu.h b/kernel/rcu.h new file mode 100644 index 000000000000..7bc16436aba0 --- /dev/null +++ b/kernel/rcu.h @@ -0,0 +1,79 @@ +/* + * Read-Copy Update definitions shared among RCU implementations. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2011 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_RCU_H +#define __LINUX_RCU_H + +/* + * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally + * by call_rcu() and rcu callback execution, and are therefore not part of the + * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 
+ */ + +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +# define STATE_RCU_HEAD_READY 0 +# define STATE_RCU_HEAD_QUEUED 1 + +extern struct debug_obj_descr rcuhead_debug_descr; + +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ + WARN_ON_ONCE((unsigned long)head & 0x3); + debug_object_activate(head, &rcuhead_debug_descr); + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_READY, + STATE_RCU_HEAD_QUEUED); +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_QUEUED, + STATE_RCU_HEAD_READY); + debug_object_deactivate(head, &rcuhead_debug_descr); +} +#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ +} +#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + +extern void kfree(const void *); + +static inline void __rcu_reclaim(struct rcu_head *head) +{ + unsigned long offset = (unsigned long)head->func; + + if (__is_kfree_rcu_offset(offset)) { + trace_rcu_invoke_kfree_callback(head, offset); + kfree((void *)head - offset); + } else { + trace_rcu_invoke_callback(head); + head->func(head); + } +} + +#endif /* __LINUX_RCU_H */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 09b3b1b54e02..ca0d23b6b3e8 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -46,6 +46,11 @@ #include #include +#define CREATE_TRACE_POINTS +#include + +#include "rcu.h" + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index f544e343256a..19453ba1392e 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -37,6 +37,25 @@ #include #include +#ifdef CONFIG_RCU_TRACE + +#include + +#else /* #ifdef CONFIG_RCU_TRACE */ + +/* No by-default tracing in TINY_RCU: Keep TINY_RCU tiny! 
*/ +static void trace_rcu_invoke_kfree_callback(struct rcu_head *rhp, + unsigned long offset) +{ +} +static void trace_rcu_invoke_callback(struct rcu_head *head) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + +#include "rcu.h" + /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ static struct task_struct *rcu_kthread_task; static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); @@ -161,11 +180,15 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(int cb_count = 0); /* If no RCU callbacks ready to invoke, just return. */ - if (&rcp->rcucblist == rcp->donetail) + if (&rcp->rcucblist == rcp->donetail) { + RCU_TRACE(trace_rcu_batch_start(0, -1)); + RCU_TRACE(trace_rcu_batch_end(0)); return; + } /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + RCU_TRACE(trace_rcu_batch_start(0, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -187,6 +210,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); + RCU_TRACE(trace_rcu_batch_end(cb_count)); } /* diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a7c6bce1af83..45dcc2036a1e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -52,6 +52,9 @@ #include #include "rcutree.h" +#include + +#include "rcu.h" /* Data structures. */ @@ -1190,17 +1193,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) { unsigned long flags; struct rcu_head *next, *list, **tail; - int count; + int bl, count; /* If no callbacks are ready, just return.*/ - if (!cpu_has_callbacks_ready_to_invoke(rdp)) + if (!cpu_has_callbacks_ready_to_invoke(rdp)) { + trace_rcu_batch_start(0, 0); + trace_rcu_batch_end(0); return; + } /* * Extract the list of ready callbacks, disabling to prevent * races with call_rcu() from interrupt handlers. 
*/ local_irq_save(flags); + bl = rdp->blimit; + trace_rcu_batch_start(rdp->qlen, bl); list = rdp->nxtlist; rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; @@ -1218,11 +1226,12 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) debug_rcu_head_unqueue(list); __rcu_reclaim(list); list = next; - if (++count >= rdp->blimit) + if (++count >= bl) break; } local_irq_save(flags); + trace_rcu_batch_end(count); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.2.3 From 300df91ca9358f7f09298eec9503c12b32054ef7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 22:26:31 -0700 Subject: rcu: Event-trace markers for computing RCU CPU utilization This commit adds the trace_rcu_utilization() marker that is to be used to allow postprocessing scripts to compute RCU's CPU utilization, give or take event-trace overhead. Note that we do not include RCU's dyntick-idle interface because event tracing requires RCU protection, which is not available in dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 73 +++++++++++++++++++++++++++++++++------------- kernel/rcutree.c | 16 +++++++++- 2 files changed, 68 insertions(+), 21 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index db3f6e9e63e6..ab458eb689fb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,29 +7,58 @@ #include /* - * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + * Tracepoint for start/end markers used for utilization calculations. + * By convention, the string is of the following forms: + * + * "Start <activity>" -- Mark the start of the specified activity, + * such as "context switch". Nesting is permitted. + * "End <activity>" -- Mark the end of the specified activity.
+ */ +TRACE_EVENT(rcu_utilization, + + TP_PROTO(char *s), + + TP_ARGS(s), + + TP_STRUCT__entry( + __field(char *, s) + ), + + TP_fast_assign( + __entry->s = s; + ), + + TP_printk("%s", __entry->s) +); + +/* + * Tracepoint for marking the beginning rcu_do_batch, performed to start + * RCU callback invocation. The first argument is the total number of + * callbacks (including those that are not yet ready to be invoked), + * and the second argument is the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long callbacks_ready, int blimit), + TP_PROTO(long qlen, int blimit), - TP_ARGS(callbacks_ready, blimit), + TP_ARGS(qlen, blimit), TP_STRUCT__entry( - __field( long, callbacks_ready ) - __field( int, blimit ) + __field(long, qlen) + __field(int, blimit) ), TP_fast_assign( - __entry->callbacks_ready = callbacks_ready; - __entry->blimit = blimit; + __entry->qlen = qlen; + __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) + TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) ); /* - * Tracepoint for the invocation of a single RCU callback + * Tracepoint for the invocation of a single RCU callback function. + * The argument is a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, @@ -38,20 +67,23 @@ TRACE_EVENT(rcu_invoke_callback, TP_ARGS(rhp), TP_STRUCT__entry( - __field( void *, rhp ) - __field( void *, func ) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( - __entry->rhp = rhp; - __entry->func = rhp->func; + __entry->rhp = rhp; + __entry->func = rhp->func; ), TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) ); /* - * Tracepoint for the invocation of a single RCU kfree callback + * Tracepoint for the invocation of a single RCU callback of the special + * kfree() form. The first argument is a pointer to the RCU callback + * and the second argument is the offset of the callback within the + * enclosing RCU-protected data structure. 
*/ TRACE_EVENT(rcu_invoke_kfree_callback, @@ -60,12 +92,12 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TP_ARGS(rhp, offset), TP_STRUCT__entry( - __field(void *, rhp ) - __field(unsigned long, offset ) + __field(void *, rhp) + __field(unsigned long, offset) ), TP_fast_assign( - __entry->rhp = rhp; + __entry->rhp = rhp; __entry->offset = offset; ), @@ -73,7 +105,8 @@ TRACE_EVENT(rcu_invoke_kfree_callback, ); /* - * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + * Tracepoint for exiting rcu_do_batch after RCU callbacks have been + * invoked. The first argument is the number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, @@ -82,11 +115,11 @@ TRACE_EVENT(rcu_batch_end, TP_ARGS(callbacks_invoked), TP_STRUCT__entry( - __field( int, callbacks_invoked ) + __field(int, callbacks_invoked) ), TP_fast_assign( - __entry->callbacks_invoked = callbacks_invoked; + __entry->callbacks_invoked = callbacks_invoked; ), TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 45dcc2036a1e..2a9643bd6ae9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -184,8 +184,10 @@ void rcu_bh_qs(int cpu) */ void rcu_note_context_switch(int cpu) { + trace_rcu_utilization("Start context switch"); rcu_sched_qs(cpu); rcu_preempt_note_context_switch(cpu); + trace_rcu_utilization("End context switch"); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); @@ -1275,6 +1277,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { + trace_rcu_utilization("Start scheduler-tick"); if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1308,6 +1311,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); + trace_rcu_utilization("End scheduler-tick"); } #ifdef CONFIG_SMP @@ -1369,10 +1373,14 @@ static void 
force_quiescent_state(struct rcu_state *rsp, int relaxed) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - if (!rcu_gp_in_progress(rsp)) + trace_rcu_utilization("Start fqs"); + if (!rcu_gp_in_progress(rsp)) { + trace_rcu_utilization("End fqs"); return; /* No grace period in progress, nothing to force. */ + } if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ + trace_rcu_utilization("End fqs"); return; /* Someone else is already on the job. */ } if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) @@ -1421,11 +1429,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ rsp->fqs_need_gp = 0; rcu_start_gp(rsp, flags); /* releases rnp->lock */ + trace_rcu_utilization("End fqs"); return; } raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ unlock_fqs_ret: raw_spin_unlock_irqrestore(&rsp->fqslock, flags); + trace_rcu_utilization("End fqs"); } #else /* #ifdef CONFIG_SMP */ @@ -1481,6 +1491,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(struct softirq_action *unused) { + trace_rcu_utilization("Start RCU core"); __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); @@ -1488,6 +1499,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) /* If we are last CPU on way to dyntick-idle mode, accelerate it. 
*/ rcu_needs_cpu_flush(); + trace_rcu_utilization("End RCU core"); } /* @@ -1910,6 +1922,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; + trace_rcu_utilization("Start CPU hotplug"); switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: @@ -1945,6 +1958,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, default: break; } + trace_rcu_utilization("End CPU hotplug"); return NOTIFY_OK; } -- cgit v1.2.3 From e99033c5c160f1f247c665923a66acec693a967c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 00:13:44 -0700 Subject: rcu: Put names into TINY_RCU structures under RCU_TRACE In order to allow event tracing to distinguish between flavors of RCU, we need those names in the relevant RCU data structures. TINY_RCU has avoided them for memory-footprint reasons, so add them only if CONFIG_RCU_TRACE=y. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcu.h | 10 ++++++++-- kernel/rcutiny.c | 13 ------------- kernel/rcutiny_plugin.h | 10 ++++------ kernel/rcutree.c | 10 +++++----- kernel/rcutree_plugin.h | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcu.h b/kernel/rcu.h index 7bc16436aba0..d7f00ec8b47b 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -23,6 +23,12 @@ #ifndef __LINUX_RCU_H #define __LINUX_RCU_H +#ifdef CONFIG_RCU_TRACE +#define RCU_TRACE(stmt) stmt +#else /* #ifdef CONFIG_RCU_TRACE */ +#define RCU_TRACE(stmt) +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the @@ -68,10 +74,10 @@ static inline void __rcu_reclaim(struct rcu_head *head) unsigned long offset = (unsigned long)head->func; if (__is_kfree_rcu_offset(offset)) { - trace_rcu_invoke_kfree_callback(head, offset); + RCU_TRACE(trace_rcu_invoke_kfree_callback(head, offset)); kfree((void *)head - offset); } else { - trace_rcu_invoke_callback(head); + RCU_TRACE(trace_rcu_invoke_callback(head)); head->func(head); } } diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 19453ba1392e..0d28974b78f4 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -38,20 +38,7 @@ #include #ifdef CONFIG_RCU_TRACE - #include - -#else /* #ifdef CONFIG_RCU_TRACE */ - -/* No by-default tracing in TINY_RCU: Keep TINY_RCU tiny! 
*/ -static void trace_rcu_invoke_kfree_callback(struct rcu_head *rhp, - unsigned long offset) -{ -} -static void trace_rcu_invoke_callback(struct rcu_head *head) -{ -} - #endif /* #else #ifdef CONFIG_RCU_TRACE */ #include "rcu.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 6b0cedb383e0..791ddf7c99ab 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -26,29 +26,26 @@ #include #include -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(stmt) stmt -#else /* #ifdef CONFIG_RCU_TRACE */ -#define RCU_TRACE(stmt) -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ struct rcu_head **curtail; /* ->next pointer of last CB. */ RCU_TRACE(long qlen); /* Number of pending CBs. */ + RCU_TRACE(char *name); /* Name of RCU type. */ }; /* Definition for rcupdate control block. 
*/ static struct rcu_ctrlblk rcu_sched_ctrlblk = { .donetail = &rcu_sched_ctrlblk.rcucblist, .curtail = &rcu_sched_ctrlblk.rcucblist, + RCU_TRACE(.name = "rcu_sched") }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { .donetail = &rcu_bh_ctrlblk.rcucblist, .curtail = &rcu_bh_ctrlblk.rcucblist, + RCU_TRACE(.name = "rcu_bh") }; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), + RCU_TRACE(.rcb.name = "rcu_preempt") }; static int rcu_preempted_readers_exp(void); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2a9643bd6ae9..b953e2c72e25 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -61,7 +61,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; #define RCU_STATE_INITIALIZER(structname) { \ - .level = { &structname.node[0] }, \ + .level = { &structname##_state.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. 
*/ \ NUM_RCU_LVL_1, \ @@ -72,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ .name = #structname, \ } -struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); +struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched); DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); -struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); static struct rcu_state *rcu_state; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 43daa46bc6f2..a90bf3c17492 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -64,7 +64,7 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_TREE_PREEMPT_RCU -struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); +struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); static struct rcu_state *rcu_state = &rcu_preempt_state; -- cgit v1.2.3 From 72fe701b70e6ced35d734b676c13efbc8fc769a9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 01:14:54 -0700 Subject: rcu: Add RCU type to callback-invocation tracing Add a string to the rcu_batch_start() and rcu_batch_end() trace messages that indicates the RCU type ("rcu_sched", "rcu_bh", or "rcu_preempt"). The trace messages for the actual invocations themselves are not marked, as it should be clear from the rcu_batch_start() and rcu_batch_end() events before and after. Signed-off-by: Paul E. 
McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 28 ++++++++++++++++++---------- kernel/rcutiny.c | 8 ++++---- kernel/rcutree.c | 8 ++++---- 3 files changed, 26 insertions(+), 18 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ab458eb689fb..508824e5a77d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -33,27 +33,31 @@ TRACE_EVENT(rcu_utilization, /* * Tracepoint for marking the beginning rcu_do_batch, performed to start - * RCU callback invocation. The first argument is the total number of - * callbacks (including those that are not yet ready to be invoked), - * and the second argument is the current RCU-callback batch limit. + * RCU callback invocation. The first argument is the RCU flavor, + * the second is the total number of callbacks (including those that + * are not yet ready to be invoked), and the third argument is the + * current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen, int blimit), - TP_ARGS(qlen, blimit), + TP_ARGS(rcuname, qlen, blimit), TP_STRUCT__entry( + __field(char *, rcuname) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld bl=%d", + __entry->rcuname, __entry->qlen, __entry->blimit) ); /* @@ -106,23 +110,27 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor and + * the second argument is number of callbacks actually invoked. 
*/ TRACE_EVENT(rcu_batch_end, - TP_PROTO(int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked), - TP_ARGS(callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked), TP_STRUCT__entry( + __field(char *, rcuname) __field(int, callbacks_invoked) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; ), - TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) + TP_printk("%s CBs-invoked=%d", + __entry->rcuname, __entry->callbacks_invoked) ); #endif /* _TRACE_RCU_H */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0d28974b78f4..1c37bdd464f1 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -168,14 +168,14 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) { - RCU_TRACE(trace_rcu_batch_start(0, -1)); - RCU_TRACE(trace_rcu_batch_end(0)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, 0)); return; } /* Move the ready-to-invoke callbacks to a local list. 
*/ local_irq_save(flags); - RCU_TRACE(trace_rcu_batch_start(0, -1)); + RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; *rcp->donetail = NULL; @@ -197,7 +197,7 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(cb_count)); + RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count)); } /* diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b953e2c72e25..eb6e731088a0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1199,8 +1199,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* If no callbacks are ready, just return.*/ if (!cpu_has_callbacks_ready_to_invoke(rdp)) { - trace_rcu_batch_start(0, 0); - trace_rcu_batch_end(0); + trace_rcu_batch_start(rsp->name, 0, 0); + trace_rcu_batch_end(rsp->name, 0); return; } @@ -1210,7 +1210,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ local_irq_save(flags); bl = rdp->blimit; - trace_rcu_batch_start(rdp->qlen, bl); + trace_rcu_batch_start(rsp->name, rdp->qlen, bl); list = rdp->nxtlist; rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; *rdp->nxttail[RCU_DONE_TAIL] = NULL; @@ -1233,7 +1233,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) } local_irq_save(flags); - trace_rcu_batch_end(count); + trace_rcu_batch_end(rsp->name, count); /* Update count, and requeue any remaining callbacks. */ rdp->qlen -= count; -- cgit v1.2.3 From e0f23060adfa3f27beaa7918eff70258b88471b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 01:29:39 -0700 Subject: rcu: Update comments to reflect softirqs vs. kthreads We now have kthreads only for flavors of RCU that support boosting, so update the now-misleading comments accordingly. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 23 ++++++++++++----------- kernel/rcutree_plugin.h | 3 ++- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index eb6e731088a0..4e24399cabcf 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -198,7 +198,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { }; #endif /* #ifdef CONFIG_NO_HZ */ -static int blimit = 10; /* Maximum callbacks per softirq. */ +static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */ static int qhimark = 10000; /* If this many pending, ignore blimit. */ static int qlowmark = 100; /* Once only this many pending, use blimit. */ @@ -1261,7 +1261,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_restore(flags); - /* Re-raise the RCU softirq if there are callbacks remaining. */ + /* Re-invoke RCU core processing if there are callbacks remaining. */ if (cpu_has_callbacks_ready_to_invoke(rdp)) invoke_rcu_core(); } @@ -1269,7 +1269,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) /* * Check to see if this CPU is in a non-context-switch quiescent state * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). - * Also schedule the RCU softirq handler. + * Also schedule RCU core processing. * * This function must be called with hardirqs disabled. It is normally * invoked from the scheduling-clock interrupt. If rcu_pending returns @@ -1448,9 +1448,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) #endif /* #else #ifdef CONFIG_SMP */ /* - * This does the RCU processing work from softirq context for the - * specified rcu_state and rcu_data structures. This may be called - * only from the CPU to whom the rdp belongs. + * This does the RCU core processing work for the specified rcu_state + * and rcu_data structures. This may be called only from the CPU to + * whom the rdp belongs. 
*/ static void __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) @@ -1487,7 +1487,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * Do softirq processing for the current CPU. + * Do RCU core processing for the current CPU. */ static void rcu_process_callbacks(struct softirq_action *unused) { @@ -1503,10 +1503,11 @@ static void rcu_process_callbacks(struct softirq_action *unused) } /* - * Wake up the current CPU's kthread. This replaces raise_softirq() - * in earlier versions of RCU. Note that because we are running on - * the current CPU with interrupts disabled, the rcu_cpu_kthread_task - * cannot disappear out from under us. + * Schedule RCU callback invocation. If the specified type of RCU + * does not support RCU priority boosting, just do a direct call, + * otherwise wake up the per-CPU kernel kthread. Note that because we + * are running on the current CPU with interrupts disabled, the + * rcu_cpu_kthread_task cannot disappear out from under us. */ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) { diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a90bf3c17492..ecd48a2e3eeb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1478,7 +1478,8 @@ static int rcu_cpu_kthread_should_stop(int cpu) /* * Per-CPU kernel thread that invokes RCU callbacks. This replaces the - * earlier RCU softirq. + * RCU softirq used in flavors and configurations of RCU that do not + * support RCU priority boosting. */ static int rcu_cpu_kthread(void *arg) { -- cgit v1.2.3 From d4c08f2ac311a360230eef7e5395b0ec8d8f0670 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 25 Jun 2011 06:36:56 -0700 Subject: rcu: Add grace-period, quiescent-state, and call_rcu trace events Add trace events to record grace-period start and end, quiescent states, CPUs noticing grace-period start and end, grace-period initialization, call_rcu() invocation, tasks blocking in RCU read-side critical sections, tasks exiting those same critical sections, force_quiescent_state() detection of dyntick-idle and offline CPUs, CPUs entering and leaving dyntick-idle mode (except from NMIs), CPUs coming online and going offline, and CPUs being kicked for staying in dyntick-idle mode for too long (as in many weeks, even on 32-bit systems). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney rcu: Add the rcu flavor to callback trace events The earlier trace events for registering RCU callbacks and for invoking them did not include the RCU flavor (rcu_bh, rcu_preempt, or rcu_sched). This commit adds the RCU flavor to those trace events. Signed-off-by: Paul E. 
McKenney --- include/trace/events/rcu.h | 345 +++++++++++++++++++++++++++++++++++++++++++-- kernel/rcu.h | 6 +- kernel/rcutiny.c | 4 +- kernel/rcutree.c | 45 +++++- kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 22 ++- 6 files changed, 399 insertions(+), 24 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ac52aba00a3e..669fbd62ec25 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -24,7 +24,7 @@ TRACE_EVENT(rcu_utilization, TP_ARGS(s), TP_STRUCT__entry( - __field(char *, s) + __field(char *, s) ), TP_fast_assign( @@ -34,6 +34,297 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); +#ifdef CONFIG_RCU_TRACE + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) + +/* + * Tracepoint for grace-period events: starting and ending a grace + * period ("start" and "end", respectively), a CPU noting the start + * of a new grace period or the end of an old grace period ("cpustart" + * and "cpuend", respectively), a CPU passing through a quiescent + * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" + * and "cpuofl", respectively), and a CPU being kicked for being too + * long in dyntick-idle mode ("kick"). + */ +TRACE_EVENT(rcu_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), + + TP_ARGS(rcuname, gpnum, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %s", + __entry->rcuname, __entry->gpnum, __entry->gpevent) +); + +/* + * Tracepoint for grace-period-initialization events. These are + * distinguished by the type of RCU, the new grace-period number, the + * rcu_node structure level, the starting and ending CPU covered by the + * rcu_node structure, and the mask of CPUs that will be waited for. 
+ * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_grace_period_init, + + TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, + int grplo, int grphi, unsigned long qsmask), + + TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(unsigned long, qsmask) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->qsmask = qsmask; + ), + + TP_printk("%s %lu %u %d %d %lx", + __entry->rcuname, __entry->gpnum, __entry->level, + __entry->grplo, __entry->grphi, __entry->qsmask) +); + +/* + * Tracepoint for tasks blocking within preemptible-RCU read-side + * critical sections. Track the type of RCU (which one day might + * include SRCU), the grace-period number that the task is blocking + * (the current or the next), and the task's PID. + */ +TRACE_EVENT(rcu_preempt_task, + + TP_PROTO(char *rcuname, int pid, unsigned long gpnum), + + TP_ARGS(rcuname, pid, gpnum), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", + __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for tasks that blocked within a given preemptible-RCU + * read-side critical section exiting that critical section. Track the + * type of RCU (which one day might include SRCU) and the task's PID. 
+ */ +TRACE_EVENT(rcu_unlock_preempted_task, + + TP_PROTO(char *rcuname, unsigned long gpnum, int pid), + + TP_ARGS(rcuname, gpnum, pid), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for quiescent-state-reporting events. These are + * distinguished by the type of RCU, the grace-period number, the + * mask of quiescent lower-level entities, the rcu_node structure level, + * the starting and ending CPU covered by the rcu_node structure, and + * whether there are any blocked tasks blocking the current grace period. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_quiescent_state_report, + + TP_PROTO(char *rcuname, unsigned long gpnum, + unsigned long mask, unsigned long qsmask, + u8 level, int grplo, int grphi, int gp_tasks), + + TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, mask) + __field(unsigned long, qsmask) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(u8, gp_tasks) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->mask = mask; + __entry->qsmask = qsmask; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gp_tasks = gp_tasks; + ), + + TP_printk("%s %lu %lx>%lx %u %d %d %u", + __entry->rcuname, __entry->gpnum, + __entry->mask, __entry->qsmask, __entry->level, + __entry->grplo, __entry->grphi, __entry->gp_tasks) +); + +/* + * Tracepoint for quiescent states detected by force_quiescent_state(). 
+ * These trace events include the type of RCU, the grace-period number + * that was blocked by the CPU, the CPU itself, and the type of quiescent + * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, + * or "kick" when kicking a CPU that has been in dyntick-idle mode for + * too long. + */ +TRACE_EVENT(rcu_fqs, + + TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), + + TP_ARGS(rcuname, gpnum, cpu, qsevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, cpu) + __field(char *, qsevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->cpu = cpu; + __entry->qsevent = qsevent; + ), + + TP_printk("%s %lu %d %s", + __entry->rcuname, __entry->gpnum, + __entry->cpu, __entry->qsevent) +); + +#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ + +/* + * Tracepoint for dyntick-idle entry/exit events. These take a string + * as argument: "Start" for entering dyntick-idle mode and "End" for + * leaving it. + */ +TRACE_EVENT(rcu_dyntick, + + TP_PROTO(char *polarity), + + TP_ARGS(polarity), + + TP_STRUCT__entry( + __field(char *, polarity) + ), + + TP_fast_assign( + __entry->polarity = polarity; + ), + + TP_printk("%s", __entry->polarity) +); + +/* + * Tracepoint for the registration of a single RCU callback function. + * The first argument is the type of RCU, the second argument is + * a pointer to the RCU callback itself, and the third element is the + * new RCU callback queue length for the current CPU. 
+ */ +TRACE_EVENT(rcu_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + + TP_ARGS(rcuname, rhp, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->func = rhp->func; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%pf %ld", + __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) +); + +/* + * Tracepoint for the registration of a single RCU callback of the special + * kfree() form. The first argument is the RCU type, the second argument + * is a pointer to the RCU callback, the third argument is the offset + * of the callback within the enclosing RCU-protected data structure, + * and the fourth argument is the new RCU callback queue length for the + * current CPU. + */ +TRACE_EVENT(rcu_kfree_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, + long qlen), + + TP_ARGS(rcuname, rhp, offset, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(unsigned long, offset) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->offset = offset; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%ld %ld", + __entry->rcuname, __entry->rhp, __entry->offset, + __entry->qlen) +); + /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, @@ -65,50 +356,58 @@ TRACE_EVENT(rcu_batch_start, /* * Tracepoint for the invocation of a single RCU callback function. - * The argument is a pointer to the RCU callback itself. + * The first argument is the type of RCU, and the second argument is + * a pointer to the RCU callback itself. 
*/ TRACE_EVENT(rcu_invoke_callback, - TP_PROTO(struct rcu_head *rhp), + TP_PROTO(char *rcuname, struct rcu_head *rhp), - TP_ARGS(rhp), + TP_ARGS(rcuname, rhp), TP_STRUCT__entry( - __field(void *, rhp) - __field(void *, func) + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; ), - TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) + TP_printk("%s rhp=%p func=%pf", + __entry->rcuname, __entry->rhp, __entry->func) ); /* * Tracepoint for the invocation of a single RCU callback of the special - * kfree() form. The first argument is a pointer to the RCU callback - * and the second argument is the offset of the callback within the - * enclosing RCU-protected data structure. + * kfree() form. The first argument is the RCU flavor, the second + * argument is a pointer to the RCU callback, and the third argument + * is the offset of the callback within the enclosing RCU-protected + * data structure. 
*/ TRACE_EVENT(rcu_invoke_kfree_callback, - TP_PROTO(struct rcu_head *rhp, unsigned long offset), + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), - TP_ARGS(rhp, offset), + TP_ARGS(rcuname, rhp, offset), TP_STRUCT__entry( - __field(void *, rhp) + __field(char *, rcuname) + __field(void *, rhp) __field(unsigned long, offset) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; ), - TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) + TP_printk("%s rhp=%p func=%ld", + __entry->rcuname, __entry->rhp, __entry->offset) ); /* @@ -136,6 +435,24 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +#else /* #ifdef CONFIG_RCU_TRACE */ + +#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) +#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) +#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ diff --git a/kernel/rcu.h b/kernel/rcu.h index d7f00ec8b47b..f600868d550d 100644 --- a/kernel/rcu.h +++ b/kernel/rcu.h @@ -69,15 +69,15 @@ static 
inline void debug_rcu_head_unqueue(struct rcu_head *head) extern void kfree(const void *); -static inline void __rcu_reclaim(struct rcu_head *head) +static inline void __rcu_reclaim(char *rn, struct rcu_head *head) { unsigned long offset = (unsigned long)head->func; if (__is_kfree_rcu_offset(offset)) { - RCU_TRACE(trace_rcu_invoke_kfree_callback(head, offset)); + RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); kfree((void *)head - offset); } else { - RCU_TRACE(trace_rcu_invoke_callback(head)); + RCU_TRACE(trace_rcu_invoke_callback(rn, head)); head->func(head); } } diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index c9321d86999b..da775c87f27f 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -147,6 +147,7 @@ void rcu_check_callbacks(int cpu, int user) */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { + char *rn = NULL; struct rcu_head *next, *list; unsigned long flags; RCU_TRACE(int cb_count = 0); @@ -171,12 +172,13 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) local_irq_restore(flags); /* Invoke the callbacks on the local list. 
*/ + RCU_TRACE(rn = rcp->name); while (list) { next = list->next; prefetch(next); debug_rcu_head_unqueue(list); local_bh_disable(); - __rcu_reclaim(list); + __rcu_reclaim(rn, list); local_bh_enable(); list = next; RCU_TRACE(cb_count++); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4e24399cabcf..7e0282949f8a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -166,6 +166,8 @@ void rcu_sched_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; } @@ -175,6 +177,8 @@ void rcu_bh_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; } @@ -319,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) * trust its state not to change because interrupts are disabled. */ if (cpu_is_offline(rdp->cpu)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl"); rdp->offline_fqs++; return 1; } @@ -359,6 +364,7 @@ void rcu_enter_nohz(void) local_irq_restore(flags); return; } + trace_rcu_dyntick("Start"); /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ smp_mb__before_atomic_inc(); /* See above. */ atomic_inc(&rdtp->dynticks); @@ -396,6 +402,7 @@ void rcu_exit_nohz(void) /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic_inc(); /* See above. */ WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + trace_rcu_dyntick("End"); local_irq_restore(flags); } @@ -501,6 +508,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * of the current RCU grace period. */ if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { + trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); rdp->dynticks_fqs++; return 1; } @@ -683,6 +691,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct * go looking for one. 
*/ rdp->gpnum = rnp->gpnum; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; rdp->passed_quiesc = 0; @@ -746,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat /* Remember that we saw this grace-period completion. */ rdp->completed = rnp->completed; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend"); /* * If we were in an extended quiescent state, we may have @@ -856,6 +866,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) /* Advance to a new grace period and initialize state. */ rsp->gpnum++; + trace_rcu_grace_period(rsp->name, rsp->gpnum, "start"); WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; @@ -870,6 +881,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + rnp->level, rnp->grplo, + rnp->grphi, rnp->qsmask); raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -906,6 +920,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) if (rnp == rdp->mynode) rcu_start_gp_per_cpu(rsp, rnp, rdp); rcu_preempt_boost_start_gp(rnp); + trace_rcu_grace_period_init(rsp->name, rnp->gpnum, + rnp->level, rnp->grplo, + rnp->grphi, rnp->qsmask); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -939,6 +956,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; rsp->completed = rsp->gpnum; + trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->signaled = RCU_GP_IDLE; rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. 
*/ } @@ -967,6 +985,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, return; } rnp->qsmask &= ~mask; + trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, + mask, rnp->qsmask, rnp->level, + rnp->grplo, rnp->grphi, + !!rnp->gp_tasks); if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { /* Other bits still set at this level, so done. */ @@ -1135,11 +1157,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) if (rnp->qsmaskinit != 0) { if (rnp != rdp->mynode) raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + else + trace_rcu_grace_period(rsp->name, + rnp->gpnum + 1 - + !!(rnp->qsmask & mask), + "cpuofl"); break; } - if (rnp == rdp->mynode) + if (rnp == rdp->mynode) { + trace_rcu_grace_period(rsp->name, + rnp->gpnum + 1 - + !!(rnp->qsmask & mask), + "cpuofl"); need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); - else + } else raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; rnp = rnp->parent; @@ -1226,7 +1257,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) next = list->next; prefetch(next); debug_rcu_head_unqueue(list); - __rcu_reclaim(list); + __rcu_reclaim(rsp->name, list); list = next; if (++count >= bl) break; @@ -1552,6 +1583,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), rdp->nxttail[RCU_NEXT_TAIL] = &head->next; rdp->qlen++; + if (__is_kfree_rcu_offset((unsigned long)func)) + trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, + rdp->qlen); + else + trace_rcu_callback(rsp->name, head, rdp->qlen); + /* If interrupts were disabled, don't dive into RCU core. 
*/ if (irqs_disabled_flags(flags)) { local_irq_restore(flags); @@ -1850,6 +1887,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->dynticks = &per_cpu(rcu_dynticks, cpu); #endif /* #ifdef CONFIG_NO_HZ */ rdp->cpu = cpu; + rdp->rsp = rsp; raw_spin_unlock_irqrestore(&rnp->lock, flags); } @@ -1898,6 +1936,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rdp->gpnum = rnp->completed; /* if GP in progress... */ rdp->completed = rnp->completed; rdp->passed_quiesc_completed = rnp->completed - 1; + trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; diff --git a/kernel/rcutree.h b/kernel/rcutree.h index eee6c9406b46..d11a0065321c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -299,6 +299,7 @@ struct rcu_data { unsigned long n_rp_need_nothing; int cpu; + struct rcu_state *rsp; }; /* Values for signaled field in struct rcu_state. */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 94d9ca1e4061..bdb2e82f78d3 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -124,6 +124,8 @@ static void rcu_preempt_qs(int cpu) rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); + if (rdp->passed_quiesc == 0) + trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); rdp->passed_quiesc = 1; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; } @@ -190,6 +192,11 @@ static void rcu_preempt_note_context_switch(int cpu) if (rnp->qsmask & rdp->grpmask) rnp->gp_tasks = &t->rcu_node_entry; } + trace_rcu_preempt_task(rdp->rsp->name, + t->pid, + (rnp->qsmask & rdp->grpmask) + ? rnp->gpnum + : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && t->rcu_read_unlock_special) { @@ -344,6 +351,8 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. 
*/ np = rcu_next_node_entry(t, rnp); list_del_init(&t->rcu_node_entry); + trace_rcu_unlock_preempted_task("rcu_preempt", + rnp->gpnum, t->pid); if (&t->rcu_node_entry == rnp->gp_tasks) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) @@ -364,10 +373,17 @@ static noinline void rcu_read_unlock_special(struct task_struct *t) * we aren't waiting on any CPUs, report the quiescent state. * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ - if (empty) - raw_spin_unlock_irqrestore(&rnp->lock, flags); - else + if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) { + trace_rcu_quiescent_state_report("preempt_rcu", + rnp->gpnum, + 0, rnp->qsmask, + rnp->level, + rnp->grplo, + rnp->grphi, + !!rnp->gp_tasks); rcu_report_unblock_qs_rnp(rnp, flags); + } else + raw_spin_unlock_irqrestore(&rnp->lock, flags); #ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ -- cgit v1.2.3 From e4cc1f22b2f4e9b0207a8cdb63e56dcf99e82d35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jun 2011 00:17:43 -0700 Subject: rcu: Simplify quiescent-state accounting There is often a delay between the time that a CPU passes through a quiescent state and the time that this quiescent state is reported to the RCU core. It is quite possible that the grace period ended before the quiescent state could be reported, for example, some other CPU might have deduced that this CPU passed through dyntick-idle mode. It is critically important that quiescent state be counted only against the grace period that was in effect at the time that the quiescent state was detected. Previously, this was handled by recording the number of the last grace period to complete when passing through a quiescent state. The RCU core then checks this number against the current value, and rejects the quiescent state if there is a mismatch. 
However, one additional possibility must be accounted for, namely that the quiescent state was recorded after the prior grace period completed but before the current grace period started. In this case, the RCU core must reject the quiescent state, but the recorded number will match. This is handled when the CPU becomes aware of a new grace period -- at that point, it invalidates any prior quiescent state. This works, but is a bit indirect. The new approach records the current grace period, and the RCU core checks to see (1) that this is still the current grace period and (2) that this grace period has not yet ended. This approach simplifies reasoning about correctness, and this commit changes over to this new approach. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- Documentation/RCU/trace.txt | 34 +++++++++++++++++----------------- kernel/rcutree.c | 44 ++++++++++++++++++++++---------------------- kernel/rcutree.h | 6 +++--- kernel/rcutree_plugin.h | 6 +++--- kernel/rcutree_trace.c | 8 ++++---- 5 files changed, 49 insertions(+), 49 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index a67af0a39ded..aaf65f6c6cd7 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -33,23 +33,23 @@ rcu/rcuboost: The output of "cat rcu/rcudata" looks as follows: rcu_sched: - 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 - 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 - 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 - 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 - 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. 
kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 - 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 - 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 - 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 + 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 + 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 + 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 + 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 + 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 + 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 + 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 + 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 rcu_bh: - 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 - 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 - 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 - 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 - 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... 
kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 - 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 - 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 - 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 + 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 + 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 + 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 + 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 + 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 + 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 + 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 + 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 The first section lists the rcu_data structures for rcu_sched, the second for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an @@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state CPU has not yet reported that fact, (2) some other CPU has not yet reported for this grace period, or (3) both. -o "pqc" indicates which grace period the last-observed quiescent +o "pgp" indicates which grace period the last-observed quiescent state for this CPU corresponds to. 
This is important for handling the race between CPU 0 reporting an extended dynticks-idle quiescent state for CPU 1 and CPU 1 suddenly waking up and diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7e0282949f8a..7e2f297aeec8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -159,32 +159,34 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) * Note a quiescent state. Because we do not need to know * how many quiescent states passed, just if there was at least * one since the start of the grace period, this just sets a flag. + * The caller must have disabled preemption. */ void rcu_sched_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesc_completed = rdp->gpnum - 1; + rdp->passed_quiesce_gpnum = rdp->gpnum; barrier(); - if (rdp->passed_quiesc == 0) + if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs"); - rdp->passed_quiesc = 1; + rdp->passed_quiesce = 1; } void rcu_bh_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc_completed = rdp->gpnum - 1; + rdp->passed_quiesce_gpnum = rdp->gpnum; barrier(); - if (rdp->passed_quiesc == 0) + if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs"); - rdp->passed_quiesc = 1; + rdp->passed_quiesce = 1; } /* * Note a context switch. This is a quiescent state for RCU-sched, * and requires special handling for preemptible RCU. + * The caller must have disabled preemption. */ void rcu_note_context_switch(int cpu) { @@ -694,7 +696,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart"); if (rnp->qsmask & rdp->grpmask) { rdp->qs_pending = 1; - rdp->passed_quiesc = 0; + rdp->passed_quiesce = 0; } else rdp->qs_pending = 0; } @@ -1027,7 +1029,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, * based on quiescent states detected in an earlier grace period! 
*/ static void -rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp) { unsigned long flags; unsigned long mask; @@ -1035,17 +1037,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); - if (lastcomp != rnp->completed) { + if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) { /* - * Someone beat us to it for this grace period, so leave. - * The race with GP start is resolved by the fact that we - * hold the leaf rcu_node lock, so that the per-CPU bits - * cannot yet be initialized -- so we would simply find our - * CPU's bit already cleared in rcu_report_qs_rnp() if this - * race occurred. + * The grace period in which this quiescent state was + * recorded has ended, so don't report it upwards. + * We will instead need a new quiescent state that lies + * within the current grace period. */ - rdp->passed_quiesc = 0; /* try again later! */ + rdp->passed_quiesce = 0; /* need qs for new gp. */ raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -1089,14 +1089,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * Was there a quiescent state since the beginning of the grace * period? If no, then exit and wait for the next call. */ - if (!rdp->passed_quiesc) + if (!rdp->passed_quiesce) return; /* * Tell RCU we are done (but rcu_report_qs_rdp() will be the * judge of that). */ - rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); + rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum); } #ifdef CONFIG_HOTPLUG_CPU @@ -1712,7 +1712,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? 
*/ - if (rdp->qs_pending && !rdp->passed_quiesc) { + if (rdp->qs_pending && !rdp->passed_quiesce) { /* * If force_quiescent_state() coming soon and this CPU @@ -1724,7 +1724,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, jiffies)) set_need_resched(); - } else if (rdp->qs_pending && rdp->passed_quiesc) { + } else if (rdp->qs_pending && rdp->passed_quiesce) { rdp->n_rp_report_qs++; return 1; } @@ -1907,7 +1907,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); - rdp->passed_quiesc = 0; /* We could be racing with new GP, */ + rdp->passed_quiesce = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ rdp->preemptible = preemptible; @@ -1935,7 +1935,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) if (rnp == rdp->mynode) { rdp->gpnum = rnp->completed; /* if GP in progress... */ rdp->completed = rnp->completed; - rdp->passed_quiesc_completed = rnp->completed - 1; + rdp->passed_quiesce_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index d11a0065321c..51638b68b2dc 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -230,9 +230,9 @@ struct rcu_data { /* in order to detect GP end. */ unsigned long gpnum; /* Highest gp number that this CPU */ /* is aware of having started. */ - unsigned long passed_quiesc_completed; - /* Value of completed at time of qs. */ - bool passed_quiesc; /* User-mode/idle loop etc. */ + unsigned long passed_quiesce_gpnum; + /* gpnum at time of quiescent state. */ + bool passed_quiesce; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. 
*/ bool beenonline; /* CPU online at least once. */ bool preemptible; /* Preemptible RCU? */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index bdb2e82f78d3..4bac5a29fb69 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -122,11 +122,11 @@ static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesc_completed = rdp->gpnum - 1; + rdp->passed_quiesce_gpnum = rdp->gpnum; barrier(); - if (rdp->passed_quiesc == 0) + if (rdp->passed_quiesce == 0) trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs"); - rdp->passed_quiesc = 1; + rdp->passed_quiesce = 1; current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index f328ed1c6e46..9feffa4c0695 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -61,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) { if (!rdp->beenonline) return; - seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", + seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' : ' ', rdp->completed, rdp->gpnum, - rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->passed_quiesce, rdp->passed_quiesce_gpnum, rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, " dt=%d/%d/%d df=%lu", @@ -139,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->cpu, cpu_is_offline(rdp->cpu) ? 
"\"N\"" : "\"Y\"", rdp->completed, rdp->gpnum, - rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->passed_quiesce, rdp->passed_quiesce_gpnum, rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, ",%d,%d,%d,%lu", @@ -170,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata_csv(struct seq_file *m, void *unused) { - seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\","); #ifdef CONFIG_NO_HZ seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ -- cgit v1.2.3 From 717f98f0f87c16c1e5768ac8f67c27f3d3535a25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 5 Aug 2011 22:39:02 -0700 Subject: rcu: Eliminate in_irq() checks in rcu_enter_nohz() The in_irq() check in rcu_enter_nohz() is redundant because if we really are in an interrupt, the attempt to re-enter dyntick-idle mode will invoke rcu_needs_cpu() in any case, which will force the check for RCU callbacks. So this commit removes the check along with the set_need_resched(). Suggested-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7e2f297aeec8..0c6c30dc6a7c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -373,13 +373,6 @@ void rcu_enter_nohz(void) smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); - - /* If the interrupt queued a callback, get out of dyntick mode. */ - if (in_irq() && - (__get_cpu_var(rcu_sched_data).nxtlist || - __get_cpu_var(rcu_bh_data).nxtlist || - rcu_preempt_needs_cpu(smp_processor_id()))) - set_need_resched(); } /* -- cgit v1.2.3 From 7eb4f4553ceaa6c64da83c8a71d5a991c0188655 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 30 Jul 2011 07:32:48 -0700 Subject: rcu: Make rcu_implicit_dynticks_qs() locals be correct size When the ->dynticks field in the rcu_dynticks structure changed to an atomic_t, its size on 64-bit systems changed from 64 bits to 32 bits. The local variables in rcu_implicit_dynticks_qs() need to change as well, hence this commit. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0c6c30dc6a7c..ebd18e56947b 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -488,11 +488,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { - unsigned long curr; - unsigned long snap; + unsigned int curr; + unsigned int snap; - curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); - snap = (unsigned long)rdp->dynticks_snap; + curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks); + snap = (unsigned int)rdp->dynticks_snap; /* * If the CPU passed through or entered a dynticks idle phase with @@ -502,7 +502,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * read-side critical section that started before the beginning * of the current RCU grace period. */ - if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { + if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) { trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti"); rdp->dynticks_fqs++; return 1; -- cgit v1.2.3 From 4627e240dfee4a0a46a58010b1b721b4ded1918f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 3 Aug 2011 03:34:24 -0700 Subject: rcu: Dump local stack if cannot dump all CPUs' stacks The trigger_all_cpu_backtrace() function is a no-op in architectures that do not define arch_trigger_all_cpu_backtrace. 
On such architectures, RCU CPU stall warning messages contain no stack trace information, which makes debugging quite difficult. This commit therefore substitutes dump_stack() for architectures that do not define arch_trigger_all_cpu_backtrace, so that at least the local CPU's stack is dumped as part of the RCU CPU stall warning message. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ebd18e56947b..a07bf553e02a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -583,7 +583,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp) } printk("} (detected by %d, t=%ld jiffies)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start)); - trigger_all_cpu_backtrace(); + if (!trigger_all_cpu_backtrace()) + dump_stack(); /* If so configured, complain about tasks blocking the grace period. */ @@ -604,7 +605,8 @@ static void print_cpu_stall(struct rcu_state *rsp) */ printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", rsp->name, smp_processor_id(), jiffies - rsp->gp_start); - trigger_all_cpu_backtrace(); + if (!trigger_all_cpu_backtrace()) + dump_stack(); raw_spin_lock_irqsave(&rnp->lock, flags); if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) -- cgit v1.2.3 From 5c51dd7349d4bb26f845f17f85daa168f5fa03f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2011 06:59:03 -0700 Subject: rcu: Prevent early boot set_need_resched() from __rcu_pending() There isn't a whole lot of point in poking the scheduler before there are other tasks to switch to. This commit therefore adds a check for rcu_scheduler_fully_active in __rcu_pending() to suppress any pre-scheduler calls to set_need_resched(). The downside of this approach is additional runtime overhead in a reasonably hot code path. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a07bf553e02a..0051dbf6958e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1707,7 +1707,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending && !rdp->passed_quiesce) { + if (rcu_scheduler_fully_active && + rdp->qs_pending && !rdp->passed_quiesce) { /* * If force_quiescent_state() coming soon and this CPU -- cgit v1.2.3 From 037067a1b6f9a70f862f3ed9d59fe28b7cd55ac4 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 7 Aug 2011 20:26:31 -0700 Subject: rcu: Prohibit grace periods during early boot Greater use of RCU during early boot (before the scheduler is operating) is causing RCU to attempt to start grace periods during that time, which in turn is resulting in both RCU and the callback functions attempting to use the scheduler before it is ready. This commit prevents these problems by prohibiting RCU grace periods until after the scheduler has spawned the first non-idle task. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 0051dbf6958e..9970116163ba 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -838,8 +838,11 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); - if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { - if (cpu_needs_another_gp(rsp, rdp)) + if (!rcu_scheduler_fully_active || + !cpu_needs_another_gp(rsp, rdp) || + rsp->fqs_active) { + if (rcu_scheduler_fully_active && + cpu_needs_another_gp(rsp, rdp)) rsp->fqs_need_gp = 1; if (rnp->completed == rsp->completed) { raw_spin_unlock_irqrestore(&rnp->lock, flags); -- cgit v1.2.3 From 9bc8b5586f94be6391458074ecbba8827ba8ba9d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 13 Aug 2011 13:31:47 -0700 Subject: rcu: Suppress NMI backtraces when stall ends before dump It is possible for an RCU CPU stall to end just as it is detected, in which case the current code will uselessly dump all CPU's stacks. This commit therefore checks for this condition and refrains from sending needless NMIs. And yes, the stall might also end just after we checked all CPUs and tasks, but in that case we would at least have given some clue as to which CPU/task was at fault. Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 13 +++++++++---- kernel/rcutree.h | 2 +- kernel/rcutree_plugin.h | 13 +++++++++---- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 9970116163ba..ade788320dd6 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -545,6 +545,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) int cpu; long delta; unsigned long flags; + int ndetected; struct rcu_node *rnp = rcu_get_root(rsp); /* Only let one CPU complain about others per time interval. 
*/ @@ -561,7 +562,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) * Now rat on any tasks that got kicked up to the root rcu_node * due to CPU offlining. */ - rcu_print_task_stall(rnp); + ndetected = rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); /* @@ -573,17 +574,21 @@ static void print_other_cpu_stall(struct rcu_state *rsp) rsp->name); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); - rcu_print_task_stall(rnp); + ndetected += rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); if (rnp->qsmask == 0) continue; for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) - if (rnp->qsmask & (1UL << cpu)) + if (rnp->qsmask & (1UL << cpu)) { printk(" %d", rnp->grplo + cpu); + ndetected++; + } } printk("} (detected by %d, t=%ld jiffies)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start)); - if (!trigger_all_cpu_backtrace()) + if (ndetected == 0) + printk(KERN_ERR "INFO: Stall ended before state dump start\n"); + else if (!trigger_all_cpu_backtrace()) dump_stack(); /* If so configured, complain about tasks blocking the grace period. 
*/ diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 51638b68b2dc..f509f728f9fb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -438,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, static void rcu_stop_cpu_kthread(int cpu); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ static void rcu_print_detail_task_stall(struct rcu_state *rsp); -static void rcu_print_task_stall(struct rcu_node *rnp); +static int rcu_print_task_stall(struct rcu_node *rnp); static void rcu_preempt_stall_reset(void); static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index eeb38ee8ebba..d3127e8764cb 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -483,16 +483,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. */ -static void rcu_print_task_stall(struct rcu_node *rnp) +static int rcu_print_task_stall(struct rcu_node *rnp) { struct task_struct *t; + int ndetected = 0; if (!rcu_preempt_blocked_readers_cgp(rnp)) - return; + return 0; t = list_entry(rnp->gp_tasks, struct task_struct, rcu_node_entry); - list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) + list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { printk(" P%d", t->pid); + ndetected++; + } + return ndetected; } /* @@ -976,8 +980,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) * Because preemptible RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. */ -static void rcu_print_task_stall(struct rcu_node *rnp) +static int rcu_print_task_stall(struct rcu_node *rnp) { + return 0; } /* -- cgit v1.2.3 From 06ae115a1d551cd952d80df06eaf8b5153351875 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sun, 14 Aug 2011 15:56:54 -0700 Subject: rcu: Avoid having just-onlined CPU resched itself when RCU is idle CPUs set rdp->qs_pending when coming online to resolve races with grace-period start. However, this means that if RCU is idle, the just-onlined CPU might needlessly send itself resched IPIs. Adjust the online-CPU initialization to avoid this, and also to correctly cause the CPU to respond to the current grace period if needed. Signed-off-by: Paul E. McKenney Tested-by: Josh Boyer Tested-by: Christian Hoffmann --- kernel/rcutree.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ade788320dd6..c95fa89ffef9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1911,8 +1911,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave(&rnp->lock, flags); - rdp->passed_quiesce = 0; /* We could be racing with new GP, */ - rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ rdp->preemptible = preemptible; rdp->qlen_last_fqs_check = 0; @@ -1937,8 +1935,15 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) rnp->qsmaskinit |= mask; mask = rnp->grpmask; if (rnp == rdp->mynode) { - rdp->gpnum = rnp->completed; /* if GP in progress... */ + /* + * If there is a grace period in progress, we will + * set up to wait for it next time we run the + * RCU core code. 
+ */ + rdp->gpnum = rnp->completed; rdp->completed = rnp->completed; + rdp->passed_quiesce = 0; + rdp->qs_pending = 0; rdp->passed_quiesce_gpnum = rnp->gpnum - 1; trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl"); } -- cgit v1.2.3 From 5b61b0baa9e80289c53413e573befc5790a04ac7 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 19 Aug 2011 11:39:11 -0700 Subject: rcu: Wire up RCU_BOOST_PRIO for rcutree RCU boost threads start life at RCU_BOOST_PRIO, while others remain at RCU_KTHREAD_PRIO. While here, change thread names to match other kthreads, and adjust rcu_yield() to not override the priority set by the user. This last change sets the stage for runtime changes to priority in the -rt tree. Signed-off-by: Mike Galbraith Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 2 -- kernel/rcutree_plugin.h | 20 +++++++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index c95fa89ffef9..8455043c9250 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -131,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); -#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */ - /* * Track the rcutorture test sequence number and the update version * number within a given test. The rcutorture_testseq is incremented diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28422767d854..b4cbe5bf2326 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -27,6 +27,14 @@ #include #include +#define RCU_KTHREAD_PRIO 1 + +#ifdef CONFIG_RCU_BOOST +#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO +#else +#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO +#endif + /* * Check the RCU kernel configuration parameters and print informative * messages about anything out of the ordinary. 
If you like #ifdef, you @@ -1364,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (rnp->boost_kthread_task != NULL) return 0; t = kthread_create(rcu_boost_kthread, (void *)rnp, - "rcub%d", rnp_index); + "rcub/%d", rnp_index); if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - sp.sched_priority = RCU_KTHREAD_PRIO; + sp.sched_priority = RCU_BOOST_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ return 0; @@ -1465,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) { struct sched_param sp; struct timer_list yield_timer; + int prio = current->rt_priority; setup_timer_on_stack(&yield_timer, f, arg); mod_timer(&yield_timer, jiffies + 2); @@ -1472,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg) sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); set_user_nice(current, 19); schedule(); - sp.sched_priority = RCU_KTHREAD_PRIO; + set_user_nice(current, 0); + sp.sched_priority = prio; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); del_timer(&yield_timer); } @@ -1591,7 +1601,7 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) t = kthread_create_on_node(rcu_cpu_kthread, (void *)(long)cpu, cpu_to_node(cpu), - "rcuc%d", cpu); + "rcuc/%d", cpu); if (IS_ERR(t)) return PTR_ERR(t); if (cpu_online(cpu)) @@ -1700,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, return 0; if (rnp->node_kthread_task == NULL) { t = kthread_create(rcu_node_kthread, (void *)rnp, - "rcun%d", rnp_index); + "rcun/%d", rnp_index); if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); -- cgit v1.2.3 From e90c53d3e238dd0b7b02964370e8fece1778df96 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 20 Aug 2011 18:29:32 -0700 Subject: rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states The purpose of rcu_needs_cpu_flush() was to iterate on pushing the current grace period in order to help the current CPU enter dyntick-idle mode. However, this can result in failures if the CPU starts entering dyntick-idle mode, but then backs out. In this case, the call to rcu_pending() from rcu_needs_cpu_flush() might end up announcing a non-existing quiescent state. This commit therefore removes rcu_needs_cpu_flush() in favor of letting the dyntick-idle machinery at the end of the softirq handler push the loop along via its call to rcu_pending(). Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 3 --- kernel/rcutree.h | 1 - kernel/rcutree_plugin.h | 25 ------------------------- 3 files changed, 29 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 8455043c9250..e75df0c93abd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1528,9 +1528,6 @@ static void rcu_process_callbacks(struct softirq_action *unused) &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); - - /* If we are last CPU on way to dyntick-idle mode, accelerate it. 
*/ - rcu_needs_cpu_flush(); trace_rcu_utilization("End RCU core"); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index f509f728f9fb..849ce9ec51fe 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -458,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu); static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); -static void rcu_needs_cpu_flush(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); static void invoke_rcu_callbacks_kthread(void); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index b4cbe5bf2326..4b9b9f8a4184 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1948,15 +1948,6 @@ int rcu_needs_cpu(int cpu) return rcu_needs_cpu_quick_check(cpu); } -/* - * Check to see if we need to continue a callback-flush operations to - * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle - * entry is not configured, so we never do need to. - */ -static void rcu_needs_cpu_flush(void) -{ -} - #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #define RCU_NEEDS_CPU_FLUSHES 5 @@ -2032,20 +2023,4 @@ int rcu_needs_cpu(int cpu) return c; } -/* - * Check to see if we need to continue a callback-flush operations to - * allow the last CPU to enter dyntick-idle mode. - */ -static void rcu_needs_cpu_flush(void) -{ - int cpu = smp_processor_id(); - unsigned long flags; - - if (per_cpu(rcu_dyntick_drain, cpu) <= 0) - return; - local_irq_save(flags); - (void)rcu_needs_cpu(cpu); - local_irq_restore(flags); -} - #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.3 From afe24b122eb6edb5f1cb942570ac8d766105c7fc Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 24 Aug 2011 16:52:09 -0700 Subject: rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp() It is possible for the CPU that noted the end of the prior grace period to not need a new one, and therefore to decide to propagate ->completed throughout the rcu_node tree without starting another grace period. However, in so doing, it releases the root rcu_node structure's lock, which can allow some other CPU to start another grace period. The first CPU will be propagating ->completed in parallel with the second CPU initializing the rcu_node tree for the new grace period. In theory this is harmless, but in practice we need to keep things simple. This commit therefore moves the propagation of ->completed to rcu_report_qs_rsp(), and refrains from marking the old grace period as having been completed until it has finished doing this. This prevents anyone from starting a new grace period concurrently with marking the old grace period as having been completed. Of course, the optimization where a CPU needing a new grace period doesn't bother marking the old one completed is still in effect: In that case, the marking happens implicitly as part of initializing the new grace period. Signed-off-by: Paul E. 
McKenney --- kernel/rcutree.c | 71 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 20 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e75df0c93abd..e234eb92a177 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -842,28 +842,24 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!rcu_scheduler_fully_active || - !cpu_needs_another_gp(rsp, rdp) || - rsp->fqs_active) { - if (rcu_scheduler_fully_active && - cpu_needs_another_gp(rsp, rdp)) - rsp->fqs_need_gp = 1; - if (rnp->completed == rsp->completed) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + !cpu_needs_another_gp(rsp, rdp)) { + /* + * Either the scheduler hasn't yet spawned the first + * non-idle task or this CPU does not need another + * grace period. Either way, don't start a new grace + * period. + */ + raw_spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + if (rsp->fqs_active) { /* - * Propagate new ->completed value to rcu_node structures - * so that other CPUs don't have to wait until the start - * of the next grace period to process their callbacks. + * This CPU needs a grace period, but force_quiescent_state() + * is running. Tell it to start one on this CPU's behalf. */ - rcu_for_each_node_breadth_first(rsp, rnp) { - raw_spin_lock(&rnp->lock); /* irqs already disabled. */ - rnp->completed = rsp->completed; - raw_spin_unlock(&rnp->lock); /* irqs remain disabled. 
*/ - } - local_irq_restore(flags); + rsp->fqs_need_gp = 1; + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -947,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { unsigned long gp_duration; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); @@ -958,7 +956,40 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; - rsp->completed = rsp->gpnum; + + /* + * We know the grace period is complete, but to everyone else + * it appears to still be ongoing. But it is also the case + * that to everyone else it looks like there is nothing that + * they can do to advance the grace period. It is therefore + * safe for us to drop the lock in order to mark the grace + * period as completed in all of the rcu_node structures. + * + * But if this CPU needs another grace period, it will take + * care of this while initializing the next grace period. + * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL + * because the callbacks have not yet been advanced: Those + * callbacks are waiting on the grace period that just now + * completed. + */ + if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) { + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * Propagate new ->completed value to rcu_node structures + * so that other CPUs don't have to wait until the start + * of the next grace period to process their callbacks. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->completed = rsp->gpnum; + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + rnp = rcu_get_root(rsp); + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ + } + + rsp->completed = rsp->gpnum; /* Declare the grace period complete. 
*/ trace_rcu_grace_period(rsp->name, rsp->completed, "end"); rsp->signaled = RCU_GP_IDLE; rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ -- cgit v1.2.3 From 9984de1a5a8a96275fcab818f7419af5a3c86e71 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 23 May 2011 14:51:41 -0400 Subject: kernel: Map most files to use export.h instead of module.h The changed files were only including linux/module.h for the EXPORT_SYMBOL infrastructure, and nothing else. Revector them onto the isolated export header for faster compile times. Nothing to see here but a whole lot of instances of: -#include <linux/module.h> +#include <linux/export.h> This commit is only changing the kernel dir; next targets will probably be mm, fs, the arch dirs, etc. Signed-off-by: Paul Gortmaker --- kernel/async.c | 2 +- kernel/audit.c | 2 +- kernel/auditsc.c | 2 +- kernel/capability.c | 2 +- kernel/cgroup_freezer.c | 2 +- kernel/cpu.c | 2 +- kernel/cpuset.c | 2 +- kernel/crash_dump.c | 2 +- kernel/cred.c | 2 +- kernel/dma.c | 2 +- kernel/freezer.c | 2 +- kernel/futex.c | 2 +- kernel/groups.c | 2 +- kernel/hrtimer.c | 2 +- kernel/hung_task.c | 2 +- kernel/irq_work.c | 2 +- kernel/kfifo.c | 2 +- kernel/kprobes.c | 2 +- kernel/ksysfs.c | 2 +- kernel/kthread.c | 2 +- kernel/latencytop.c | 2 +- kernel/lockdep_proc.c | 2 +- kernel/module.c | 2 +- kernel/mutex-debug.c | 2 +- kernel/mutex.c | 2 +- kernel/notifier.c | 2 +- kernel/nsproxy.c | 2 +- kernel/padata.c | 2 +- kernel/pid.c | 2 +- kernel/posix-timers.c | 2 +- kernel/profile.c | 2 +- kernel/ptrace.c | 2 +- kernel/rcupdate.c | 2 +- kernel/rcutiny.c | 2 +- kernel/rcutree.c | 2 +- kernel/relay.c | 2 +- kernel/resource.c | 2 +- kernel/rtmutex-debug.c | 2 +- kernel/rtmutex-tester.c | 2 +- kernel/rtmutex.c | 2 +- kernel/rwsem.c | 2 +- kernel/sched_clock.c | 2 +- kernel/semaphore.c | 2 +- kernel/signal.c | 2 +- kernel/smp.c | 2 +- kernel/softirq.c | 2 +- kernel/spinlock.c | 2 +- kernel/srcu.c | 2 +- kernel/stacktrace.c | 2 +- kernel/stop_machine.c | 2 +- kernel/sys.c | 2 +- 
kernel/time.c | 2 +- kernel/timer.c | 2 +- kernel/up.c | 2 +- kernel/user-return-notifier.c | 2 +- kernel/user.c | 2 +- kernel/user_namespace.c | 2 +- kernel/utsname.c | 2 +- kernel/utsname_sysctl.c | 2 +- kernel/wait.c | 2 +- kernel/workqueue.c | 2 +- 61 files changed, 61 insertions(+), 61 deletions(-) (limited to 'kernel/rcutree.c') diff --git a/kernel/async.c b/kernel/async.c index 4c2843c0043e..80b74b88fefe 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -51,7 +51,7 @@ asynchronous and synchronous parts of the kernel. #include #include #include -#include +#include #include #include #include diff --git a/kernel/audit.c b/kernel/audit.c index 0a1355ca3d79..09fae2677a45 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ce4b054acee5..47b7fc1ea893 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/capability.c b/kernel/capability.c index 283c529f8b1c..b463871a4e69 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e691818d7e45..5e828a2ca8e6 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -14,7 +14,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 12b7458f23b1..6a81ca906a06 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 10131fdaff70..d970fb508e34 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 5f85690285d4..20e699265cef 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include /* * If we have booted due to a crash, max_pfn will be a very low value. We need diff --git a/kernel/cred.c b/kernel/cred.c index bb55d052d858..5791612a4045 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/kernel/dma.c b/kernel/dma.c index f903189c5304..68a2306522c8 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -9,7 +9,7 @@ * [It also happened to remove the sizeof(char *) == sizeof(int) * assumption introduced because of those /proc/dma patches. 
-- Hennus] */ -#include +#include #include #include #include diff --git a/kernel/freezer.c b/kernel/freezer.c index 66a594e8ad2f..f24aa0005530 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/kernel/futex.c b/kernel/futex.c index 1511dff0cfd6..ea87f4d2f455 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/groups.c b/kernel/groups.c index 1cc476d52dd3..99b53d1eb7ea 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -2,7 +2,7 @@ * Supplementary group IDs */ #include -#include +#include #include #include #include diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a9205e32a059..422e567eecf6 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -32,7 +32,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ea640120ab86..8b1748d0172c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 0e2cde4f380b..3e460ea44955 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 01a0700e873f..c744b88c44e2 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2f193d0ba7f2..e5d84644823b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 3b053c04dd86..6771de3a655d 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include 
#include #include diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ba7cccb4994..b6d216a92639 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 4ac8ebfcab59..a462b317f9a0 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 71edd2f60c02..91c32a0b612c 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -11,7 +11,7 @@ * Code for /proc/lockdep and /proc/lockdep_stats: * */ -#include +#include #include #include #include diff --git a/kernel/module.c b/kernel/module.c index 93342d992f34..84205ae1607a 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -16,7 +16,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include +#include #include #include #include diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 73da83aff418..7e3443fe1f48 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -14,7 +14,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd441..89096dd8786f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -19,7 +19,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/notifier.c b/kernel/notifier.c index 8d7b435806c9..2d5cc4ccff7f 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9aeab4b98c64..b576f7f14bc6 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include #include #include diff --git 
a/kernel/padata.c b/kernel/padata.c index b91941df5e63..b45259931512 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -18,7 +18,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ -#include +#include #include #include #include diff --git a/kernel/pid.c b/kernel/pid.c index 8cafe7e72ad2..fa5f72227e5f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -27,7 +27,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4556182527f3..69185ae6b701 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include /* * Management arrays for POSIX timers. Timers are kept in slab memory diff --git a/kernel/profile.c b/kernel/profile.c index 961b389fe52f..76b8e77773ee 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -13,7 +13,7 @@ * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004 */ -#include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a70d2a5d8c7b..24d04477b257 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ca0d23b6b3e8..c5b98e565aee 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #define CREATE_TRACE_POINTS diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index da775c87f27f..b5e525d67fe3 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e234eb92a177..6b76d812740c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/relay.c b/kernel/relay.c index 859ea5a9605f..226fade4d727 100644 --- 
a/kernel/relay.c +++ b/kernel/relay.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/resource.c b/kernel/resource.c index c8dc249da5ce..7640b3a947d0 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -7,7 +7,7 @@ * Arbitrary resource management. */ -#include +#include #include #include #include diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index a2e7e7210f3e..8eafd1bd273e 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -18,7 +18,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 5c9ccd380966..3d9f31cd79e7 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -7,7 +7,7 @@ * */ #include -#include +#include #include #include #include diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 5e8d9cce7470..f9d8482dd487 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -11,7 +11,7 @@ * See Documentation/rt-mutex-design.txt for details. 
*/ #include -#include +#include #include #include diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9f48f3d82e9b..b152f74f02de 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b3fb64..c685e31492df 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -62,7 +62,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/semaphore.c b/kernel/semaphore.c index d831841e55a7..60636a4e25c3 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index d252be2d3de5..b3f78d09a105 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/smp.c b/kernel/smp.c index fb67dfa8394e..db197d60489b 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042b..2c71d91efff0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -10,7 +10,7 @@ * Remote softirq infrastructure is by Jens Axboe. 
*/ -#include +#include #include #include #include diff --git a/kernel/spinlock.c b/kernel/spinlock.c index be6517fb9c14..84c7d96918bf 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include /* * If lockdep is enabled then we use the non-preemption spin-ops diff --git a/kernel/srcu.c b/kernel/srcu.c index 73ce23feaea9..0febf61e1aa3 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -24,7 +24,7 @@ * */ -#include +#include #include #include #include diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index d20c6983aad9..00fe55cc5a82 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ba5070ce5765..e78db365fa83 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 58459509b14c..4a0286241829 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -4,7 +4,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include +#include #include #include #include diff --git a/kernel/time.c b/kernel/time.c index d77606214529..73e416db0a1e 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -27,7 +27,7 @@ * with nanosecond accuracy */ -#include +#include #include #include #include diff --git a/kernel/timer.c b/kernel/timer.c index 8cff36119e4d..dbaa62422b13 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -20,7 +20,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/up.c b/kernel/up.c index 1ff27a28bb7d..c54c75e9faf7 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -4,7 +4,7 @@ #include #include -#include +#include #include int smp_call_function_single(int cpu, void (*func) (void *info), void *info, diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 92cb706c7fc8..1744bb80f1fb 
100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -2,7 +2,7 @@ #include #include #include -#include +#include static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); diff --git a/kernel/user.c b/kernel/user.c index 9e03e9c1df8d..71dd2363ab0f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9da289c34f22..3b906e98b1db 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -5,7 +5,7 @@ * License. */ -#include +#include #include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index bff131b9510a..405caf91aad5 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -9,7 +9,7 @@ * License. */ -#include +#include #include #include #include diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index a2cd77e70d4d..5a709452ec19 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -9,7 +9,7 @@ * License. */ -#include +#include #include #include #include diff --git a/kernel/wait.c b/kernel/wait.c index f45ea8d2a1ce..26fa7797f90f 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -4,7 +4,7 @@ * (C) 2004 William Irwin, Oracle */ #include -#include +#include #include #include #include diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1783aabc6128..42fa9ad0a810 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -23,7 +23,7 @@ * Please read Documentation/workqueue.txt for details. */ -#include +#include #include #include #include -- cgit v1.2.3