From 9a11b49a805665e13a56aa067afaf81d43ec1514 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Jul 2006 00:24:33 -0700 Subject: [PATCH] lockdep: better lock debugging Generic lock debugging: - generalized lock debugging framework. For example, a bug in one lock subsystem turns off debugging in all lock subsystems. - got rid of the caller address passing (__IP__/__IP_DECL__/etc.) from the mutex/rtmutex debugging code: it caused way too much prototype hackery, and lockdep will give the same information anyway. - ability to do silent tests - check lock freeing in vfree too. - more finegrained debugging options, to allow distributions to turn off more expensive debugging features. There's no separate 'held mutexes' list anymore - but there's a 'held locks' stack within lockdep, which unifies deadlock detection across all lock classes. (this is independent of the lockdep validation stuff - lockdep first checks whether we are holding a lock already) Here are the current debugging options: CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_LOCK_ALLOC=y which do: config DEBUG_MUTEXES bool "Mutex debugging, basic checks" config DEBUG_LOCK_ALLOC bool "Detect incorrect freeing of live mutexes" Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 9064bf9e131b..1cd46a4fb0d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -919,10 +919,6 @@ static inline void rt_mutex_init_task(struct task_struct *p) spin_lock_init(&p->pi_lock); plist_head_init(&p->pi_waiters, &p->pi_lock); p->pi_blocked_on = NULL; -# ifdef CONFIG_DEBUG_RT_MUTEXES - spin_lock_init(&p->held_list_lock); - INIT_LIST_HEAD(&p->held_list_head); -# endif #endif } -- cgit v1.2.3 From de30a2b355ea85350ca2f58f3b9bf4e5bc007986 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Jul 2006 00:24:42 -0700 Subject: [PATCH] lockdep: 
irqtrace subsystem, core Accurate hard-IRQ-flags and softirq-flags state tracing. This allows us to attach extra functionality to IRQ flags on/off events (such as trace-on/off). Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/irq.c | 2 +- include/asm-powerpc/irqflags.h | 31 ++++++++++ include/linux/hardirq.h | 26 +++++++- include/linux/init_task.h | 2 + include/linux/interrupt.h | 11 ++-- include/linux/irqflags.h | 96 +++++++++++++++++++++++++++++ include/linux/sched.h | 15 +++++ kernel/fork.c | 19 ++++++ kernel/sched.c | 4 +- kernel/softirq.c | 137 +++++++++++++++++++++++++++++++++++------ 10 files changed, 313 insertions(+), 30 deletions(-) create mode 100644 include/asm-powerpc/irqflags.h create mode 100644 include/linux/irqflags.h (limited to 'kernel/fork.c') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 525baab45d2d..027728b95429 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -429,7 +429,7 @@ void do_softirq(void) local_bh_disable(); do_softirq_onstack(); account_system_vtime(current); - __local_bh_enable(); + _local_bh_enable(); } local_irq_restore(flags); diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h new file mode 100644 index 000000000000..7970cbaeaa54 --- /dev/null +++ b/include/asm-powerpc/irqflags.h @@ -0,0 +1,31 @@ +/* + * include/asm-powerpc/irqflags.h + * + * IRQ flags handling + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. + */ +#ifndef _ASM_IRQFLAGS_H +#define _ASM_IRQFLAGS_H + +/* + * Get definitions for raw_local_save_flags(x), etc. + */ +#include + +/* + * Do the CPU's IRQ-state tracing from assembly code. 
We call a + * C function, so save all the C-clobbered registers: + */ +#ifdef CONFIG_TRACE_IRQFLAGS + +#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS + +#else +# define TRACE_IRQS_ON +# define TRACE_IRQS_OFF +#endif + +#endif diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 114ae583cca9..b1d4332b5cf0 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -86,9 +86,6 @@ extern void synchronize_irq(unsigned int irq); # define synchronize_irq(irq) barrier() #endif -#define nmi_enter() irq_enter() -#define nmi_exit() sub_preempt_count(HARDIRQ_OFFSET) - struct task_struct; #ifndef CONFIG_VIRT_CPU_ACCOUNTING @@ -97,12 +94,35 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif +/* + * It is safe to do non-atomic ops on ->hardirq_context, + * because NMI handlers may not preempt and the ops are + * always balanced, so the interrupted value of ->hardirq_context + * will always be restored. + */ #define irq_enter() \ do { \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ + trace_hardirq_enter(); \ + } while (0) + +/* + * Exit irq context without processing softirqs: + */ +#define __irq_exit() \ + do { \ + trace_hardirq_exit(); \ + account_system_vtime(current); \ + sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) +/* + * Exit irq context and process softirqs if needed: + */ extern void irq_exit(void); +#define nmi_enter() irq_enter() +#define nmi_exit() __irq_exit() + #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 1b7bb37624bb..444a3ae0de2a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -3,6 +3,7 @@ #include #include +#include #define INIT_FDTABLE \ { \ @@ -124,6 +125,7 @@ extern struct group_info init_groups; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = SPIN_LOCK_UNLOCKED, \ + INIT_TRACE_IRQFLAGS \ } diff --git a/include/linux/interrupt.h 
b/include/linux/interrupt.h index 73463fbb38e4..d5afee95fd43 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -199,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x) #define save_and_cli(x) save_and_cli(&x) #endif /* CONFIG_SMP */ -/* SoftIRQ primitives. */ -#define local_bh_disable() \ - do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0) -#define __local_bh_enable() \ - do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0) - +extern void local_bh_disable(void); +extern void __local_bh_enable(void); +extern void _local_bh_enable(void); extern void local_bh_enable(void); +extern void local_bh_enable_ip(unsigned long ip); /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high frequency threaded job scheduling. For almost all the purposes diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h new file mode 100644 index 000000000000..412e025bc5c7 --- /dev/null +++ b/include/linux/irqflags.h @@ -0,0 +1,96 @@ +/* + * include/linux/irqflags.h + * + * IRQ flags tracing: follow the state of the hardirq and softirq flags and + * provide callbacks for transitions between ON and OFF states. + * + * This file gets included from lowlevel asm headers too, to provide + * wrapped versions of the local_irq_*() APIs, based on the + * raw_local_irq_*() macros from the lowlevel headers. 
+ */ +#ifndef _LINUX_TRACE_IRQFLAGS_H +#define _LINUX_TRACE_IRQFLAGS_H + +#ifdef CONFIG_TRACE_IRQFLAGS + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); + extern void trace_softirqs_on(unsigned long ip); + extern void trace_softirqs_off(unsigned long ip); +# define trace_hardirq_context(p) ((p)->hardirq_context) +# define trace_softirq_context(p) ((p)->softirq_context) +# define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define trace_softirqs_enabled(p) ((p)->softirqs_enabled) +# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) +# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) +# define trace_softirq_enter() do { current->softirq_context++; } while (0) +# define trace_softirq_exit() do { current->softirq_context--; } while (0) +# define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, +#else +# define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_off() do { } while (0) +# define trace_softirqs_on(ip) do { } while (0) +# define trace_softirqs_off(ip) do { } while (0) +# define trace_hardirq_context(p) 0 +# define trace_softirq_context(p) 0 +# define trace_hardirqs_enabled(p) 0 +# define trace_softirqs_enabled(p) 0 +# define trace_hardirq_enter() do { } while (0) +# define trace_hardirq_exit() do { } while (0) +# define trace_softirq_enter() do { } while (0) +# define trace_softirq_exit() do { } while (0) +# define INIT_TRACE_IRQFLAGS +#endif + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + +#include + +#define local_irq_enable() \ + do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) +#define local_irq_disable() \ + do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) +#define local_irq_save(flags) \ + do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) + +#define local_irq_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + 
raw_local_irq_restore(flags); \ + } \ + } while (0) +#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */ +/* + * The local_irq_*() APIs are equal to the raw_local_irq*() + * if !TRACE_IRQFLAGS. + */ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) local_irq_save(flags) +# define raw_local_irq_restore(flags) local_irq_restore(flags) +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ + +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#define safe_halt() \ + do { \ + trace_hardirqs_on(); \ + raw_safe_halt(); \ + } while (0) + +#define local_save_flags(flags) raw_local_save_flags(flags) + +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + \ + raw_local_save_flags(flags); \ + raw_irqs_disabled_flags(flags); \ +}) + +#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#endif /* CONFIG_X86 */ + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index bdabeee10a78..ad7a89014d29 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -871,6 +871,21 @@ struct task_struct { /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif +#ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + int hardirqs_enabled; + unsigned long hardirq_enable_ip; + unsigned int hardirq_enable_event; + unsigned long hardirq_disable_ip; + unsigned int hardirq_disable_event; + int softirqs_enabled; + unsigned long softirq_disable_ip; + unsigned int softirq_disable_event; + unsigned long softirq_enable_ip; + unsigned int softirq_enable_event; + int hardirq_context; + int softirq_context; +#endif /* journalling filesystem info */ void *journal_info; diff --git a/kernel/fork.c b/kernel/fork.c index 1cd46a4fb0d3..b7db7fb74f53 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -968,6 +968,10 @@ static task_t *copy_process(unsigned long clone_flags, if (!p) goto fork_out; +#ifdef CONFIG_TRACE_IRQFLAGS + DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); + 
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); +#endif retval = -EAGAIN; if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -1042,6 +1046,21 @@ static task_t *copy_process(unsigned long clone_flags, } mpol_fix_fork_child_flag(p); #endif +#ifdef CONFIG_TRACE_IRQFLAGS + p->irq_events = 0; + p->hardirqs_enabled = 0; + p->hardirq_enable_ip = 0; + p->hardirq_enable_event = 0; + p->hardirq_disable_ip = _THIS_IP_; + p->hardirq_disable_event = 0; + p->softirqs_enabled = 1; + p->softirq_enable_ip = _THIS_IP_; + p->softirq_enable_event = 0; + p->softirq_disable_ip = 0; + p->softirq_disable_event = 0; + p->hardirq_context = 0; + p->softirq_context = 0; +#endif rt_mutex_init_task(p); diff --git a/kernel/sched.c b/kernel/sched.c index 48c1faa60a67..911829966534 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4462,7 +4462,9 @@ int __sched cond_resched_softirq(void) BUG_ON(!in_softirq()); if (need_resched() && __resched_legal()) { - __local_bh_enable(); + raw_local_irq_disable(); + _local_bh_enable(); + raw_local_irq_enable(); __cond_resched(); local_bh_disable(); return 1; diff --git a/kernel/softirq.c b/kernel/softirq.c index 8f03e3b89b55..584609b6a66e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -61,6 +61,119 @@ static inline void wakeup_softirqd(void) wake_up_process(tsk); } +/* + * This one is for softirq.c-internal use, + * where hardirqs are disabled legitimately: + */ +static void __local_bh_disable(unsigned long ip) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + + raw_local_irq_save(flags); + add_preempt_count(SOFTIRQ_OFFSET); + /* + * Were softirqs turned off above: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_off(ip); + raw_local_irq_restore(flags); +} + +void local_bh_disable(void) +{ + __local_bh_disable((unsigned long)__builtin_return_address(0)); +} + +EXPORT_SYMBOL(local_bh_disable); + +void __local_bh_enable(void) +{ + WARN_ON_ONCE(in_irq()); + + /* + * softirqs should never be enabled by 
__local_bh_enable(), + * it always nests inside local_bh_enable() sections: + */ + WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET); + + sub_preempt_count(SOFTIRQ_OFFSET); +} +EXPORT_SYMBOL_GPL(__local_bh_enable); + +/* + * Special-case - softirqs can safely be enabled in + * cond_resched_softirq(), or by __do_softirq(), + * without processing still-pending softirqs: + */ +void _local_bh_enable(void) +{ + WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(!irqs_disabled()); + + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on((unsigned long)__builtin_return_address(0)); + sub_preempt_count(SOFTIRQ_OFFSET); +} + +EXPORT_SYMBOL(_local_bh_enable); + +void local_bh_enable(void) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(irqs_disabled()); + + local_irq_save(flags); + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on((unsigned long)__builtin_return_address(0)); + /* + * Keep preemption disabled until we are done with + * softirq processing: + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); + + if (unlikely(!in_interrupt() && local_softirq_pending())) + do_softirq(); + + dec_preempt_count(); + local_irq_restore(flags); + preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable); + +void local_bh_enable_ip(unsigned long ip) +{ + unsigned long flags; + + WARN_ON_ONCE(in_irq()); + + local_irq_save(flags); + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_on(ip); + /* + * Keep preemption disabled until we are done with + * softirq processing: + */ + sub_preempt_count(SOFTIRQ_OFFSET - 1); + + if (unlikely(!in_interrupt() && local_softirq_pending())) + do_softirq(); + + dec_preempt_count(); + local_irq_restore(flags); + preempt_check_resched(); +} +EXPORT_SYMBOL(local_bh_enable_ip); + /* * We restart softirq processing MAX_SOFTIRQ_RESTART times, * and we fall back to softirqd after that. 
@@ -80,8 +193,9 @@ asmlinkage void __do_softirq(void) int cpu; pending = local_softirq_pending(); + __local_bh_disable((unsigned long)__builtin_return_address(0)); + trace_softirq_enter(); - local_bh_disable(); cpu = smp_processor_id(); restart: /* Reset the pending bitmask before enabling irqs */ @@ -109,7 +223,8 @@ restart: if (pending) wakeup_softirqd(); - __local_bh_enable(); + trace_softirq_exit(); + _local_bh_enable(); } #ifndef __ARCH_HAS_DO_SOFTIRQ @@ -136,23 +251,6 @@ EXPORT_SYMBOL(do_softirq); #endif -void local_bh_enable(void) -{ - WARN_ON(irqs_disabled()); - /* - * Keep preemption disabled until we are done with - * softirq processing: - */ - sub_preempt_count(SOFTIRQ_OFFSET - 1); - - if (unlikely(!in_interrupt() && local_softirq_pending())) - do_softirq(); - - dec_preempt_count(); - preempt_check_resched(); -} -EXPORT_SYMBOL(local_bh_enable); - #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED # define invoke_softirq() __do_softirq() #else @@ -165,6 +263,7 @@ EXPORT_SYMBOL(local_bh_enable); void irq_exit(void) { account_system_vtime(current); + trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); -- cgit v1.2.3 From fbb9ce9530fd9b66096d5187fa6a115d16d9746c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Jul 2006 00:24:50 -0700 Subject: [PATCH] lockdep: core Do 'make oldconfig' and accept all the defaults for new config options - reboot into the kernel and if everything goes well it should boot up fine and you should have /proc/lockdep and /proc/lockdep_stats files. Typically if the lock validator finds some problem it will print out voluminous debug output that begins with "BUG: ..." and which syslog output can be used by kernel developers to figure out the precise locking scenario. What does the lock validator do? It "observes" and maps all locking rules as they occur dynamically (as triggered by the kernel's natural use of spinlocks, rwlocks, mutexes and rwsems). 
Whenever the lock validator subsystem detects a new locking scenario, it validates this new rule against the existing set of rules. If this new rule is consistent with the existing set of rules then the new rule is added transparently and the kernel continues as normal. If the new rule could create a deadlock scenario then this condition is printed out. When determining validity of locking, all possible "deadlock scenarios" are considered: assuming an arbitrary number of CPUs, arbitrary irq context and task context constellations, running arbitrary combinations of all the existing locking scenarios. In a typical system this means millions of separate scenarios. This is why we call it a "locking correctness" validator - for all rules that are observed the lock validator proves it with mathematical certainty that a deadlock could not occur (assuming that the lock validator implementation itself is correct and its internal data structures are not corrupted by some other kernel subsystem). [see more details and conditionals of this statement in include/linux/lockdep.h and Documentation/lockdep-design.txt] Furthermore, this "all possible scenarios" property of the validator also enables the finding of complex, highly unlikely multi-CPU multi-context races via single single-context rules, increasing the likelihood of finding bugs drastically. In practical terms: the lock validator already found a bug in the upstream kernel that could only occur on systems with 3 or more CPUs, and which needed 3 very unlikely code sequences to occur at once on the 3 CPUs. That bug was found and reported on a single-CPU system (!). So in essence a race will be found "piecemeal", triggering all the necessary components for the race, without having to reproduce the race scenario itself! In its short existence the lock validator found and reported many bugs before they actually caused a real deadlock. 
To further increase the efficiency of the validator, the mapping is not per "lock instance", but per "lock-class". For example, all struct inode objects in the kernel have inode->inotify_mutex. If there are 10,000 inodes cached, then there are 10,000 lock objects. But ->inotify_mutex is a single "lock type", and all locking activities that occur against ->inotify_mutex are "unified" into this single lock-class. The advantage of the lock-class approach is that all historical ->inotify_mutex uses are mapped into a single (and as narrow as possible) set of locking rules - regardless of how many different tasks or inode structures it took to build this set of rules. The set of rules persist during the lifetime of the kernel. To see the rough magnitude of checking that the lock validator does, here's a portion of /proc/lockdep_stats, fresh after bootup: lock-classes: 694 [max: 2048] direct dependencies: 1598 [max: 8192] indirect dependencies: 17896 all direct dependencies: 16206 dependency chains: 1910 [max: 8192] in-hardirq chains: 17 in-softirq chains: 105 in-process chains: 1065 stack-trace entries: 38761 [max: 131072] combined max dependencies: 2033928 hardirq-safe locks: 24 hardirq-unsafe locks: 176 softirq-safe locks: 53 softirq-unsafe locks: 137 irq-safe locks: 59 irq-unsafe locks: 176 The lock validator has observed 1598 actual single-thread locking patterns, and has validated all possible 2033928 distinct locking scenarios. 
More details about the design of the lock validator can be found in Documentation/lockdep-design.txt, which can also be found at: http://redhat.com/~mingo/lockdep-patches/lockdep-design.txt [bunk@stusta.de: cleanups] Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hardirq.h | 5 +- include/linux/init_task.h | 2 + include/linux/lockdep.h | 347 ++++++ include/linux/sched.h | 7 + init/main.c | 14 + kernel/Makefile | 1 + kernel/fork.c | 5 + kernel/irq/manage.c | 6 + kernel/lockdep.c | 2703 ++++++++++++++++++++++++++++++++++++++++++++ kernel/lockdep_internals.h | 78 ++ kernel/module.c | 3 + lib/Kconfig.debug | 2 +- lib/locking-selftest.c | 4 +- 13 files changed, 3171 insertions(+), 6 deletions(-) create mode 100644 include/linux/lockdep.h create mode 100644 kernel/lockdep.c create mode 100644 kernel/lockdep_internals.h (limited to 'kernel/fork.c') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index b1d4332b5cf0..50d8b5744cf6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -122,7 +123,7 @@ static inline void account_system_vtime(struct task_struct *tsk) */ extern void irq_exit(void); -#define nmi_enter() irq_enter() -#define nmi_exit() __irq_exit() +#define nmi_enter() do { lockdep_off(); irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 444a3ae0de2a..60aac2cea0cf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -4,6 +4,7 @@ #include #include #include +#include #define INIT_FDTABLE \ { \ @@ -126,6 +127,7 @@ extern struct group_info init_groups; .fs_excl = ATOMIC_INIT(0), \ .pi_lock = SPIN_LOCK_UNLOCKED, \ INIT_TRACE_IRQFLAGS \ + INIT_LOCKDEP \ } diff --git a/include/linux/lockdep.h 
b/include/linux/lockdep.h new file mode 100644 index 000000000000..80ec7a4dbc98 --- /dev/null +++ b/include/linux/lockdep.h @@ -0,0 +1,347 @@ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * + * see Documentation/lockdep-design.txt for more details. + */ +#ifndef __LINUX_LOCKDEP_H +#define __LINUX_LOCKDEP_H + +#include +#include +#include +#include + +#ifdef CONFIG_LOCKDEP + +/* + * Lock-class usage-state bits: + */ +enum lock_usage_bit +{ + LOCK_USED = 0, + LOCK_USED_IN_HARDIRQ, + LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, + LOCK_ENABLED_HARDIRQS, + LOCK_USED_IN_HARDIRQ_READ, + LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS_READ, + LOCK_ENABLED_HARDIRQS_READ, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define LOCKF_USED (1 << LOCK_USED) +#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) + +#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) + +#define LOCKF_ENABLED_IRQS_READ \ + (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +/* + * Lock-classes are keyed via unique addresses, by embedding the + * lockclass-key into the kernel (or module) .data section. (For + * static locks we use the lock address itself as the key.) 
+ */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +struct lock_class_key { + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; +}; + +/* + * The lock-class itself: + */ +struct lock_class { + /* + * class-hash: + */ + struct list_head hash_entry; + + /* + * global list of all lock-classes: + */ + struct list_head lock_entry; + + struct lockdep_subclass_key *key; + unsigned int subclass; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + struct stack_trace usage_traces[LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + unsigned int version; + + /* + * Statistics counter: + */ + unsigned long ops; + + const char *name; + int name_version; +}; + +/* + * Map the lock object (the lock instance) to the lock-class object. + * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class[MAX_LOCKDEP_SUBCLASSES]; + const char *name; +}; + +/* + * Every lock has a list of other locks that were taken after it. + * We only grow the list, never remove from it: + */ +struct lock_list { + struct list_head entry; + struct lock_class *class; + struct stack_trace trace; +}; + +/* + * We record lock dependency chains, so that we can cache them: + */ +struct lock_chain { + struct list_head entry; + u64 chain_key; +}; + +struct held_lock { + /* + * One-way hash of the dependency chain up to this point. We + * hash the hashes step by step as the dependency chain grows. 
+ * + * We use it for dependency-caching and we skip detection + * passes and dependency-updates if there is a cache-hit, so + * it is absolutely critical for 100% coverage of the validator + * to have a unique key value for every unique dependency path + * that can occur in the system, to make a unique hash value + * as likely as possible - hence the 64-bit width. + * + * The task struct holds the current hash value (initialized + * with zero), here we store the previous hash value: + */ + u64 prev_chain_key; + struct lock_class *class; + unsigned long acquire_ip; + struct lockdep_map *instance; + + /* + * The lock-stack is unified in that the lock chains of interrupt + * contexts nest on top of process context chains, but we 'separate' + * the hashes by starting with 0 if we cross into an interrupt + * context, and we also do not add cross-context lock + * dependencies - the lock usage graph walking covers that area + * anyway, and we'd just unnecessarily increase the number of + * dependencies otherwise. [Note: hardirq and softirq contexts + * are separated from each other too.] 
+ * + * The following field is used to detect when we cross into an + * interrupt context: + */ + int irq_context; + int trylock; + int read; + int check; + int hardirqs_off; +}; + +/* + * Initialization, self-test and debugging-output methods: + */ +extern void lockdep_init(void); +extern void lockdep_info(void); +extern void lockdep_reset(void); +extern void lockdep_reset_lock(struct lockdep_map *lock); +extern void lockdep_free_key_range(void *start, unsigned long size); + +extern void lockdep_off(void); +extern void lockdep_on(void); +extern int lockdep_internal(void); + +/* + * These methods are used by specific locking variants (spinlocks, + * rwlocks, mutexes and rwsems) to pass init/acquire/release events + * to lockdep: + */ + +extern void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key); + +/* + * Reinitialize a lock key - for cases where there is special locking or + * special initialization of locks so that the validator gets the scope + * of dependencies wrong: they are either too broad (they need a class-split) + * or they are too narrow (they suffer from a false class-split): + */ +#define lockdep_set_class(lock, key) \ + lockdep_init_map(&(lock)->dep_map, #key, key) +#define lockdep_set_class_and_name(lock, key, name) \ + lockdep_init_map(&(lock)->dep_map, name, key) + +/* + * Acquire a lock. + * + * Values for "read": + * + * 0: exclusive (write) acquire + * 1: read-acquire (no recursion allowed) + * 2: read-acquire with same-instance recursion allowed + * + * Values for check: + * + * 0: disabled + * 1: simple checks (freeing, held-at-exit-time, etc.) 
+ * 2: full validation + */ +extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip); + +extern void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip); + +# define INIT_LOCKDEP .lockdep_recursion = 0, + +#else /* !LOCKDEP */ + +static inline void lockdep_off(void) +{ +} + +static inline void lockdep_on(void) +{ +} + +static inline int lockdep_internal(void) +{ + return 0; +} + +# define lock_acquire(l, s, t, r, c, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lockdep_init() do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); } while (0) +# define INIT_LOCKDEP +# define lockdep_reset() do { debug_locks = 1; } while (0) +# define lockdep_free_key_range(start, size) do { } while (0) +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; +#endif /* !LOCKDEP */ + +#ifdef CONFIG_TRACE_IRQFLAGS +extern void early_boot_irqs_off(void); +extern void early_boot_irqs_on(void); +#else +# define early_boot_irqs_off() do { } while (0) +# define early_boot_irqs_on() do { } while (0) +#endif + +/* + * For trivial one-depth nesting of a lock-class, the following + * global define can be used. (Subsystems with multiple levels + * of nesting should define their own lock-nesting subclasses.) 
+ */ +#define SINGLE_DEPTH_NESTING 1 + +/* + * Map the dependency ops to NOP or to real lockdep ops, depending + * on the per lock-class debug mode: + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define spin_release(l, n, i) lock_release(l, n, i) +#else +# define spin_acquire(l, s, t, i) do { } while (0) +# define spin_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, i) +# else +# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, i) +# endif +# define rwlock_release(l, n, i) lock_release(l, n, i) +#else +# define rwlock_acquire(l, s, t, i) do { } while (0) +# define rwlock_acquire_read(l, s, t, i) do { } while (0) +# define rwlock_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# else +# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# endif +# define mutex_release(l, n, i) lock_release(l, n, i) +#else +# define mutex_acquire(l, s, t, i) do { } while (0) +# define mutex_release(l, n, i) do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# ifdef CONFIG_PROVE_LOCKING +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, i) +# else +# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, i) +# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, i) +# endif +# define rwsem_release(l, n, i) lock_release(l, n, i) +#else +# define rwsem_acquire(l, s, t, i) do { } while (0) +# 
define rwsem_acquire_read(l, s, t, i) do { } while (0) +# define rwsem_release(l, n, i) do { } while (0) +#endif + +#endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index ad7a89014d29..8ebddba4448d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -886,6 +886,13 @@ struct task_struct { int hardirq_context; int softirq_context; #endif +#ifdef CONFIG_LOCKDEP +# define MAX_LOCK_DEPTH 30UL + u64 curr_chain_key; + int lockdep_depth; + struct held_lock held_locks[MAX_LOCK_DEPTH]; + unsigned int lockdep_recursion; +#endif /* journalling filesystem info */ void *journal_info; diff --git a/init/main.c b/init/main.c index fc73e1cd8614..fc473d4b56fd 100644 --- a/init/main.c +++ b/init/main.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -457,6 +458,15 @@ asmlinkage void __init start_kernel(void) smp_setup_processor_id(); + /* + * Need to run as early as possible, to initialize the + * lockdep hash: + */ + lockdep_init(); + + local_irq_disable(); + early_boot_irqs_off(); + /* * Interrupts are still disabled. 
Do necessary setups, then * enable them @@ -502,6 +512,7 @@ asmlinkage void __init start_kernel(void) profile_init(); if (!irqs_disabled()) printk("start_kernel(): bug: interrupts were enabled early\n"); + early_boot_irqs_on(); local_irq_enable(); /* @@ -512,6 +523,9 @@ asmlinkage void __init start_kernel(void) console_init(); if (panic_later) panic(panic_later, panic_param); + + lockdep_info(); + /* * Need to run this when irqs are enabled, because it wants * to self-test [hard/soft]-irqs on/off lock inversion bugs diff --git a/kernel/Makefile b/kernel/Makefile index e7fd20e70f1b..049aa79e5c1c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -13,6 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o +obj-$(CONFIG_LOCKDEP) += lockdep.o obj-$(CONFIG_FUTEX) += futex.o ifeq ($(CONFIG_COMPAT),y) obj-$(CONFIG_FUTEX) += futex_compat.o diff --git a/kernel/fork.c b/kernel/fork.c index b7db7fb74f53..7f48abdd7bb6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1061,6 +1061,11 @@ static task_t *copy_process(unsigned long clone_flags, p->hardirq_context = 0; p->softirq_context = 0; #endif +#ifdef CONFIG_LOCKDEP + p->lockdep_depth = 0; /* no locks held yet */ + p->curr_chain_key = 0; + p->lockdep_recursion = 0; +#endif rt_mutex_init_task(p); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c911c6ec4dd6..4e461438e48b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -410,6 +410,12 @@ int request_irq(unsigned int irq, struct irqaction *action; int retval; +#ifdef CONFIG_LOCKDEP + /* + * Lockdep wants atomic interrupt handlers: + */ + irqflags |= SA_INTERRUPT; +#endif /* * Sanity-check: shared interrupts must pass in a real dev-ID, * otherwise we'll have trouble later trying to figure out diff --git a/kernel/lockdep.c b/kernel/lockdep.c new file mode 100644 index 000000000000..dd0580910a97 --- /dev/null +++ b/kernel/lockdep.c @@ 
-0,0 +1,2703 @@ +/* + * kernel/lockdep.c + * + * Runtime locking correctness validator + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar + * + * this code maps all the lock dependencies as they occur in a live kernel + * and will warn about the following classes of locking bugs: + * + * - lock inversion scenarios + * - circular lock dependencies + * - hardirq/softirq safe/unsafe locking bugs + * + * Bugs are reported even if the current locking scenario does not cause + * any deadlock at this point. + * + * I.e. if anytime in the past two locks were taken in a different order, + * even if it happened for another task, even if those were different + * locks (but of the same class as this lock), this code will detect it. + * + * Thanks to Arjan van de Ven for coming up with the initial idea of + * mapping lock dependencies runtime. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "lockdep_internals.h" + +/* + * hash_lock: protects the lockdep hashes and class/list/hash allocators. + * + * This is one of the rare exceptions where it's justified + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code. + */ +static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + +static int lockdep_initialized; + +unsigned long nr_list_entries; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; + +/* + * Allocate a lockdep entry. 
(assumes hash_lock held, returns + * with NULL on failure) + */ +static struct lock_list *alloc_list_entry(void) +{ + if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + return list_entries + nr_list_entries++; +} + +/* + * All data structures here are protected by the global debug_lock. + * + * Mutex key structs only get allocated, once during bootup, and never + * get freed - this significantly simplifies the debugging code. + */ +unsigned long nr_lock_classes; +static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; + +/* + * We keep a global list of all lock classes. The list only grows, + * never shrinks. The list is only accessed with the lockdep + * spinlock lock held. + */ +LIST_HEAD(all_lock_classes); + +/* + * The lockdep classes are in a hash-table as well, for fast lookup: + */ +#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1) +#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS) +#define CLASSHASH_MASK (CLASSHASH_SIZE - 1) +#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK) +#define classhashentry(key) (classhash_table + __classhashfn((key))) + +static struct list_head classhash_table[CLASSHASH_SIZE]; + +unsigned long nr_lock_chains; +static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS]; + +/* + * We put the lock dependency chains into a hash-table as well, to cache + * their existence: + */ +#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1) +#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS) +#define CHAINHASH_MASK (CHAINHASH_SIZE - 1) +#define __chainhashfn(chain) \ + (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK) +#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain))) + +static struct list_head chainhash_table[CHAINHASH_SIZE]; + +/* + * The hash key of the lock dependency chains is a hash 
itself too: + * it's a hash of all locks taken up to that lock, including that lock. + * It's a 64-bit hash, because it's important for the keys to be + * unique. + */ +#define iterate_chain_key(key1, key2) \ + (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \ + ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \ + (key2)) + +void lockdep_off(void) +{ + current->lockdep_recursion++; +} + +EXPORT_SYMBOL(lockdep_off); + +void lockdep_on(void) +{ + current->lockdep_recursion--; +} + +EXPORT_SYMBOL(lockdep_on); + +int lockdep_internal(void) +{ + return current->lockdep_recursion != 0; +} + +EXPORT_SYMBOL(lockdep_internal); + +/* + * Debugging switches: + */ + +#define VERBOSE 0 +#ifdef VERBOSE +# define VERY_VERBOSE 0 +#endif + +#if VERBOSE +# define HARDIRQ_VERBOSE 1 +# define SOFTIRQ_VERBOSE 1 +#else +# define HARDIRQ_VERBOSE 0 +# define SOFTIRQ_VERBOSE 0 +#endif + +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE +/* + * Quick filtering for interesting events: + */ +static int class_filter(struct lock_class *class) +{ + if (class->name_version == 1 && + !strcmp(class->name, "&rl->lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&ni->mrec_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "mft_ni_runlist_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "mft_ni_mrec_lock")) + return 1; + if (class->name_version == 1 && + !strcmp(class->name, "&vol->lcnbmp_lock")) + return 1; + return 0; +} +#endif + +static int verbose(struct lock_class *class) +{ +#if VERBOSE + return class_filter(class); +#endif + return 0; +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +static int hardirq_verbose(struct lock_class *class) +{ +#if HARDIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +static int softirq_verbose(struct lock_class *class) +{ +#if SOFTIRQ_VERBOSE + return class_filter(class); +#endif + return 0; +} + +#endif + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. 
Protected by the hash_lock. + */ +unsigned long nr_stack_trace_entries; +static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES]; + +static int save_trace(struct stack_trace *trace) +{ + trace->nr_entries = 0; + trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + trace->entries = stack_trace + nr_stack_trace_entries; + + save_stack_trace(trace, NULL, 0, 3); + + trace->max_entries = trace->nr_entries; + + nr_stack_trace_entries += trace->nr_entries; + if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES)) + return 0; + + if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) { + __raw_spin_unlock(&hash_lock); + if (debug_locks_off()) { + printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + } + return 0; + } + + return 1; +} + +unsigned int nr_hardirq_chains; +unsigned int nr_softirq_chains; +unsigned int nr_process_chains; +unsigned int max_lockdep_depth; +unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * We cannot printk in early bootup code. Not even early_printk() + * might work. So we mark any initialization errors and printk + * about it later on, in lockdep_info(). 
+ */ +static int lockdep_init_error; + +/* + * Various lockdep statistics: + */ +atomic_t chain_lookup_hits; +atomic_t chain_lookup_misses; +atomic_t hardirqs_on_events; +atomic_t hardirqs_off_events; +atomic_t redundant_hardirqs_on; +atomic_t redundant_hardirqs_off; +atomic_t softirqs_on_events; +atomic_t softirqs_off_events; +atomic_t redundant_softirqs_on; +atomic_t redundant_softirqs_off; +atomic_t nr_unused_locks; +atomic_t nr_cyclic_checks; +atomic_t nr_cyclic_check_recursions; +atomic_t nr_find_usage_forwards_checks; +atomic_t nr_find_usage_forwards_recursions; +atomic_t nr_find_usage_backwards_checks; +atomic_t nr_find_usage_backwards_recursions; +# define debug_atomic_inc(ptr) atomic_inc(ptr) +# define debug_atomic_dec(ptr) atomic_dec(ptr) +# define debug_atomic_read(ptr) atomic_read(ptr) +#else +# define debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_dec(ptr) do { } while (0) +# define debug_atomic_read(ptr) 0 +#endif + +/* + * Locking printouts: + */ + +static const char *usage_str[] = +{ + [LOCK_USED] = "initial-use ", + [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", + [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", + [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", + [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W", + [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R", + [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R", + [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R", + [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R", +}; + +const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +{ + unsigned long offs, size; + char *modname; + + return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str); +} + +void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4) +{ + *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; + + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) + *c1 = '+'; + else + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) + *c1 = '-'; + + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) + *c2 = '+'; + else + if 
(class->usage_mask & LOCKF_ENABLED_SOFTIRQS) + *c2 = '-'; + + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + *c3 = '-'; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { + *c3 = '+'; + if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + *c3 = '?'; + } + + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + *c4 = '-'; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { + *c4 = '+'; + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + *c4 = '?'; + } +} + +static void print_lock_name(struct lock_class *class) +{ + char str[128], c1, c2, c3, c4; + const char *name; + + get_usage_chars(class, &c1, &c2, &c3, &c4); + + name = class->name; + if (!name) { + name = __get_key_name(class->key, str); + printk(" (%s", name); + } else { + printk(" (%s", name); + if (class->name_version > 1) + printk("#%d", class->name_version); + if (class->subclass) + printk("/%d", class->subclass); + } + printk("){%c%c%c%c}", c1, c2, c3, c4); +} + +static void print_lockdep_cache(struct lockdep_map *lock) +{ + const char *name; + char str[128]; + + name = lock->name; + if (!name) + name = __get_key_name(lock->key->subkeys, str); + + printk("%s", name); +} + +static void print_lock(struct held_lock *hlock) +{ + print_lock_name(hlock->class); + printk(", at: "); + print_ip_sym(hlock->acquire_ip); +} + +static void lockdep_print_held_locks(struct task_struct *curr) +{ + int i, depth = curr->lockdep_depth; + + if (!depth) { + printk("no locks held by %s/%d.\n", curr->comm, curr->pid); + return; + } + printk("%d lock%s held by %s/%d:\n", + depth, depth > 1 ? 
"s" : "", curr->comm, curr->pid); + + for (i = 0; i < depth; i++) { + printk(" #%d: ", i); + print_lock(curr->held_locks + i); + } +} +/* + * Helper to print a nice hierarchy of lock dependencies: + */ +static void print_spaces(int nr) +{ + int i; + + for (i = 0; i < nr; i++) + printk(" "); +} + +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + print_spaces(depth); + printk("->"); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + print_spaces(depth); + len += printk(" %s", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + print_spaces(depth); + printk(" }\n"); + + print_spaces(depth); + printk(" ... key at: "); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk all lock dependencies starting at : + */ +static void print_lock_dependencies(struct lock_class *class, int depth) +{ + struct lock_list *entry; + + if (DEBUG_LOCKS_WARN_ON(depth >= 20)) + return; + + print_lock_class_header(class, depth); + + list_for_each_entry(entry, &class->locks_after, entry) { + DEBUG_LOCKS_WARN_ON(!entry->class); + print_lock_dependencies(entry->class, depth + 1); + + print_spaces(depth); + printk(" ... 
acquired at:\n"); + print_stack_trace(&entry->trace, 2); + printk("\n"); + } +} + +/* + * Add a new dependency to the head of the list: + */ +static int add_lock_to_list(struct lock_class *class, struct lock_class *this, + struct list_head *head, unsigned long ip) +{ + struct lock_list *entry; + /* + * Lock not present yet - get a new dependency struct and + * add it to the list: + */ + entry = alloc_list_entry(); + if (!entry) + return 0; + + entry->class = this; + save_trace(&entry->trace); + + /* + * Since we never remove from the dependency list, the list can + * be walked lockless by other CPUs, it's only allocation + * that must be protected by the spinlock. But this also means + * we must make new entries visible only once writes to the + * entry become visible - hence the RCU op: + */ + list_add_tail_rcu(&entry->entry, head); + + return 1; +} + +/* + * Recursive, forwards-direction lock-dependency checking, used for + * both noncyclic checking and for hardirq-unsafe/softirq-unsafe + * checking. + * + * (to keep the stackframe of the recursive functions small we + * use these global variables, and we also mark various helper + * functions as noinline.) 
+ */ +static struct held_lock *check_source, *check_target; + +/* + * Print a dependency chain entry (this is only done when a deadlock + * has been detected): + */ +static noinline int +print_circular_bug_entry(struct lock_list *target, unsigned int depth) +{ + if (debug_locks_silent) + return 0; + printk("\n-> #%u", depth); + print_lock_name(target->class); + printk(":\n"); + print_stack_trace(&target->trace, 6); + + return 0; +} + +/* + * When a circular dependency is detected, print the + * header first: + */ +static noinline int +print_circular_bug_header(struct lock_list *entry, unsigned int depth) +{ + struct task_struct *curr = current; + + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n=======================================================\n"); + printk( "[ INFO: possible circular locking dependency detected ]\n"); + printk( "-------------------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, curr->pid); + print_lock(check_source); + printk("\nbut task is already holding lock:\n"); + print_lock(check_target); + printk("\nwhich lock already depends on the new lock.\n\n"); + printk("\nthe existing dependency chain (in reverse order) is:\n"); + + print_circular_bug_entry(entry, depth); + + return 0; +} + +static noinline int print_circular_bug_tail(void) +{ + struct task_struct *curr = current; + struct lock_list this; + + if (debug_locks_silent) + return 0; + + this.class = check_source->class; + save_trace(&this.trace); + print_circular_bug_entry(&this, 0); + + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int noinline print_infinite_recursion_bug(void) +{ + __raw_spin_unlock(&hash_lock); + DEBUG_LOCKS_WARN_ON(1); + + return 0; +} + +/* + * Prove that the dependency graph starting at can not + * lead to . 
Print an error and return 0 if it does. + */ +static noinline int +check_noncircular(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + + debug_atomic_inc(&nr_cyclic_check_recursions); + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_after, entry) { + if (entry->class == check_target->class) + return print_circular_bug_header(entry, depth+1); + debug_atomic_inc(&nr_cyclic_checks); + if (!check_noncircular(entry->class, depth+1)) + return print_circular_bug_entry(entry, depth+1); + } + return 1; +} + +static int very_verbose(struct lock_class *class) +{ +#if VERY_VERBOSE + return class_filter(class); +#endif + return 0; +} +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * Forwards and backwards subgraph searching, for the purposes of + * proving that two subgraphs can be connected by a new dependency + * without creating any illegal irq-safe -> irq-unsafe lock dependency. + */ +static enum lock_usage_bit find_usage_bit; +static struct lock_class *forwards_match, *backwards_match; + +/* + * Find a node in the forwards-direction dependency sub-graph starting + * at that matches . + * + * Return 2 if such a node exists in the subgraph, and put that node + * into . + * + * Return 1 otherwise and keep unchanged. + * Return 0 on error. 
+ */ +static noinline int +find_usage_forwards(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + int ret; + + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + + debug_atomic_inc(&nr_find_usage_forwards_checks); + if (source->usage_mask & (1 << find_usage_bit)) { + forwards_match = source; + return 2; + } + + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_after, entry) { + debug_atomic_inc(&nr_find_usage_forwards_recursions); + ret = find_usage_forwards(entry->class, depth+1); + if (ret == 2 || ret == 0) + return ret; + } + return 1; +} + +/* + * Find a node in the backwards-direction dependency sub-graph starting + * at that matches . + * + * Return 2 if such a node exists in the subgraph, and put that node + * into . + * + * Return 1 otherwise and keep unchanged. + * Return 0 on error. + */ +static noinline int +find_usage_backwards(struct lock_class *source, unsigned int depth) +{ + struct lock_list *entry; + int ret; + + if (depth > max_recursion_depth) + max_recursion_depth = depth; + if (depth >= 20) + return print_infinite_recursion_bug(); + + debug_atomic_inc(&nr_find_usage_backwards_checks); + if (source->usage_mask & (1 << find_usage_bit)) { + backwards_match = source; + return 2; + } + + /* + * Check this lock's dependency list: + */ + list_for_each_entry(entry, &source->locks_before, entry) { + debug_atomic_inc(&nr_find_usage_backwards_recursions); + ret = find_usage_backwards(entry->class, depth+1); + if (ret == 2 || ret == 0) + return ret; + } + return 1; +} + +static int +print_bad_irq_dependency(struct task_struct *curr, + struct held_lock *prev, + struct held_lock *next, + enum lock_usage_bit bit1, + enum lock_usage_bit bit2, + const char *irqclass) +{ + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + 
printk("\n======================================================\n"); + printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", + irqclass, irqclass); + printk( "------------------------------------------------------\n"); + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", + curr->comm, curr->pid, + curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, + curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, + curr->hardirqs_enabled, + curr->softirqs_enabled); + print_lock(next); + + printk("\nand this task is already holding:\n"); + print_lock(prev); + printk("which would create a new lock dependency:\n"); + print_lock_name(prev->class); + printk(" ->"); + print_lock_name(next->class); + printk("\n"); + + printk("\nbut this new dependency connects a %s-irq-safe lock:\n", + irqclass); + print_lock_name(backwards_match); + printk("\n... which became %s-irq-safe at:\n", irqclass); + + print_stack_trace(backwards_match->usage_traces + bit1, 1); + + printk("\nto a %s-irq-unsafe lock:\n", irqclass); + print_lock_name(forwards_match); + printk("\n... 
which became %s-irq-unsafe at:\n", irqclass); + printk("..."); + + print_stack_trace(forwards_match->usage_traces + bit2, 1); + + printk("\nother info that might help us debug this:\n\n"); + lockdep_print_held_locks(curr); + + printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass); + print_lock_dependencies(backwards_match, 0); + + printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass); + print_lock_dependencies(forwards_match, 0); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +static int +check_usage(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, enum lock_usage_bit bit_backwards, + enum lock_usage_bit bit_forwards, const char *irqclass) +{ + int ret; + + find_usage_bit = bit_backwards; + /* fills in */ + ret = find_usage_backwards(prev->class, 0); + if (!ret || ret == 1) + return ret; + + find_usage_bit = bit_forwards; + ret = find_usage_forwards(next->class, 0); + if (!ret || ret == 1) + return ret; + /* ret == 2 */ + return print_bad_irq_dependency(curr, prev, next, + bit_backwards, bit_forwards, irqclass); +} + +#endif + +static int +print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + debug_locks_off(); + __raw_spin_unlock(&hash_lock); + if (debug_locks_silent) + return 0; + + printk("\n=============================================\n"); + printk( "[ INFO: possible recursive locking detected ]\n"); + printk( "---------------------------------------------\n"); + printk("%s/%d is trying to acquire lock:\n", + curr->comm, curr->pid); + print_lock(next); + printk("\nbut task is already holding lock:\n"); + print_lock(prev); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Check whether we are holding such a class already. 
+ * + * (Note that this has to be done separately, because the graph cannot + * detect such classes of deadlocks.) + * + * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + */ +static int +check_deadlock(struct task_struct *curr, struct held_lock *next, + struct lockdep_map *next_instance, int read) +{ + struct held_lock *prev; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + prev = curr->held_locks + i; + if (prev->class != next->class) + continue; + /* + * Allow read-after-read recursion of the same + * lock instance (i.e. read_lock(lock)+read_lock(lock)): + */ + if ((read == 2) && prev->read && + (prev->instance == next_instance)) + return 2; + return print_deadlock_bug(curr, prev, next); + } + return 1; +} + +/* + * There was a chain-cache miss, and we are about to add a new dependency + * to a previous lock. We recursively validate the following rules: + * + * - would the adding of the -> dependency create a + * circular dependency in the graph? [== circular deadlock] + * + * - does the new prev->next dependency connect any hardirq-safe lock + * (in the full backwards-subgraph starting at ) with any + * hardirq-unsafe lock (in the full forwards-subgraph starting at + * )? [== illegal lock inversion with hardirq contexts] + * + * - does the new prev->next dependency connect any softirq-safe lock + * (in the full backwards-subgraph starting at ) with any + * softirq-unsafe lock (in the full forwards-subgraph starting at + * )? [== illegal lock inversion with softirq contexts] + * + * any of these scenarios could lead to a deadlock. + * + * Then if all the validations pass, we add the forwards and backwards + * dependency. + */ +static int +check_prev_add(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ + struct lock_list *entry; + int ret; + + /* + * Prove that the new -> dependency would not + * create a circular dependency in the graph. 
(We do this by + * forward-recursing into the graph starting at , and + * checking whether we can reach .) + * + * We are using global variables to control the recursion, to + * keep the stackframe size of the recursive functions low: + */ + check_source = next; + check_target = prev; + if (!(check_noncircular(next->class, 0))) + return print_circular_bug_tail(); + +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * Prove that the new dependency does not connect a hardirq-safe + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, + LOCK_ENABLED_HARDIRQS, "hard")) + return 0; + + /* + * Prove that the new dependency does not connect a hardirq-safe-read + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, + LOCK_ENABLED_HARDIRQS, "hard-read")) + return 0; + + /* + * Prove that the new dependency does not connect a softirq-safe + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + /* + * Prove that the new dependency does not connect a softirq-safe-read + * lock with a softirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ + if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; +#endif + /* + * For recursive read-locks we do all the dependency checks, + * but we dont store read-triggered dependencies (only + * write-triggered dependencies). 
This ensures that only the + * write-side dependencies matter, and that if for example a + * write-lock never takes any other locks, then the reads are + * equivalent to a NOP. + */ + if (next->read == 2 || prev->read == 2) + return 1; + /* + * Is the -> dependency already present? + * + * (this may occur even though this is a new chain: consider + * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3 + * chains - the second one will be new, but L1 already has + * L2 added to its dependency list, due to the first chain.) + */ + list_for_each_entry(entry, &prev->class->locks_after, entry) { + if (entry->class == next->class) + return 2; + } + + /* + * Ok, all validations passed, add the new lock + * to the previous lock's dependency list: + */ + ret = add_lock_to_list(prev->class, next->class, + &prev->class->locks_after, next->acquire_ip); + if (!ret) + return 0; + /* + * Return value of 2 signals 'dependency already added', + * in that case we dont have to add the backlink either. + */ + if (ret == 2) + return 2; + ret = add_lock_to_list(next->class, prev->class, + &next->class->locks_before, next->acquire_ip); + + /* + * Debugging printouts: + */ + if (verbose(prev->class) || verbose(next->class)) { + __raw_spin_unlock(&hash_lock); + printk("\n new dependency: "); + print_lock_name(prev->class); + printk(" => "); + print_lock_name(next->class); + printk("\n"); + dump_stack(); + __raw_spin_lock(&hash_lock); + } + return 1; +} + +/* + * Add the dependency to all directly-previous locks that are 'relevant'. + * The ones that are relevant are (in increasing distance from curr): + * all consecutive trylock entries and the final non-trylock entry - or + * the end of this context's lock-chain - whichever comes first. + */ +static int +check_prevs_add(struct task_struct *curr, struct held_lock *next) +{ + int depth = curr->lockdep_depth; + struct held_lock *hlock; + + /* + * Debugging checks. 
+ * + * Depth must not be zero for a non-head lock: + */ + if (!depth) + goto out_bug; + /* + * At least two relevant locks must exist for this + * to be a head: + */ + if (curr->held_locks[depth].irq_context != + curr->held_locks[depth-1].irq_context) + goto out_bug; + + for (;;) { + hlock = curr->held_locks + depth-1; + /* + * Only non-recursive-read entries get new dependencies + * added: + */ + if (hlock->read != 2) { + check_prev_add(curr, hlock, next); + /* + * Stop after the first non-trylock entry, + * as non-trylock entries have added their + * own direct dependencies already, so this + * lock is connected to them indirectly: + */ + if (!hlock->trylock) + break; + } + depth--; + /* + * End of lock-stack? + */ + if (!depth) + break; + /* + * Stop the search if we cross into another context: + */ + if (curr->held_locks[depth].irq_context != + curr->held_locks[depth-1].irq_context) + break; + } + return 1; +out_bug: + __raw_spin_unlock(&hash_lock); + DEBUG_LOCKS_WARN_ON(1); + + return 0; +} + + +/* + * Is this the address of a static object: + */ +static int static_obj(void *obj) +{ + unsigned long start = (unsigned long) &_stext, + end = (unsigned long) &_end, + addr = (unsigned long) obj; +#ifdef CONFIG_SMP + int i; +#endif + + /* + * static variable? + */ + if ((addr >= start) && (addr < end)) + return 1; + +#ifdef CONFIG_SMP + /* + * percpu var? + */ + for_each_possible_cpu(i) { + start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); + end = (unsigned long) &__per_cpu_end + per_cpu_offset(i); + + if ((addr >= start) && (addr < end)) + return 1; + } +#endif + + /* + * module var? 
+ */ + return is_module_address(addr); +} + +/* + * To make lock name printouts unique, we calculate a unique + * class->name_version generation counter: + */ +static int count_matching_names(struct lock_class *new_class) +{ + struct lock_class *class; + int count = 0; + + if (!new_class->name) + return 0; + + list_for_each_entry(class, &all_lock_classes, lock_entry) { + if (new_class->key - new_class->subclass == class->key) + return class->name_version; + if (class->name && !strcmp(class->name, new_class->name)) + count = max(count, class->name_version); + } + + return count + 1; +} + +extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void); + +/* + * Register a lock's class in the hash-table, if the class is not present + * yet. Otherwise we look it up. We cache the result in the lock object + * itself, so actual lookup of the hash should be once per lock object. + */ +static inline struct lock_class * +register_lock_class(struct lockdep_map *lock, unsigned int subclass) +{ + struct lockdep_subclass_key *key; + struct list_head *hash_head; + struct lock_class *class; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * If the architecture calls into lockdep before initializing + * the hashes then we'll warn about it later. (we cannot printk + * right now) + */ + if (unlikely(!lockdep_initialized)) { + lockdep_init(); + lockdep_init_error = 1; + } +#endif + + /* + * Static locks do not have their class-keys yet - for them the key + * is the lock object itself: + */ + if (unlikely(!lock->key)) + lock->key = (void *)lock; + + /* + * NOTE: the class-key must be unique. For dynamic locks, a static + * lock_class_key variable is passed in through the mutex_init() + * (or spin_lock_init()) call - which acts as the key. For static + * locks we use the lock object itself as the key. 
+ */ + if (sizeof(struct lock_class_key) > sizeof(struct lock_class)) + __error_too_big_MAX_LOCKDEP_SUBCLASSES(); + + key = lock->key->subkeys + subclass; + + hash_head = classhashentry(key); + + /* + * We can walk the hash lockfree, because the hash only + * grows, and we are careful when adding entries to the end: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_set; + + /* + * Debug-check: all keys must be persistent! + */ + if (!static_obj(lock->key)) { + debug_locks_off(); + printk("INFO: trying to register non-static key.\n"); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); + + return NULL; + } + + __raw_spin_lock(&hash_lock); + /* + * We have to do the hash-walk again, to avoid races + * with another CPU: + */ + list_for_each_entry(class, hash_head, hash_entry) + if (class->key == key) + goto out_unlock_set; + /* + * Allocate a new key from the static array, and add it to + * the hash: + */ + if (nr_lock_classes >= MAX_LOCKDEP_KEYS) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return NULL; + } + class = lock_classes + nr_lock_classes++; + debug_atomic_inc(&nr_unused_locks); + class->key = key; + class->name = lock->name; + class->subclass = subclass; + INIT_LIST_HEAD(&class->lock_entry); + INIT_LIST_HEAD(&class->locks_before); + INIT_LIST_HEAD(&class->locks_after); + class->name_version = count_matching_names(class); + /* + * We use RCU's safe list-add method to make + * parallel walking of the hash-list safe: + */ + list_add_tail_rcu(&class->hash_entry, hash_head); + + if (verbose(class)) { + __raw_spin_unlock(&hash_lock); + printk("\nnew class %p: %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + 
__raw_spin_lock(&hash_lock); + } +out_unlock_set: + __raw_spin_unlock(&hash_lock); + +out_set: + lock->class[subclass] = class; + + DEBUG_LOCKS_WARN_ON(class->subclass != subclass); + + return class; +} + +/* + * Look up a dependency chain. If the key is not present yet then + * add it and return 0 - in this case the new dependency chain is + * validated. If the key is already hashed, return 1. + */ +static inline int lookup_chain_cache(u64 chain_key) +{ + struct list_head *hash_head = chainhashentry(chain_key); + struct lock_chain *chain; + + DEBUG_LOCKS_WARN_ON(!irqs_disabled()); + /* + * We can walk it lock-free, because entries only get added + * to the hash: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { +cache_hit: + debug_atomic_inc(&chain_lookup_hits); + /* + * In the debugging case, force redundant checking + * by returning 1: + */ +#ifdef CONFIG_DEBUG_LOCKDEP + __raw_spin_lock(&hash_lock); + return 1; +#endif + return 0; + } + } + /* + * Allocate a new chain entry from the static array, and add + * it to the hash: + */ + __raw_spin_lock(&hash_lock); + /* + * We have to walk the chain again locked - to avoid duplicates: + */ + list_for_each_entry(chain, hash_head, entry) { + if (chain->chain_key == chain_key) { + __raw_spin_unlock(&hash_lock); + goto cache_hit; + } + } + if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + chain = lock_chains + nr_lock_chains++; + chain->chain_key = chain_key; + list_add_tail_rcu(&chain->entry, hash_head); + debug_atomic_inc(&chain_lookup_misses); +#ifdef CONFIG_TRACE_IRQFLAGS + if (current->hardirq_context) + nr_hardirq_chains++; + else { + if (current->softirq_context) + nr_softirq_chains++; + else + nr_process_chains++; + } +#else + nr_process_chains++; +#endif + + return 1; +} + +/* + * We are 
building curr_chain_key incrementally, so double-check + * it from scratch, to make sure that it's done correctly: + */ +static void check_chain_key(struct task_struct *curr) +{ +#ifdef CONFIG_DEBUG_LOCKDEP + struct held_lock *hlock, *prev_hlock = NULL; + unsigned int i, id; + u64 chain_key = 0; + + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + if (chain_key != hlock->prev_chain_key) { + debug_locks_off(); + printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)hlock->prev_chain_key); + WARN_ON(1); + return; + } + id = hlock->class - lock_classes; + DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS); + if (prev_hlock && (prev_hlock->irq_context != + hlock->irq_context)) + chain_key = 0; + chain_key = iterate_chain_key(chain_key, id); + prev_hlock = hlock; + } + if (chain_key != curr->curr_chain_key) { + debug_locks_off(); + printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n", + curr->lockdep_depth, i, + (unsigned long long)chain_key, + (unsigned long long)curr->curr_chain_key); + WARN_ON(1); + } +#endif +} + +#ifdef CONFIG_TRACE_IRQFLAGS + +/* + * print irq inversion bug: + */ +static int +print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, + struct held_lock *this, int forwards, + const char *irqclass) +{ + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n=========================================================\n"); + printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); + printk( "---------------------------------------------------------\n"); + printk("%s/%d just changed the state of lock:\n", + curr->comm, curr->pid); + print_lock(this); + if (forwards) + printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); + else + printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); + print_lock_name(other); + 
printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nthe first lock's dependencies:\n"); + print_lock_dependencies(this->class, 0); + + printk("\nthe second lock's dependencies:\n"); + print_lock_dependencies(other, 0); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Prove that in the forwards-direction subgraph starting at 'this' + * there is no lock matching usage 'bit': + */ +static int +check_usage_forwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + + find_usage_bit = bit; + /* fills in 'forwards_match' */ + ret = find_usage_forwards(this->class, 0); + if (!ret || ret == 1) + return ret; + + return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass); +} + +/* + * Prove that in the backwards-direction subgraph starting at 'this' + * there is no lock matching usage 'bit': + */ +static int +check_usage_backwards(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit bit, const char *irqclass) +{ + int ret; + + find_usage_bit = bit; + /* fills in 'backwards_match' */ + ret = find_usage_backwards(this->class, 0); + if (!ret || ret == 1) + return ret; + + return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass); +} + +static inline void print_irqtrace_events(struct task_struct *curr) +{ + printk("irq event stamp: %u\n", curr->irq_events); + printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event); + print_ip_sym(curr->hardirq_enable_ip); + printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event); + print_ip_sym(curr->hardirq_disable_ip); + printk("softirqs last enabled at (%u): ", curr->softirq_enable_event); + print_ip_sym(curr->softirq_enable_ip); + printk("softirqs last disabled at (%u): ", curr->softirq_disable_event); + print_ip_sym(curr->softirq_disable_ip); +} + +#else +static inline void print_irqtrace_events(struct 
task_struct *curr) +{ +} +#endif + +static int +print_usage_bug(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) +{ + __raw_spin_unlock(&hash_lock); + debug_locks_off(); + if (debug_locks_silent) + return 0; + + printk("\n=================================\n"); + printk( "[ INFO: inconsistent lock state ]\n"); + printk( "---------------------------------\n"); + + printk("inconsistent {%s} -> {%s} usage.\n", + usage_str[prev_bit], usage_str[new_bit]); + + printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", + curr->comm, curr->pid, + trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, + trace_hardirqs_enabled(curr), + trace_softirqs_enabled(curr)); + print_lock(this); + + printk("{%s} state was registered at:\n", usage_str[prev_bit]); + print_stack_trace(this->class->usage_traces + prev_bit, 1); + + print_irqtrace_events(curr); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Print out an error if an invalid bit is set: + */ +static inline int +valid_state(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) +{ + if (unlikely(this->class->usage_mask & (1 << bad_bit))) + return print_usage_bug(curr, this, bad_bit, new_bit); + return 1; +} + +#define STRICT_READ_CHECKS 1 + +/* + * Mark a lock with a usage bit, and validate the state transition: + */ +static int mark_lock(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit, unsigned long ip) +{ + unsigned int new_mask = 1 << new_bit, ret = 1; + + /* + * If already set then do not dirty the cacheline, + * nor do any checks: + */ + if (likely(this->class->usage_mask & new_mask)) + return 1; + + __raw_spin_lock(&hash_lock); + /* + * Make sure we didnt race: + */ + if 
(unlikely(this->class->usage_mask & new_mask)) { + __raw_spin_unlock(&hash_lock); + return 1; + } + + this->class->usage_mask |= new_mask; + +#ifdef CONFIG_TRACE_IRQFLAGS + if (new_bit == LOCK_ENABLED_HARDIRQS || + new_bit == LOCK_ENABLED_HARDIRQS_READ) + ip = curr->hardirq_enable_ip; + else if (new_bit == LOCK_ENABLED_SOFTIRQS || + new_bit == LOCK_ENABLED_SOFTIRQS_READ) + ip = curr->softirq_enable_ip; +#endif + if (!save_trace(this->class->usage_traces + new_bit)) + return 0; + + switch (new_bit) { +#ifdef CONFIG_TRACE_IRQFLAGS + case LOCK_USED_IN_HARDIRQ: + if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) + return 0; + if (!valid_state(curr, this, new_bit, + LOCK_ENABLED_HARDIRQS_READ)) + return 0; + /* + * just marked it hardirq-safe, check that this lock + * took no hardirq-unsafe lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_HARDIRQS, "hard")) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it hardirq-safe, check that this lock + * took no hardirq-unsafe-read lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_HARDIRQS_READ, "hard-read")) + return 0; +#endif + if (hardirq_verbose(this->class)) + ret = 2; + break; + case LOCK_USED_IN_SOFTIRQ: + if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) + return 0; + if (!valid_state(curr, this, new_bit, + LOCK_ENABLED_SOFTIRQS_READ)) + return 0; + /* + * just marked it softirq-safe, check that this lock + * took no softirq-unsafe lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it softirq-safe, check that this lock + * took no softirq-unsafe-read lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) + return 0; +#endif + if (softirq_verbose(this->class)) + ret = 2; + break; + case LOCK_USED_IN_HARDIRQ_READ: + if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) + 
return 0; + /* + * just marked it hardirq-read-safe, check that this lock + * took no hardirq-unsafe lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_HARDIRQS, "hard")) + return 0; + if (hardirq_verbose(this->class)) + ret = 2; + break; + case LOCK_USED_IN_SOFTIRQ_READ: + if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) + return 0; + /* + * just marked it softirq-read-safe, check that this lock + * took no softirq-unsafe lock in the past: + */ + if (!check_usage_forwards(curr, this, + LOCK_ENABLED_SOFTIRQS, "soft")) + return 0; + if (softirq_verbose(this->class)) + ret = 2; + break; + case LOCK_ENABLED_HARDIRQS: + if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) + return 0; + if (!valid_state(curr, this, new_bit, + LOCK_USED_IN_HARDIRQ_READ)) + return 0; + /* + * just marked it hardirq-unsafe, check that no hardirq-safe + * lock in the system ever took it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_HARDIRQ, "hard")) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it hardirq-unsafe, check that no + * hardirq-safe-read lock in the system ever took + * it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_HARDIRQ_READ, "hard-read")) + return 0; +#endif + if (hardirq_verbose(this->class)) + ret = 2; + break; + case LOCK_ENABLED_SOFTIRQS: + if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) + return 0; + if (!valid_state(curr, this, new_bit, + LOCK_USED_IN_SOFTIRQ_READ)) + return 0; + /* + * just marked it softirq-unsafe, check that no softirq-safe + * lock in the system ever took it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_SOFTIRQ, "soft")) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it softirq-unsafe, check that no + * softirq-safe-read lock in the system ever took + * it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) + return 0; +#endif + if 
(softirq_verbose(this->class)) + ret = 2; + break; + case LOCK_ENABLED_HARDIRQS_READ: + if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it hardirq-read-unsafe, check that no + * hardirq-safe lock in the system ever took it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_HARDIRQ, "hard")) + return 0; +#endif + if (hardirq_verbose(this->class)) + ret = 2; + break; + case LOCK_ENABLED_SOFTIRQS_READ: + if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) + return 0; +#if STRICT_READ_CHECKS + /* + * just marked it softirq-read-unsafe, check that no + * softirq-safe lock in the system ever took it in the past: + */ + if (!check_usage_backwards(curr, this, + LOCK_USED_IN_SOFTIRQ, "soft")) + return 0; +#endif + if (softirq_verbose(this->class)) + ret = 2; + break; +#endif + case LOCK_USED: + /* + * Add it to the global list of classes: + */ + list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes); + debug_atomic_dec(&nr_unused_locks); + break; + default: + debug_locks_off(); + WARN_ON(1); + return 0; + } + + __raw_spin_unlock(&hash_lock); + + /* + * We must printk outside of the hash_lock: + */ + if (ret == 2) { + printk("\nmarked lock as {%s}:\n", usage_str[new_bit]); + print_lock(this); + print_irqtrace_events(curr); + dump_stack(); + } + + return ret; +} + +#ifdef CONFIG_TRACE_IRQFLAGS +/* + * Mark all held locks with a usage bit: + */ +static int +mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip) +{ + enum lock_usage_bit usage_bit; + struct held_lock *hlock; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + + if (hardirq) { + if (hlock->read) + usage_bit = LOCK_ENABLED_HARDIRQS_READ; + else + usage_bit = LOCK_ENABLED_HARDIRQS; + } else { + if (hlock->read) + usage_bit = LOCK_ENABLED_SOFTIRQS_READ; + else + usage_bit = LOCK_ENABLED_SOFTIRQS; + } + if (!mark_lock(curr, hlock, usage_bit, ip)) + 
return 0; + } + + return 1; +} + +/* + * Debugging helper: via this flag we know that we are in + * 'early bootup code', and will warn about any invalid irqs-on event: + */ +static int early_boot_irqs_enabled; + +void early_boot_irqs_off(void) +{ + early_boot_irqs_enabled = 0; +} + +void early_boot_irqs_on(void) +{ + early_boot_irqs_enabled = 1; +} + +/* + * Hardirqs will be enabled: + */ +void trace_hardirqs_on(void) +{ + struct task_struct *curr = current; + unsigned long ip; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) + return; + + if (unlikely(curr->hardirqs_enabled)) { + debug_atomic_inc(&redundant_hardirqs_on); + return; + } + /* we'll do an OFF -> ON transition: */ + curr->hardirqs_enabled = 1; + ip = (unsigned long) __builtin_return_address(0); + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + return; + /* + * We are going to turn hardirqs on, so set the + * usage bit for all held locks: + */ + if (!mark_held_locks(curr, 1, ip)) + return; + /* + * If we have softirqs enabled, then set the usage + * bit for all held locks. 
(disabled hardirqs prevented + * this bit from being set before) + */ + if (curr->softirqs_enabled) + if (!mark_held_locks(curr, 0, ip)) + return; + + curr->hardirq_enable_ip = ip; + curr->hardirq_enable_event = ++curr->irq_events; + debug_atomic_inc(&hardirqs_on_events); +} + +EXPORT_SYMBOL(trace_hardirqs_on); + +/* + * Hardirqs were disabled: + */ +void trace_hardirqs_off(void) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->hardirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->hardirqs_enabled = 0; + curr->hardirq_disable_ip = _RET_IP_; + curr->hardirq_disable_event = ++curr->irq_events; + debug_atomic_inc(&hardirqs_off_events); + } else + debug_atomic_inc(&redundant_hardirqs_off); +} + +EXPORT_SYMBOL(trace_hardirqs_off); + +/* + * Softirqs will be enabled: + */ +void trace_softirqs_on(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + debug_atomic_inc(&redundant_softirqs_on); + return; + } + + /* + * We'll do an OFF -> ON transition: + */ + curr->softirqs_enabled = 1; + curr->softirq_enable_ip = ip; + curr->softirq_enable_event = ++curr->irq_events; + debug_atomic_inc(&softirqs_on_events); + /* + * We are going to turn softirqs on, so set the + * usage bit for all held locks, if hardirqs are + * enabled too: + */ + if (curr->hardirqs_enabled) + mark_held_locks(curr, 0, ip); +} + +/* + * Softirqs were disabled: + */ +void trace_softirqs_off(unsigned long ip) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return; + + if (curr->softirqs_enabled) { + /* + * We have done an ON -> OFF transition: + */ + curr->softirqs_enabled = 0; + curr->softirq_disable_ip = ip; + 
curr->softirq_disable_event = ++curr->irq_events; + debug_atomic_inc(&softirqs_off_events); + DEBUG_LOCKS_WARN_ON(!softirq_count()); + } else + debug_atomic_inc(&redundant_softirqs_off); +} + +#endif + +/* + * Initialize a lock instance's lock-class mapping info: + */ +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key) +{ + if (unlikely(!debug_locks)) + return; + + if (DEBUG_LOCKS_WARN_ON(!key)) + return; + if (DEBUG_LOCKS_WARN_ON(!name)) + return; + /* + * Sanity check, the lock-class key must be persistent: + */ + if (!static_obj(key)) { + printk("BUG: key %p not in .data!\n", key); + DEBUG_LOCKS_WARN_ON(1); + return; + } + lock->name = name; + lock->key = key; + memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES); +} + +EXPORT_SYMBOL_GPL(lockdep_init_map); + +/* + * This gets called for every mutex_lock*()/spin_lock*() operation. + * We maintain the dependency maps and validate the locking attempt: + */ +static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, int hardirqs_off, + unsigned long ip) +{ + struct task_struct *curr = current; + struct held_lock *hlock; + struct lock_class *class; + unsigned int depth, id; + int chain_head = 0; + u64 chain_key; + + if (unlikely(!debug_locks)) + return 0; + + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + + class = lock->class[subclass]; + /* not cached yet? 
*/ + if (unlikely(!class)) { + class = register_lock_class(lock, subclass); + if (!class) + return 0; + } + debug_atomic_inc((atomic_t *)&class->ops); + if (very_verbose(class)) { + printk("\nacquire class [%p] %s", class->key, class->name); + if (class->name_version > 1) + printk("#%d", class->name_version); + printk("\n"); + dump_stack(); + } + + /* + * Add the lock to the list of currently held locks. + * (we dont increase the depth just yet, up until the + * dependency checks are done) + */ + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) + return 0; + + hlock = curr->held_locks + depth; + + hlock->class = class; + hlock->acquire_ip = ip; + hlock->instance = lock; + hlock->trylock = trylock; + hlock->read = read; + hlock->check = check; + hlock->hardirqs_off = hardirqs_off; + + if (check != 2) + goto out_calc_hash; +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If non-trylock use in a hardirq or softirq context, then + * mark the lock as used in these contexts: + */ + if (!trylock) { + if (read) { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_HARDIRQ_READ, ip)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, + LOCK_USED_IN_SOFTIRQ_READ, ip)) + return 0; + } else { + if (curr->hardirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip)) + return 0; + if (curr->softirq_context) + if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip)) + return 0; + } + } + if (!hardirqs_off) { + if (read) { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS_READ, ip)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS_READ, ip)) + return 0; + } else { + if (!mark_lock(curr, hlock, + LOCK_ENABLED_HARDIRQS, ip)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, + LOCK_ENABLED_SOFTIRQS, ip)) + return 0; + } + } +#endif + /* mark it as used: */ + if (!mark_lock(curr, hlock, LOCK_USED, ip)) + return 0; +out_calc_hash: + /* + * 
Calculate the chain hash: it's the combined hash of all the + * lock keys along the dependency chain. We save the hash value + * at every step so that we can get the current hash easily + * after unlock. The chain hash is then used to cache dependency + * results. + * + * The 'key ID' is the most compact key value to drive + * the hash, not class->key. + */ + id = class - lock_classes; + if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) + return 0; + + chain_key = curr->curr_chain_key; + if (!depth) { + if (DEBUG_LOCKS_WARN_ON(chain_key != 0)) + return 0; + chain_head = 1; + } + + hlock->prev_chain_key = chain_key; + +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * Keep track of points where we cross into an interrupt context: + */ + hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + + curr->softirq_context; + if (depth) { + struct held_lock *prev_hlock; + + prev_hlock = curr->held_locks + depth-1; + /* + * If we cross into another context, reset the + * hash key (this also prevents the checking and the + * adding of the dependency to 'prev'): + */ + if (prev_hlock->irq_context != hlock->irq_context) { + chain_key = 0; + chain_head = 1; + } + } +#endif + chain_key = iterate_chain_key(chain_key, id); + curr->curr_chain_key = chain_key; + + /* + * Trylock needs to maintain the stack of held locks, but it + * does not add new dependencies, because trylock can be done + * in any order. + * + * We look up the chain_key and do the O(N^2) check and update of + * the dependencies only if this is a new dependency chain. + * (If lookup_chain_cache() returns with 1 it acquires + * hash_lock for us) + */ + if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) { + /* + * Check whether last held lock: + * + * - is irq-safe, if this lock is irq-unsafe + * - is softirq-safe, if this lock is hardirq-unsafe + * + * And check whether the new lock's dependency graph + * could lead back to the previous lock. + * + * any of these scenarios could lead to a deadlock. 
If + * All validations + */ + int ret = check_deadlock(curr, hlock, lock, read); + + if (!ret) + return 0; + /* + * Mark recursive read, as we jump over it when + * building dependencies (just like we jump over + * trylock entries): + */ + if (ret == 2) + hlock->read = 2; + /* + * Add dependency only if this lock is not the head + * of the chain, and if it's not a secondary read-lock: + */ + if (!chain_head && ret != 2) + if (!check_prevs_add(curr, hlock)) + return 0; + __raw_spin_unlock(&hash_lock); + } + curr->lockdep_depth++; + check_chain_key(curr); + if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) { + debug_locks_off(); + printk("BUG: MAX_LOCK_DEPTH too low!\n"); + printk("turning off the locking correctness validator.\n"); + return 0; + } + if (unlikely(curr->lockdep_depth > max_lockdep_depth)) + max_lockdep_depth = curr->lockdep_depth; + + return 1; +} + +static int +print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (!debug_locks_off()) + return 0; + if (debug_locks_silent) + return 0; + + printk("\n=====================================\n"); + printk( "[ BUG: bad unlock balance detected! 
]\n"); + printk( "-------------------------------------\n"); + printk("%s/%d is trying to release lock (", + curr->comm, curr->pid); + print_lockdep_cache(lock); + printk(") at:\n"); + print_ip_sym(ip); + printk("but there are no more locks to release!\n"); + printk("\nother info that might help us debug this:\n"); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); + + return 0; +} + +/* + * Common debugging checks for both nested and non-nested unlock: + */ +static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, + unsigned long ip) +{ + if (unlikely(!debug_locks)) + return 0; + if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) + return 0; + + if (curr->lockdep_depth <= 0) + return print_unlock_inbalance_bug(curr, lock, ip); + + return 1; +} + +/* + * Remove the lock to the list of currently held locks in a + * potentially non-nested (out of order) manner. This is a + * relatively rare operation, as all the unlock APIs default + * to nested mode (which uses lock_release()): + */ +static int +lock_release_non_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock, *prev_hlock; + unsigned int depth; + int i; + + /* + * Check whether the lock exists in the current stack + * of held locks: + */ + depth = curr->lockdep_depth; + if (DEBUG_LOCKS_WARN_ON(!depth)) + return 0; + + prev_hlock = NULL; + for (i = depth-1; i >= 0; i--) { + hlock = curr->held_locks + i; + /* + * We must not cross into another context: + */ + if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) + break; + if (hlock->instance == lock) + goto found_it; + prev_hlock = hlock; + } + return print_unlock_inbalance_bug(curr, lock, ip); + +found_it: + /* + * We have the right lock to unlock, 'hlock' points to it. 
+ * Now we remove it from the stack, and add back the other + * entries (if any), recalculating the hash along the way: + */ + curr->lockdep_depth = i; + curr->curr_chain_key = hlock->prev_chain_key; + + for (i++; i < depth; i++) { + hlock = curr->held_locks + i; + if (!__lock_acquire(hlock->instance, + hlock->class->subclass, hlock->trylock, + hlock->read, hlock->check, hlock->hardirqs_off, + hlock->acquire_ip)) + return 0; + } + + if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) + return 0; + return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. the current top of the lock-stack is unlocked) + */ +static int lock_release_nested(struct task_struct *curr, + struct lockdep_map *lock, unsigned long ip) +{ + struct held_lock *hlock; + unsigned int depth; + + /* + * Pop off the top of the lock stack: + */ + depth = curr->lockdep_depth - 1; + hlock = curr->held_locks + depth; + + /* + * Is the unlock non-nested: + */ + if (hlock->instance != lock) + return lock_release_non_nested(curr, lock, ip); + curr->lockdep_depth--; + + if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0))) + return 0; + + curr->curr_chain_key = hlock->prev_chain_key; + +#ifdef CONFIG_DEBUG_LOCKDEP + hlock->prev_chain_key = 0; + hlock->class = NULL; + hlock->acquire_ip = 0; + hlock->irq_context = 0; +#endif + return 1; +} + +/* + * Remove the lock to the list of currently held locks - this gets + * called on mutex_unlock()/spin_unlock*() (or on a failed + * mutex_lock_interruptible()). This is done for unlocks that nest + * perfectly. (i.e. 
the current top of the lock-stack is unlocked) + */ +static void +__lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ + struct task_struct *curr = current; + + if (!check_unlock(curr, lock, ip)) + return; + + if (nested) { + if (!lock_release_nested(curr, lock, ip)) + return; + } else { + if (!lock_release_non_nested(curr, lock, ip)) + return; + } + + check_chain_key(curr); +} + +/* + * Check whether we follow the irq-flags state precisely: + */ +static void check_flags(unsigned long flags) +{ +#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS) + if (!debug_locks) + return; + + if (irqs_disabled_flags(flags)) + DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled); + else + DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled); + + /* + * We dont accurately track softirq state in e.g. + * hardirq contexts (such as on 4KSTACKS), so only + * check if not in hardirq contexts: + */ + if (!hardirq_count()) { + if (softirq_count()) + DEBUG_LOCKS_WARN_ON(current->softirqs_enabled); + else + DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); + } + + if (!debug_locks) + print_irqtrace_events(current); +#endif +} + +/* + * We are not always called with irqs disabled - do that here, + * and also avoid lockdep recursion: + */ +void lock_acquire(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + __lock_acquire(lock, subclass, trylock, read, check, + irqs_disabled_flags(flags), ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_acquire); + +void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + 
__lock_release(lock, nested, ip); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + +EXPORT_SYMBOL_GPL(lock_release); + +/* + * Used by the testsuite, sanitize the validator state + * after a simulated failure: + */ + +void lockdep_reset(void) +{ + unsigned long flags; + + raw_local_irq_save(flags); + current->curr_chain_key = 0; + current->lockdep_depth = 0; + current->lockdep_recursion = 0; + memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock)); + nr_hardirq_chains = 0; + nr_softirq_chains = 0; + nr_process_chains = 0; + debug_locks = 1; + raw_local_irq_restore(flags); +} + +static void zap_class(struct lock_class *class) +{ + int i; + + /* + * Remove all dependencies this lock is + * involved in: + */ + for (i = 0; i < nr_list_entries; i++) { + if (list_entries[i].class == class) + list_del_rcu(&list_entries[i].entry); + } + /* + * Unhash the class and remove it from the all_lock_classes list: + */ + list_del_rcu(&class->hash_entry); + list_del_rcu(&class->lock_entry); + +} + +static inline int within(void *addr, void *start, unsigned long size) +{ + return addr >= start && addr < start + size; +} + +void lockdep_free_key_range(void *start, unsigned long size) +{ + struct lock_class *class, *next; + struct list_head *head; + unsigned long flags; + int i; + + raw_local_irq_save(flags); + __raw_spin_lock(&hash_lock); + + /* + * Unhash all classes that were created by this module: + */ + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) + if (within(class->key, start, size)) + zap_class(class); + } + + __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); +} + +void lockdep_reset_lock(struct lockdep_map *lock) +{ + struct lock_class *class, *next, *entry; + struct list_head *head; + unsigned long flags; + int i, j; + + raw_local_irq_save(flags); + __raw_spin_lock(&hash_lock); + + /* + * Remove all classes 
this lock has: + */ + for (i = 0; i < CLASSHASH_SIZE; i++) { + head = classhash_table + i; + if (list_empty(head)) + continue; + list_for_each_entry_safe(class, next, head, hash_entry) { + for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { + entry = lock->class[j]; + if (class == entry) { + zap_class(class); + lock->class[j] = NULL; + break; + } + } + } + } + + /* + * Debug check: in the end all mapped classes should + * be gone. + */ + for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) { + entry = lock->class[j]; + if (!entry) + continue; + __raw_spin_unlock(&hash_lock); + DEBUG_LOCKS_WARN_ON(1); + raw_local_irq_restore(flags); + return; + } + + __raw_spin_unlock(&hash_lock); + raw_local_irq_restore(flags); +} + +void __init lockdep_init(void) +{ + int i; + + /* + * Some architectures have their own start_kernel() + * code which calls lockdep_init(), while we also + * call lockdep_init() from the start_kernel() itself, + * and we want to initialize the hashes only once: + */ + if (lockdep_initialized) + return; + + for (i = 0; i < CLASSHASH_SIZE; i++) + INIT_LIST_HEAD(classhash_table + i); + + for (i = 0; i < CHAINHASH_SIZE; i++) + INIT_LIST_HEAD(chainhash_table + i); + + lockdep_initialized = 1; +} + +void __init lockdep_info(void) +{ + printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + + printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + printk("... 
CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + + printk(" memory used by lock dependency info: %lu kB\n", + (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS + + sizeof(struct list_head) * CLASSHASH_SIZE + + sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES + + sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + + sizeof(struct list_head) * CHAINHASH_SIZE) / 1024); + + printk(" per task-struct memory footprint: %lu bytes\n", + sizeof(struct held_lock) * MAX_LOCK_DEPTH); + +#ifdef CONFIG_DEBUG_LOCKDEP + if (lockdep_init_error) + printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n"); +#endif +} + +static inline int in_range(const void *start, const void *addr, const void *end) +{ + return addr >= start && addr <= end; +} + +static void +print_freed_lock_bug(struct task_struct *curr, const void *mem_from, + const void *mem_to) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n=========================\n"); + printk( "[ BUG: held lock freed! 
]\n"); + printk( "-------------------------\n"); + printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", + curr->comm, curr->pid, mem_from, mem_to-1); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + +/* + * Called when kernel memory is freed (or unmapped), or if a lock + * is destroyed or reinitialized - this code checks whether there is + * any held lock in the memory range of <from> to <to>: + */ +void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) +{ + const void *mem_to = mem_from + mem_len, *lock_from, *lock_to; + struct task_struct *curr = current; + struct held_lock *hlock; + unsigned long flags; + int i; + + if (unlikely(!debug_locks)) + return; + + local_irq_save(flags); + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + + lock_from = (void *)hlock->instance; + lock_to = (void *)(hlock->instance + 1); + + if (!in_range(mem_from, lock_from, mem_to) && + !in_range(mem_from, lock_to, mem_to)) + continue; + + print_freed_lock_bug(curr, mem_from, mem_to); + break; + } + local_irq_restore(flags); +} + +static void print_held_locks_bug(struct task_struct *curr) +{ + if (!debug_locks_off()) + return; + if (debug_locks_silent) + return; + + printk("\n=====================================\n"); + printk( "[ BUG: lock held at task exit time! 
]\n"); + printk( "-------------------------------------\n"); + printk("%s/%d is exiting with locks still held!\n", + curr->comm, curr->pid); + lockdep_print_held_locks(curr); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + +void debug_check_no_locks_held(struct task_struct *task) +{ + if (unlikely(task->lockdep_depth > 0)) + print_held_locks_bug(task); +} + +void debug_show_all_locks(void) +{ + struct task_struct *g, *p; + int count = 10; + int unlock = 1; + + printk("\nShowing all locks held in the system:\n"); + + /* + * Here we try to get the tasklist_lock as hard as possible, + * if not successful after 2 seconds we ignore it (but keep + * trying). This is to enable a debug printout even if a + * tasklist_lock-holding task deadlocks or crashes. + */ +retry: + if (!read_trylock(&tasklist_lock)) { + if (count == 10) + printk("hm, tasklist_lock locked, retrying... "); + if (count) { + count--; + printk(" #%d", 10-count); + mdelay(200); + goto retry; + } + printk(" ignoring it.\n"); + unlock = 0; + } + if (count != 10) + printk(" locked it.\n"); + + do_each_thread(g, p) { + if (p->lockdep_depth) + lockdep_print_held_locks(p); + if (!unlock) + if (read_trylock(&tasklist_lock)) + unlock = 1; + } while_each_thread(g, p); + + printk("\n"); + printk("=============================================\n\n"); + + if (unlock) + read_unlock(&tasklist_lock); +} + +EXPORT_SYMBOL_GPL(debug_show_all_locks); + +void debug_show_held_locks(struct task_struct *task) +{ + lockdep_print_held_locks(task); +} + +EXPORT_SYMBOL_GPL(debug_show_held_locks); + diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h new file mode 100644 index 000000000000..0d355f24fe04 --- /dev/null +++ b/kernel/lockdep_internals.h @@ -0,0 +1,78 @@ +/* + * kernel/lockdep_internals.h + * + * Runtime locking correctness validator + * + * lockdep subsystem internal functions and variables. + */ + +/* + * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies + * we track. 
+ * + * We use the per-lock dependency maps in two ways: we grow it by adding + * every to-be-taken lock to all currently held lock's own dependency + * table (if it's not there yet), and we check it for lock order + * conflicts and deadlocks. + */ +#define MAX_LOCKDEP_ENTRIES 8192UL + +#define MAX_LOCKDEP_KEYS_BITS 11 +#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS) + +#define MAX_LOCKDEP_CHAINS_BITS 13 +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +/* + * Stack-trace: tightly packed array of stack backtrace + * addresses. Protected by the hash_lock. + */ +#define MAX_STACK_TRACE_ENTRIES 131072UL + +extern struct list_head all_lock_classes; + +extern void +get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); + +extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); + +extern unsigned long nr_lock_classes; +extern unsigned long nr_list_entries; +extern unsigned long nr_lock_chains; +extern unsigned long nr_stack_trace_entries; + +extern unsigned int nr_hardirq_chains; +extern unsigned int nr_softirq_chains; +extern unsigned int nr_process_chains; +extern unsigned int max_lockdep_depth; +extern unsigned int max_recursion_depth; + +#ifdef CONFIG_DEBUG_LOCKDEP +/* + * Various lockdep statistics: + */ +extern atomic_t chain_lookup_hits; +extern atomic_t chain_lookup_misses; +extern atomic_t hardirqs_on_events; +extern atomic_t hardirqs_off_events; +extern atomic_t redundant_hardirqs_on; +extern atomic_t redundant_hardirqs_off; +extern atomic_t softirqs_on_events; +extern atomic_t softirqs_off_events; +extern atomic_t redundant_softirqs_on; +extern atomic_t redundant_softirqs_off; +extern atomic_t nr_unused_locks; +extern atomic_t nr_cyclic_checks; +extern atomic_t nr_cyclic_check_recursions; +extern atomic_t nr_find_usage_forwards_checks; +extern atomic_t nr_find_usage_forwards_recursions; +extern atomic_t nr_find_usage_backwards_checks; +extern atomic_t nr_find_usage_backwards_recursions; 
+# define debug_atomic_inc(ptr) atomic_inc(ptr) +# define debug_atomic_dec(ptr) atomic_dec(ptr) +# define debug_atomic_read(ptr) atomic_read(ptr) +#else +# define debug_atomic_inc(ptr) do { } while (0) +# define debug_atomic_dec(ptr) do { } while (0) +# define debug_atomic_read(ptr) 0 +#endif diff --git a/kernel/module.c b/kernel/module.c index 0351625767b1..35e1b1f859d7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1121,6 +1121,9 @@ static void free_module(struct module *mod) if (mod->percpu) percpu_modfree(mod->percpu); + /* Free lock-classes: */ + lockdep_free_key_range(mod->module_core, mod->core_size); + /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 16021b09c184..16c2e98b7638 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -48,7 +48,7 @@ config DEBUG_KERNEL config LOG_BUF_SHIFT int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL range 12 21 - default 17 if S390 + default 17 if S390 || LOCKDEP default 16 if X86_NUMAQ || IA64 default 15 if SMP default 14 diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 5cd05f20bdec..9bdc8d440b2b 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -889,9 +890,6 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #include "locking-selftest-softirq.h" // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) -#define lockdep_reset() -#define lockdep_reset_lock(x) - #ifdef CONFIG_DEBUG_LOCK_ALLOC # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) # define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) -- cgit v1.2.3 From ad33945175bed649ca5fe0881269db005bbb449a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Jul 2006 00:25:15 -0700 Subject: [PATCH] lockdep: annotate ->mmap_sem Teach special (recursive) locking code to the lock validator. 
Has no effect on non-lockdep kernels. Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 7f48abdd7bb6..54953d8a6f17 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -193,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) down_write(&oldmm->mmap_sem); flush_cache_mm(oldmm); - down_write(&mm->mmap_sem); + /* + * Not linked in yet - no deadlock potential: + */ + down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING); mm->locked_vm = 0; mm->mmap = NULL; -- cgit v1.2.3 From 36c8b586896f60cb91a4fd526233190b34316baf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Jul 2006 00:25:41 -0700 Subject: [PATCH] sched: cleanup, remove task_t, convert to struct task_struct cleanup: remove task_t and convert all the uses to struct task_struct. I introduced it for the scheduler anno and it was a mistake. Conversion was mostly scripted, the result was reviewed and all secondary whitespace and style impact (if any) was fixed up by hand. 
Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 2 +- arch/ia64/kernel/mca.c | 10 +- arch/ia64/kernel/smpboot.c | 2 +- arch/mips/kernel/entry.S | 2 +- arch/mips/kernel/mips-mt.c | 6 +- arch/um/kernel/tt/process_kern.c | 2 +- drivers/char/tty_io.c | 2 +- fs/eventpoll.c | 4 +- include/asm-ia64/thread_info.h | 2 +- include/asm-m32r/system.h | 2 +- include/asm-sh/system.h | 2 +- include/linux/sched.h | 55 +++++------ kernel/capability.c | 8 +- kernel/exit.c | 35 +++---- kernel/fork.c | 18 ++-- kernel/hrtimer.c | 2 +- kernel/pid.c | 6 +- kernel/ptrace.c | 6 +- kernel/rtmutex-debug.c | 5 +- kernel/rtmutex-tester.c | 4 +- kernel/rtmutex.c | 11 ++- kernel/sched.c | 192 ++++++++++++++++++++------------------- kernel/timer.c | 2 +- kernel/workqueue.c | 2 +- mm/oom_kill.c | 8 +- 25 files changed, 203 insertions(+), 187 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 01c8c8b23337..41ebf51a107a 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -474,7 +474,7 @@ out: */ unsigned long -thread_saved_pc(task_t *t) +thread_saved_pc(struct task_struct *t) { unsigned long base = (unsigned long)task_stack_page(t); unsigned long fp, sp = task_thread_info(t)->pcb.ksp; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index eb8e8dc5ac8e..2fbe4536fe18 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -678,7 +678,7 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) */ static void -ia64_mca_modify_comm(const task_t *previous_current) +ia64_mca_modify_comm(const struct task_struct *previous_current) { char *p, comm[sizeof(current->comm)]; if (previous_current->pid) @@ -709,7 +709,7 @@ ia64_mca_modify_comm(const task_t *previous_current) * that we can do backtrace on the MCA/INIT handler code itself. 
*/ -static task_t * +static struct task_struct * ia64_mca_modify_original_stack(struct pt_regs *regs, const struct switch_stack *sw, struct ia64_sal_os_state *sos, @@ -719,7 +719,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs, ia64_va va; extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ const pal_min_state_area_t *ms = sos->pal_min_state; - task_t *previous_current; + struct task_struct *previous_current; struct pt_regs *old_regs; struct switch_stack *old_sw; unsigned size = sizeof(struct pt_regs) + @@ -1023,7 +1023,7 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, pal_processor_state_info_t *psp = (pal_processor_state_info_t *) &sos->proc_state_param; int recover, cpu = smp_processor_id(); - task_t *previous_current; + struct task_struct *previous_current; struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; @@ -1352,7 +1352,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, { static atomic_t slaves; static atomic_t monarchs; - task_t *previous_current; + struct task_struct *previous_current; int cpu = smp_processor_id(); struct ia64_mca_notify_die nd = { .sos = sos, .monarch_cpu = &monarch_cpu }; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index e1960979be29..6203ed4ec8cf 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -124,7 +124,7 @@ extern void __devinit calibrate_delay (void); extern void start_ap (void); extern unsigned long ia64_iobase; -task_t *task_for_booting_cpu; +struct task_struct *task_for_booting_cpu; /* * State for each CPU diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index ecfd637d702a..01e7fa86aa43 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -65,7 +65,7 @@ need_resched: #endif FEXPORT(ret_from_fork) - jal schedule_tail # a0 = task_t *prev + jal schedule_tail # a0 = struct task_struct *prev FEXPORT(syscall_exit) local_irq_disable # make sure 
need_resched and diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c index 02237a685ec7..4dcc39f42951 100644 --- a/arch/mips/kernel/mips-mt.c +++ b/arch/mips/kernel/mips-mt.c @@ -47,7 +47,7 @@ unsigned long mt_fpemul_threshold = 0; * used in sys_sched_set/getaffinity() in kernel/sched.c, so * cloned here. */ -static inline task_t *find_process_by_pid(pid_t pid) +static inline struct task_struct *find_process_by_pid(pid_t pid) { return pid ? find_task_by_pid(pid) : current; } @@ -62,7 +62,7 @@ asmlinkage long mipsmt_sys_sched_setaffinity(pid_t pid, unsigned int len, cpumask_t new_mask; cpumask_t effective_mask; int retval; - task_t *p; + struct task_struct *p; if (len < sizeof(new_mask)) return -EINVAL; @@ -127,7 +127,7 @@ asmlinkage long mipsmt_sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned int real_len; cpumask_t mask; int retval; - task_t *p; + struct task_struct *p; real_len = sizeof(mask); if (len < real_len) diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index a9c1443fc548..8368c2dbe635 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -119,7 +119,7 @@ void suspend_new_thread(int fd) panic("read failed in suspend_new_thread, err = %d", -err); } -void schedule_tail(task_t *prev); +void schedule_tail(struct task_struct *prev); static void new_thread_handler(int sig) { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 6fb77952562d..bfdb90242a90 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2336,7 +2336,7 @@ static int fionbio(struct file *file, int __user *p) static int tiocsctty(struct tty_struct *tty, int arg) { - task_t *p; + struct task_struct *p; if (current->signal->leader && (current->signal->session == tty->session)) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 9c677bbd0b08..19ffb043abbc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -120,7 +120,7 @@ struct epoll_filefd { */ struct wake_task_node { struct 
list_head llink; - task_t *task; + struct task_struct *task; wait_queue_head_t *wq; }; @@ -413,7 +413,7 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) { int wake_nests = 0; unsigned long flags; - task_t *this_task = current; + struct task_struct *this_task = current; struct list_head *lsthead = &psw->wake_task_list, *lnk; struct wake_task_node *tncur; struct wake_task_node tnode; diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h index 8bc9869e5765..8adcde0934ca 100644 --- a/include/asm-ia64/thread_info.h +++ b/include/asm-ia64/thread_info.h @@ -68,7 +68,7 @@ struct thread_info { #define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) #define __HAVE_ARCH_TASK_STRUCT_ALLOCATOR -#define alloc_task_struct() ((task_t *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) +#define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) #endif /* !__ASSEMBLY */ diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 66c4742f09e7..311cebf44eff 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -18,7 +18,7 @@ * switch_to(prev, next) should switch from task `prev' to `next' * `prev' will never be the same as `next'. 
* - * `next' and `prev' should be task_t, but it isn't always defined + * `next' and `prev' should be struct task_struct, but it isn't always defined */ #define switch_to(prev, next, last) do { \ diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h index b752e5cbb830..ce2e60664a86 100644 --- a/include/asm-sh/system.h +++ b/include/asm-sh/system.h @@ -12,7 +12,7 @@ */ #define switch_to(prev, next, last) do { \ - task_t *__last; \ + struct task_struct *__last; \ register unsigned long *__ts1 __asm__ ("r1") = &prev->thread.sp; \ register unsigned long *__ts2 __asm__ ("r2") = &prev->thread.pc; \ register unsigned long *__ts4 __asm__ ("r4") = (unsigned long *)prev; \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ebddba4448d..c2797f04d931 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -184,11 +184,11 @@ extern unsigned long weighted_cpuload(const int cpu); extern rwlock_t tasklist_lock; extern spinlock_t mmlist_lock; -typedef struct task_struct task_t; +struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); -extern void init_idle(task_t *idle, int cpu); +extern void init_idle(struct task_struct *idle, int cpu); extern cpumask_t nohz_cpu_mask; @@ -383,7 +383,7 @@ struct signal_struct { wait_queue_head_t wait_chldexit; /* for wait4() */ /* current thread group signal load-balancing target: */ - task_t *curr_target; + struct task_struct *curr_target; /* shared signal handling: */ struct sigpending shared_pending; @@ -699,7 +699,7 @@ extern int groups_search(struct group_info *group_info, gid_t grp); ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK -extern void prefetch_stack(struct task_struct*); +extern void prefetch_stack(struct task_struct *t); #else static inline void prefetch_stack(struct task_struct *t) { } #endif @@ -1031,9 +1031,9 @@ static inline void put_task_struct(struct task_struct *t) #define used_math() tsk_used_math(current) #ifdef 
CONFIG_SMP -extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); +extern int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask); #else -static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) +static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { if (!cpu_isset(0, new_mask)) return -EINVAL; @@ -1042,7 +1042,8 @@ static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) #endif extern unsigned long long sched_clock(void); -extern unsigned long long current_sched_time(const task_t *current_task); +extern unsigned long long +current_sched_time(const struct task_struct *current_task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -1060,27 +1061,27 @@ static inline void idle_task_exit(void) {} extern void sched_idle_next(void); #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(task_t *p); -extern void rt_mutex_setprio(task_t *p, int prio); -extern void rt_mutex_adjust_pi(task_t *p); +extern int rt_mutex_getprio(struct task_struct *p); +extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern void rt_mutex_adjust_pi(struct task_struct *p); #else -static inline int rt_mutex_getprio(task_t *p) +static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } # define rt_mutex_adjust_pi(p) do { } while (0) #endif -extern void set_user_nice(task_t *p, long nice); -extern int task_prio(const task_t *p); -extern int task_nice(const task_t *p); -extern int can_nice(const task_t *p, const int nice); -extern int task_curr(const task_t *p); +extern void set_user_nice(struct task_struct *p, long nice); +extern int task_prio(const struct task_struct *p); +extern int task_nice(const struct task_struct *p); +extern int can_nice(const struct task_struct *p, const int nice); +extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); -extern task_t 
*idle_task(int cpu); -extern task_t *curr_task(int cpu); -extern void set_curr_task(int cpu, task_t *p); +extern struct task_struct *idle_task(int cpu); +extern struct task_struct *curr_task(int cpu); +extern void set_curr_task(int cpu, struct task_struct *p); void yield(void); @@ -1137,8 +1138,8 @@ extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, #else static inline void kick_process(struct task_struct *tsk) { } #endif -extern void FASTCALL(sched_fork(task_t * p, int clone_flags)); -extern void FASTCALL(sched_exit(task_t * p)); +extern void FASTCALL(sched_fork(struct task_struct * p, int clone_flags)); +extern void FASTCALL(sched_exit(struct task_struct * p)); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); @@ -1243,17 +1244,17 @@ extern NORET_TYPE void do_group_exit(int); extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern task_t *child_reaper; +extern struct task_struct *child_reaper; extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); -task_t *fork_idle(int); +struct task_struct *fork_idle(int); extern void set_task_comm(struct task_struct *tsk, char *from); extern void get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(task_t * p); +extern void wait_task_inactive(struct task_struct * p); #else #define wait_task_inactive(p) do { } while (0) #endif @@ -1279,13 +1280,13 @@ extern void wait_task_inactive(task_t * p); /* de_thread depends on thread_group_leader not being a pid based check */ #define thread_group_leader(p) (p == p->group_leader) -static inline task_t *next_thread(const task_t *p) +static inline struct task_struct *next_thread(const struct task_struct *p) { return list_entry(rcu_dereference(p->thread_group.next), - task_t, thread_group); + struct task_struct, 
thread_group); } -static inline int thread_group_empty(task_t *p) +static inline int thread_group_empty(struct task_struct *p) { return list_empty(&p->thread_group); } diff --git a/kernel/capability.c b/kernel/capability.c index 1a4d8a40d3f9..c7685ad00a97 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) int ret = 0; pid_t pid; __u32 version; - task_t *target; + struct task_struct *target; struct __user_cap_data_struct data; if (get_user(version, &header->version)) @@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *g, *target; + struct task_struct *g, *target; int ret = -EPERM; int found = 0; @@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - task_t *g, *target; + struct task_struct *g, *target; int ret = -EPERM; int found = 0; @@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) { kernel_cap_t inheritable, permitted, effective; __u32 version; - task_t *target; + struct task_struct *target; int ret; pid_t pid; diff --git a/kernel/exit.c b/kernel/exit.c index c595db14cf25..6664c084783d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -134,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) void release_task(struct task_struct * p) { + struct task_struct *leader; int zap_leader; - task_t *leader; repeat: atomic_dec(&p->user->processes); write_lock_irq(&tasklist_lock); @@ -209,7 +209,7 @@ out: * * "I ask you, have you ever known what it is to be an orphan?" 
*/ -static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task) +static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) { struct task_struct *p; int ret = 1; @@ -582,7 +582,8 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static inline void choose_new_parent(task_t *p, task_t *reaper) +static inline void +choose_new_parent(struct task_struct *p, struct task_struct *reaper) { /* * Make sure we're not reparenting to ourselves and that @@ -592,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper) p->real_parent = reaper; } -static void reparent_thread(task_t *p, task_t *father, int traced) +static void +reparent_thread(struct task_struct *p, struct task_struct *father, int traced) { /* We don't want people slaying init. */ if (p->exit_signal != -1) @@ -656,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced) * group, and if no such member exists, give it to * the global child reaper process (ie "init") */ -static void forget_original_parent(struct task_struct * father, - struct list_head *to_release) +static void +forget_original_parent(struct task_struct *father, struct list_head *to_release) { struct task_struct *p, *reaper = father; struct list_head *_p, *_n; @@ -680,7 +682,7 @@ static void forget_original_parent(struct task_struct * father, */ list_for_each_safe(_p, _n, &father->children) { int ptrace; - p = list_entry(_p,struct task_struct,sibling); + p = list_entry(_p, struct task_struct, sibling); ptrace = p->ptrace; @@ -709,7 +711,7 @@ static void forget_original_parent(struct task_struct * father, list_add(&p->ptrace_list, to_release); } list_for_each_safe(_p, _n, &father->ptrace_children) { - p = list_entry(_p,struct task_struct,ptrace_list); + p = list_entry(_p, struct task_struct, ptrace_list); choose_new_parent(p, reaper); reparent_thread(p, father, 1); } @@ -829,7 +831,7 @@ static void exit_notify(struct task_struct *tsk) list_for_each_safe(_p, _n, 
&ptrace_dead) { list_del_init(_p); - t = list_entry(_p,struct task_struct,ptrace_list); + t = list_entry(_p, struct task_struct, ptrace_list); release_task(t); } @@ -1010,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code) do_group_exit((error_code & 0xff) << 8); } -static int eligible_child(pid_t pid, int options, task_t *p) +static int eligible_child(pid_t pid, int options, struct task_struct *p) { if (pid > 0) { if (p->pid != pid) @@ -1051,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p) return 1; } -static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, +static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid, int why, int status, struct siginfo __user *infop, struct rusage __user *rusagep) { int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0; + put_task_struct(p); if (!retval) retval = put_user(SIGCHLD, &infop->si_signo); @@ -1081,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_zombie(task_t *p, int noreap, +static int wait_task_zombie(struct task_struct *p, int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1243,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap, * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. */ -static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap, - struct siginfo __user *infop, +static int wait_task_stopped(struct task_struct *p, int delayed_group_leader, + int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { int retval, exit_code; @@ -1358,7 +1361,7 @@ bail_ref: * the lock and this task is uninteresting. If we return nonzero, we have * released the lock and the system call should return. 
*/ -static int wait_task_continued(task_t *p, int noreap, +static int wait_task_continued(struct task_struct *p, int noreap, struct siginfo __user *infop, int __user *stat_addr, struct rusage __user *ru) { @@ -1444,7 +1447,7 @@ repeat: int ret; list_for_each(_p,&tsk->children) { - p = list_entry(_p,struct task_struct,sibling); + p = list_entry(_p, struct task_struct, sibling); ret = eligible_child(pid, options, p); if (!ret) diff --git a/kernel/fork.c b/kernel/fork.c index 54953d8a6f17..56e4e07e45f7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -933,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p) * parts of the process environment (as per the clone * flags). The actual kick-off is left to the caller. */ -static task_t *copy_process(unsigned long clone_flags, - unsigned long stack_start, - struct pt_regs *regs, - unsigned long stack_size, - int __user *parent_tidptr, - int __user *child_tidptr, - int pid) +static struct task_struct *copy_process(unsigned long clone_flags, + unsigned long stack_start, + struct pt_regs *regs, + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr, + int pid) { int retval; struct task_struct *p = NULL; @@ -1294,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs) return regs; } -task_t * __devinit fork_idle(int cpu) +struct task_struct * __devinit fork_idle(int cpu) { - task_t *task; + struct task_struct *task; struct pt_regs regs; task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 617304ce67db..d17766d40dab 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer) return HRTIMER_NORESTART; } -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task) +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; diff 
--git a/kernel/pid.c b/kernel/pid.c index eeb836b65ca4..93e212f20671 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr) return NULL; } -int fastcall attach_pid(task_t *task, enum pid_type type, int nr) +int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr) { struct pid_link *link; struct pid *pid; @@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr) return 0; } -void fastcall detach_pid(task_t *task, enum pid_type type) +void fastcall detach_pid(struct task_struct *task, enum pid_type type) { struct pid_link *link; struct pid *pid; @@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type) /* * Must be called under rcu_read_lock() or with tasklist_lock read-held. */ -task_t *find_task_by_pid_type(int type, int nr) +struct task_struct *find_task_by_pid_type(int type, int nr) { return pid_task(find_pid(nr), type); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 335c5b932e14..9a111f70145c 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -28,7 +28,7 @@ * * Must be called with the tasklist lock write-held. */ -void __ptrace_link(task_t *child, task_t *new_parent) +void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) { BUG_ON(!list_empty(&child->ptrace_list)); if (child->parent == new_parent) @@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent) * TASK_TRACED, resume it now. * Requires that irqs be disabled. */ -void ptrace_untrace(task_t *child) +void ptrace_untrace(struct task_struct *child) { spin_lock(&child->sighand->siglock); if (child->state == TASK_TRACED) { @@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child) * * Must be called with the tasklist lock write-held. 
*/ -void __ptrace_unlink(task_t *child) +void __ptrace_unlink(struct task_struct *child) { BUG_ON(!child->ptrace); diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index 353a853bc390..0c1faa950af7 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -96,7 +96,7 @@ void deadlock_trace_off(void) rt_trace_on = 0; } -static void printk_task(task_t *p) +static void printk_task(struct task_struct *p) { if (p) printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio); @@ -231,7 +231,8 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) lock->name = name; } -void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task) +void +rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) { } diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index e82c2f848249..494dac872a13 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -33,7 +33,7 @@ struct test_thread_data { }; static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static task_t *threads[MAX_RT_TEST_THREADS]; +static struct task_struct *threads[MAX_RT_TEST_THREADS]; static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; enum test_opcodes { @@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf, static ssize_t sysfs_test_status(struct sys_device *dev, char *buf) { struct test_thread_data *td; + struct task_struct *tsk; char *curr = buf; - task_t *tsk; int i; td = container_of(dev, struct test_thread_data, sysdev); diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 91b699aa658b..d2ef13b485e7 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -157,7 +157,7 @@ int max_lock_depth = 1024; * Decreases task's usage by one - may thus free the task. * Returns 0 or -EDEADLK. 
*/ -static int rt_mutex_adjust_prio_chain(task_t *task, +static int rt_mutex_adjust_prio_chain(struct task_struct *task, int deadlock_detect, struct rt_mutex *orig_lock, struct rt_mutex_waiter *orig_waiter, @@ -282,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task, spin_unlock_irqrestore(&task->pi_lock, flags); out_put_task: put_task_struct(task); + return ret; } @@ -403,10 +404,10 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int detect_deadlock) { + struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; - task_t *owner = rt_mutex_owner(lock); - int boost = 0, res; unsigned long flags; + int boost = 0, res; spin_lock_irqsave(&current->pi_lock, flags); __rt_mutex_adjust_prio(current); @@ -527,9 +528,9 @@ static void remove_waiter(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { int first = (waiter == rt_mutex_top_waiter(lock)); - int boost = 0; - task_t *owner = rt_mutex_owner(lock); + struct task_struct *owner = rt_mutex_owner(lock); unsigned long flags; + int boost = 0; spin_lock_irqsave(&current->pi_lock, flags); plist_del(&waiter->list_entry, &lock->wait_list); diff --git a/kernel/sched.c b/kernel/sched.c index b0326141f841..021b31219516 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -179,7 +179,7 @@ static unsigned int static_prio_timeslice(int static_prio) return SCALE_PRIO(DEF_TIMESLICE, static_prio); } -static inline unsigned int task_timeslice(task_t *p) +static inline unsigned int task_timeslice(struct task_struct *p) { return static_prio_timeslice(p->static_prio); } @@ -227,7 +227,7 @@ struct runqueue { unsigned long expired_timestamp; unsigned long long timestamp_last_tick; - task_t *curr, *idle; + struct task_struct *curr, *idle; struct mm_struct *prev_mm; prio_array_t *active, *expired, arrays[2]; int best_expired_prio; @@ -240,7 +240,7 @@ struct runqueue { int active_balance; int push_cpu; - task_t *migration_thread; + struct task_struct
*migration_thread; struct list_head migration_queue; #endif @@ -291,16 +291,16 @@ static DEFINE_PER_CPU(struct runqueue, runqueues); #endif #ifndef __ARCH_WANT_UNLOCKED_CTXSW -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(runqueue_t *rq, struct task_struct *p) { return rq->curr == p; } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next) { } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev) { #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ @@ -317,7 +317,7 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) } #else /* __ARCH_WANT_UNLOCKED_CTXSW */ -static inline int task_running(runqueue_t *rq, task_t *p) +static inline int task_running(runqueue_t *rq, struct task_struct *p) { #ifdef CONFIG_SMP return p->oncpu; @@ -326,7 +326,7 @@ static inline int task_running(runqueue_t *rq, task_t *p) #endif } -static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) +static inline void prepare_lock_switch(runqueue_t *rq, struct task_struct *next) { #ifdef CONFIG_SMP /* @@ -343,7 +343,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next) #endif } -static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) +static inline void finish_lock_switch(runqueue_t *rq, struct task_struct *prev) { #ifdef CONFIG_SMP /* @@ -364,7 +364,7 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev) * __task_rq_lock - lock the runqueue a given task resides on. * Must be called interrupts disabled. */ -static inline runqueue_t *__task_rq_lock(task_t *p) +static inline runqueue_t *__task_rq_lock(struct task_struct *p) __acquires(rq->lock) { struct runqueue *rq; @@ -384,7 +384,7 @@ repeat_lock_task: * interrupts. 
Note the ordering: we can safely lookup the task_rq without * explicitly disabling preemption. */ -static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +static runqueue_t *task_rq_lock(struct task_struct *p, unsigned long *flags) __acquires(rq->lock) { struct runqueue *rq; @@ -541,7 +541,7 @@ static inline runqueue_t *this_rq_lock(void) * long it was from the *first* time it was queued to the time that it * finally hit a cpu. */ -static inline void sched_info_dequeued(task_t *t) +static inline void sched_info_dequeued(struct task_struct *t) { t->sched_info.last_queued = 0; } @@ -551,7 +551,7 @@ static inline void sched_info_dequeued(task_t *t) * long it was waiting to run. We also note when it began so that we * can keep stats on how long its timeslice is. */ -static void sched_info_arrive(task_t *t) +static void sched_info_arrive(struct task_struct *t) { unsigned long now = jiffies, diff = 0; struct runqueue *rq = task_rq(t); @@ -585,7 +585,7 @@ static void sched_info_arrive(task_t *t) * the timestamp if it is already not set. It's assumed that * sched_info_dequeued() will clear that stamp when appropriate. */ -static inline void sched_info_queued(task_t *t) +static inline void sched_info_queued(struct task_struct *t) { if (!t->sched_info.last_queued) t->sched_info.last_queued = jiffies; @@ -595,7 +595,7 @@ static inline void sched_info_queued(task_t *t) * Called when a process ceases being the active-running process, either * voluntarily or involuntarily. Now we can calculate how long we ran. */ -static inline void sched_info_depart(task_t *t) +static inline void sched_info_depart(struct task_struct *t) { struct runqueue *rq = task_rq(t); unsigned long diff = jiffies - t->sched_info.last_arrival; @@ -611,7 +611,8 @@ static inline void sched_info_depart(task_t *t) * their time slice. (This may also be called when switching to or from * the idle task.) We are only called when prev != next. 
*/ -static inline void sched_info_switch(task_t *prev, task_t *next) +static inline void +sched_info_switch(struct task_struct *prev, struct task_struct *next) { struct runqueue *rq = task_rq(prev); @@ -683,7 +684,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) * Both properties are important to certain workloads. */ -static inline int __normal_prio(task_t *p) +static inline int __normal_prio(struct task_struct *p) { int bonus, prio; @@ -719,7 +720,7 @@ static inline int __normal_prio(task_t *p) #define RTPRIO_TO_LOAD_WEIGHT(rp) \ (PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp)) -static void set_load_weight(task_t *p) +static void set_load_weight(struct task_struct *p) { if (has_rt_policy(p)) { #ifdef CONFIG_SMP @@ -737,23 +738,25 @@ static void set_load_weight(task_t *p) p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio); } -static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +inc_raw_weighted_load(runqueue_t *rq, const struct task_struct *p) { rq->raw_weighted_load += p->load_weight; } -static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p) +static inline void +dec_raw_weighted_load(runqueue_t *rq, const struct task_struct *p) { rq->raw_weighted_load -= p->load_weight; } -static inline void inc_nr_running(task_t *p, runqueue_t *rq) +static inline void inc_nr_running(struct task_struct *p, runqueue_t *rq) { rq->nr_running++; inc_raw_weighted_load(rq, p); } -static inline void dec_nr_running(task_t *p, runqueue_t *rq) +static inline void dec_nr_running(struct task_struct *p, runqueue_t *rq) { rq->nr_running--; dec_raw_weighted_load(rq, p); @@ -766,7 +769,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq) * setprio syscalls, and whenever the interactivity * estimator recalculates. 
*/ -static inline int normal_prio(task_t *p) +static inline int normal_prio(struct task_struct *p) { int prio; @@ -784,7 +787,7 @@ static inline int normal_prio(task_t *p) * interactivity modifiers. Will be RT if the task got * RT-boosted. If not then it returns p->normal_prio. */ -static int effective_prio(task_t *p) +static int effective_prio(struct task_struct *p) { p->normal_prio = normal_prio(p); /* @@ -800,7 +803,7 @@ static int effective_prio(task_t *p) /* * __activate_task - move a task to the runqueue. */ -static void __activate_task(task_t *p, runqueue_t *rq) +static void __activate_task(struct task_struct *p, runqueue_t *rq) { prio_array_t *target = rq->active; @@ -813,7 +816,7 @@ static void __activate_task(task_t *p, runqueue_t *rq) /* * __activate_idle_task - move idle task to the _front_ of runqueue. */ -static inline void __activate_idle_task(task_t *p, runqueue_t *rq) +static inline void __activate_idle_task(struct task_struct *p, runqueue_t *rq) { enqueue_task_head(p, rq->active); inc_nr_running(p, rq); @@ -823,7 +826,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq) * Recalculate p->normal_prio and p->prio after having slept, * updating the sleep-average too: */ -static int recalc_task_prio(task_t *p, unsigned long long now) +static int recalc_task_prio(struct task_struct *p, unsigned long long now) { /* Caller must always ensure 'now >= p->timestamp' */ unsigned long sleep_time = now - p->timestamp; @@ -895,7 +898,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now) * Update all the scheduling statistics stuff. (sleep average * calculation, priority modifiers, etc.) 
*/ -static void activate_task(task_t *p, runqueue_t *rq, int local) +static void activate_task(struct task_struct *p, runqueue_t *rq, int local) { unsigned long long now; @@ -962,7 +965,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq) #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG) #endif -static void resched_task(task_t *p) +static void resched_task(struct task_struct *p) { int cpu; @@ -983,7 +986,7 @@ static void resched_task(task_t *p) smp_send_reschedule(cpu); } #else -static inline void resched_task(task_t *p) +static inline void resched_task(struct task_struct *p) { assert_spin_locked(&task_rq(p)->lock); set_tsk_need_resched(p); @@ -994,7 +997,7 @@ static inline void resched_task(task_t *p) * task_curr - is this task currently executing on a CPU? * @p: the task in question. */ -inline int task_curr(const task_t *p) +inline int task_curr(const struct task_struct *p) { return cpu_curr(task_cpu(p)) == p; } @@ -1009,7 +1012,7 @@ unsigned long weighted_cpuload(const int cpu) typedef struct { struct list_head list; - task_t *task; + struct task_struct *task; int dest_cpu; struct completion done; @@ -1019,7 +1022,8 @@ typedef struct { * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. */ -static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) +static int +migrate_task(struct task_struct *p, int dest_cpu, migration_req_t *req) { runqueue_t *rq = task_rq(p); @@ -1049,7 +1053,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(task_t *p) +void wait_task_inactive(struct task_struct *p) { unsigned long flags; runqueue_t *rq; @@ -1083,7 +1087,7 @@ repeat: * to another CPU then no harm is done and the purpose has been * achieved as well. 
*/ -void kick_process(task_t *p) +void kick_process(struct task_struct *p) { int cpu; @@ -1286,7 +1290,7 @@ nextlevel: * Returns the CPU we should wake onto. */ #if defined(ARCH_HAS_SCHED_WAKE_IDLE) -static int wake_idle(int cpu, task_t *p) +static int wake_idle(int cpu, struct task_struct *p) { cpumask_t tmp; struct sched_domain *sd; @@ -1309,7 +1313,7 @@ static int wake_idle(int cpu, task_t *p) return cpu; } #else -static inline int wake_idle(int cpu, task_t *p) +static inline int wake_idle(int cpu, struct task_struct *p) { return cpu; } @@ -1329,7 +1333,7 @@ static inline int wake_idle(int cpu, task_t *p) * * returns failure only if the task is already active. */ -static int try_to_wake_up(task_t *p, unsigned int state, int sync) +static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) { int cpu, this_cpu, success = 0; unsigned long flags; @@ -1487,14 +1491,14 @@ out: return success; } -int fastcall wake_up_process(task_t *p) +int fastcall wake_up_process(struct task_struct *p) { return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED | TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0); } EXPORT_SYMBOL(wake_up_process); -int fastcall wake_up_state(task_t *p, unsigned int state) +int fastcall wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); } @@ -1503,7 +1507,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state) * Perform scheduler related setup for a newly forked process p. * p is forked by current. */ -void fastcall sched_fork(task_t *p, int clone_flags) +void fastcall sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu(); @@ -1571,7 +1575,7 @@ void fastcall sched_fork(task_t *p, int clone_flags) * that must be done for every newly created context, then puts the task * on the runqueue and wakes it. 
*/ -void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) +void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) { unsigned long flags; int this_cpu, cpu; @@ -1655,7 +1659,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) * artificially, because any timeslice recovered here * was given away by the parent in the first place.) */ -void fastcall sched_exit(task_t *p) +void fastcall sched_exit(struct task_struct *p) { unsigned long flags; runqueue_t *rq; @@ -1689,7 +1693,7 @@ void fastcall sched_exit(task_t *p) * prepare_task_switch sets up locking and calls architecture specific * hooks. */ -static inline void prepare_task_switch(runqueue_t *rq, task_t *next) +static inline void prepare_task_switch(runqueue_t *rq, struct task_struct *next) { prepare_lock_switch(rq, next); prepare_arch_switch(next); @@ -1710,7 +1714,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next) * with the lock held can cause deadlocks; see schedule() for * details.) */ -static inline void finish_task_switch(runqueue_t *rq, task_t *prev) +static inline void finish_task_switch(runqueue_t *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; @@ -1748,7 +1752,7 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev) * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. */ -asmlinkage void schedule_tail(task_t *prev) +asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { runqueue_t *rq = this_rq(); @@ -1765,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev) * context_switch - switch to the new MM and the new * thread's register state. 
*/ -static inline -task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next) +static inline struct task_struct * +context_switch(runqueue_t *rq, struct task_struct *prev, + struct task_struct *next) { struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; @@ -1937,7 +1942,7 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) * allow dest_cpu, which will force the cpu onto dest_cpu. Then * the cpu_allowed mask is restored. */ -static void sched_migrate_task(task_t *p, int dest_cpu) +static void sched_migrate_task(struct task_struct *p, int dest_cpu) { migration_req_t req; runqueue_t *rq; @@ -1952,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu) if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread (might exit: take ref). */ struct task_struct *mt = rq->migration_thread; + get_task_struct(mt); task_rq_unlock(rq, &flags); wake_up_process(mt); put_task_struct(mt); wait_for_completion(&req.done); + return; } out: @@ -1980,9 +1987,9 @@ void sched_exec(void) * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. */ -static -void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, - runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) +static void pull_task(runqueue_t *src_rq, prio_array_t *src_array, + struct task_struct *p, runqueue_t *this_rq, + prio_array_t *this_array, int this_cpu) { dequeue_task(p, src_array); dec_nr_running(p, src_rq); @@ -2003,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? 
*/ static -int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, +int can_migrate_task(struct task_struct *p, runqueue_t *rq, int this_cpu, struct sched_domain *sd, enum idle_type idle, int *all_pinned) { @@ -2052,8 +2059,8 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, best_prio_seen, skip_for_load; prio_array_t *array, *dst_array; struct list_head *head, *curr; + struct task_struct *tmp; long rem_load_move; - task_t *tmp; if (max_nr_move == 0 || max_load_move == 0) goto out; @@ -2105,7 +2112,7 @@ skip_bitmap: head = array->queue + idx; curr = head->prev; skip_queue: - tmp = list_entry(curr, task_t, run_list); + tmp = list_entry(curr, struct task_struct, run_list); curr = curr->prev; @@ -2819,7 +2826,7 @@ EXPORT_PER_CPU_SYMBOL(kstat); * Bank in p->sched_time the ns elapsed since the last tick or switch. */ static inline void -update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now) +update_cpu_clock(struct task_struct *p, runqueue_t *rq, unsigned long long now) { p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick); } @@ -2828,7 +2835,7 @@ update_cpu_clock(task_t *p, runqueue_t *rq, unsigned long long now) * Return current->sched_time plus any more ns on the sched_clock * that have not yet been banked. */ -unsigned long long current_sched_time(const task_t *p) +unsigned long long current_sched_time(const struct task_struct *p) { unsigned long long ns; unsigned long flags; @@ -2945,9 +2952,9 @@ void account_steal_time(struct task_struct *p, cputime_t steal) void scheduler_tick(void) { unsigned long long now = sched_clock(); + struct task_struct *p = current; int cpu = smp_processor_id(); runqueue_t *rq = this_rq(); - task_t *p = current; update_cpu_clock(p, rq, now); @@ -3079,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu) * utilize, if another task runs on a sibling. 
This models the * slowdown effect of other tasks running on siblings: */ -static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) +static inline unsigned long +smt_slice(struct task_struct *p, struct sched_domain *sd) { return p->time_slice * (100 - sd->per_cpu_gain) / 100; } @@ -3090,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd) * acquire their lock. As we only trylock the normal locking order does not * need to be obeyed. */ -static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) +static int +dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p) { struct sched_domain *tmp, *sd = NULL; int ret = 0, i; @@ -3110,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) return 0; for_each_cpu_mask(i, sd->span) { + struct task_struct *smt_curr; runqueue_t *smt_rq; - task_t *smt_curr; if (i == this_cpu) continue; @@ -3157,7 +3166,7 @@ static inline void wake_sleeping_dependent(int this_cpu) { } static inline int -dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p) +dependent_sleeper(int this_cpu, runqueue_t *this_rq, struct task_struct *p) { return 0; } @@ -3211,11 +3220,11 @@ static inline int interactive_sleep(enum sleep_type sleep_type) */ asmlinkage void __sched schedule(void) { + struct task_struct *prev, *next; struct list_head *queue; unsigned long long now; unsigned long run_time; int cpu, idx, new_prio; - task_t *prev, *next; prio_array_t *array; long *switch_count; runqueue_t *rq; @@ -3308,7 +3317,7 @@ need_resched_nonpreemptible: idx = sched_find_first_bit(array->bitmap); queue = array->queue + idx; - next = list_entry(queue->next, task_t, run_list); + next = list_entry(queue->next, struct task_struct, run_list); if (!rt_task(next) && interactive_sleep(next->sleep_type)) { unsigned long long delta = now - next->timestamp; @@ -3776,7 +3785,7 @@ EXPORT_SYMBOL(sleep_on_timeout); * * Used by the rt_mutex code to implement 
priority inheritance logic. */ -void rt_mutex_setprio(task_t *p, int prio) +void rt_mutex_setprio(struct task_struct *p, int prio) { unsigned long flags; prio_array_t *array; @@ -3817,7 +3826,7 @@ void rt_mutex_setprio(task_t *p, int prio) #endif -void set_user_nice(task_t *p, long nice) +void set_user_nice(struct task_struct *p, long nice) { int old_prio, delta; unsigned long flags; @@ -3873,7 +3882,7 @@ EXPORT_SYMBOL(set_user_nice); * @p: task * @nice: nice value */ -int can_nice(const task_t *p, const int nice) +int can_nice(const struct task_struct *p, const int nice) { /* convert nice value [19,-20] to rlimit style value [1,40] */ int nice_rlim = 20 - nice; @@ -3932,7 +3941,7 @@ asmlinkage long sys_nice(int increment) * RT tasks are offset by -200. Normal tasks are centered * around 0, value goes from -16 to +15. */ -int task_prio(const task_t *p) +int task_prio(const struct task_struct *p) { return p->prio - MAX_RT_PRIO; } @@ -3941,7 +3950,7 @@ int task_prio(const task_t *p) * task_nice - return the nice value of a given task. * @p: the task in question. */ -int task_nice(const task_t *p) +int task_nice(const struct task_struct *p) { return TASK_NICE(p); } @@ -3960,7 +3969,7 @@ int idle_cpu(int cpu) * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. */ -task_t *idle_task(int cpu) +struct task_struct *idle_task(int cpu) { return cpu_rq(cpu)->idle; } @@ -3969,7 +3978,7 @@ task_t *idle_task(int cpu) * find_process_by_pid - find a process with a matching PID value. * @pid: the pid in question. */ -static inline task_t *find_process_by_pid(pid_t pid) +static inline struct task_struct *find_process_by_pid(pid_t pid) { return pid ? 
find_task_by_pid(pid) : current; } @@ -4103,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler); static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) { - int retval; struct sched_param lparam; struct task_struct *p; + int retval; if (!param || pid < 0) return -EINVAL; @@ -4121,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) read_unlock_irq(&tasklist_lock); retval = sched_setscheduler(p, policy, &lparam); put_task_struct(p); + return retval; } @@ -4156,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) */ asmlinkage long sys_sched_getscheduler(pid_t pid) { + struct task_struct *p; int retval = -EINVAL; - task_t *p; if (pid < 0) goto out_nounlock; @@ -4184,8 +4194,8 @@ out_nounlock: asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) { struct sched_param lp; + struct task_struct *p; int retval = -EINVAL; - task_t *p; if (!param || pid < 0) goto out_nounlock; @@ -4218,9 +4228,9 @@ out_unlock: long sched_setaffinity(pid_t pid, cpumask_t new_mask) { - task_t *p; - int retval; cpumask_t cpus_allowed; + struct task_struct *p; + int retval; lock_cpu_hotplug(); read_lock(&tasklist_lock); @@ -4306,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL; long sched_getaffinity(pid_t pid, cpumask_t *mask) { + struct task_struct *p; int retval; - task_t *p; lock_cpu_hotplug(); read_lock(&tasklist_lock); @@ -4592,9 +4602,9 @@ asmlinkage long sys_sched_get_priority_min(int policy) asmlinkage long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) { + struct task_struct *p; int retval = -EINVAL; struct timespec t; - task_t *p; if (pid < 0) goto out_nounlock; @@ -4641,12 +4651,13 @@ static inline struct task_struct *younger_sibling(struct task_struct *p) return list_entry(p->sibling.next,struct task_struct,sibling); } -static void show_task(task_t *p) +static const char *stat_nam[] = { "R", "S", "D", 
"T", "t", "Z", "X" }; + +static void show_task(struct task_struct *p) { - task_t *relative; - unsigned state; + struct task_struct *relative; unsigned long free = 0; - static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" }; + unsigned state; printk("%-13.13s ", p->comm); state = p->state ? __ffs(p->state) + 1 : 0; @@ -4697,7 +4708,7 @@ static void show_task(task_t *p) void show_state(void) { - task_t *g, *p; + struct task_struct *g, *p; #if (BITS_PER_LONG == 32) printk("\n" @@ -4730,7 +4741,7 @@ void show_state(void) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void __devinit init_idle(task_t *idle, int cpu) +void __devinit init_idle(struct task_struct *idle, int cpu) { runqueue_t *rq = cpu_rq(cpu); unsigned long flags; @@ -4793,7 +4804,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE; * task must not exit() & deallocate itself prematurely. The * call is not atomic; no spinlocks may be held. */ -int set_cpus_allowed(task_t *p, cpumask_t new_mask) +int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { unsigned long flags; migration_req_t req; @@ -5061,7 +5072,7 @@ void idle_task_exit(void) mmdrop(mm); } -static void migrate_dead(unsigned int dead_cpu, task_t *p) +static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) { struct runqueue *rq = cpu_rq(dead_cpu); @@ -5096,9 +5107,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu) struct list_head *list = &rq->arrays[arr].queue[i]; while (!list_empty(list)) - migrate_dead(dead_cpu, - list_entry(list->next, task_t, - run_list)); + migrate_dead(dead_cpu, list_entry(list->next, + struct task_struct, run_list)); } } } @@ -6801,7 +6811,7 @@ void normalize_rt_tasks(void) * * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! */ -task_t *curr_task(int cpu) +struct task_struct *curr_task(int cpu) { return cpu_curr(cpu); } @@ -6821,7 +6831,7 @@ task_t *curr_task(int cpu) * * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! 
*/ -void set_curr_task(int cpu, task_t *p) +void set_curr_task(int cpu, struct task_struct *p) { cpu_curr(cpu) = p; } diff --git a/kernel/timer.c b/kernel/timer.c index b761898d04c8..396a3c024c2c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void) static void process_timeout(unsigned long __data) { - wake_up_process((task_t *)__data); + wake_up_process((struct task_struct *)__data); } /** diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 59f0b42bd89e..90d2c6001659 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -51,7 +51,7 @@ struct cpu_workqueue_struct { wait_queue_head_t work_done; struct workqueue_struct *wq; - task_t *thread; + struct task_struct *thread; int run_depth; /* Detect run_workqueue() recursion depth */ } ____cacheline_aligned; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d46ed0f1dc06..b9af136e5cfa 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -225,7 +225,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that * we select a process with CAP_SYS_RAW_IO set). */ -static void __oom_kill_task(task_t *p, const char *message) +static void __oom_kill_task(struct task_struct *p, const char *message) { if (p->pid == 1) { WARN_ON(1); @@ -255,10 +255,10 @@ static void __oom_kill_task(task_t *p, const char *message) force_sig(SIGKILL, p); } -static int oom_kill_task(task_t *p, const char *message) +static int oom_kill_task(struct task_struct *p, const char *message) { struct mm_struct *mm; - task_t * g, * q; + struct task_struct *g, *q; mm = p->mm; @@ -316,7 +316,7 @@ static int oom_kill_process(struct task_struct *p, unsigned long points, */ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { - task_t *p; + struct task_struct *p; unsigned long points = 0; if (printk_ratelimit()) { -- cgit v1.2.3