diff options
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/Makefile | 1 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 46 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 2 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 81 | ||||
-rw-r--r-- | kernel/locking/mcs_spinlock.h | 4 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.c | 13 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.h | 27 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 994 | ||||
-rw-r--r-- | kernel/locking/mutex.h | 30 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 15 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 7 | ||||
-rw-r--r-- | kernel/locking/qrwlock.c | 6 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 2 | ||||
-rw-r--r-- | kernel/locking/qspinlock_stat.h | 13 | ||||
-rw-r--r-- | kernel/locking/rtmutex-debug.c | 1 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 24 | ||||
-rw-r--r-- | kernel/locking/rtmutex_common.h | 4 | ||||
-rw-r--r-- | kernel/locking/rwsem-spinlock.c | 21 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 44 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 1 | ||||
-rw-r--r-- | kernel/locking/semaphore.c | 8 | ||||
-rw-r--r-- | kernel/locking/spinlock.c | 8 | ||||
-rw-r--r-- | kernel/locking/spinlock_debug.c | 86 | ||||
-rw-r--r-- | kernel/locking/test-ww_mutex.c | 646 |
24 files changed, 1475 insertions, 609 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 6f88e352cd4f..760158d9d98d 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -28,3 +28,4 @@ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o +obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4d7ffc0a0d00..12e38c213b70 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,8 @@ #define DISABLE_BRANCH_PROFILING #include <linux/mutex.h> #include <linux/sched.h> +#include <linux/sched/clock.h> +#include <linux/sched/task.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/proc_fs.h> @@ -840,9 +842,9 @@ static struct lock_list *alloc_list_entry(void) /* * Add a new dependency to the head of the list: */ -static int add_lock_to_list(struct lock_class *class, struct lock_class *this, - struct list_head *head, unsigned long ip, - int distance, struct stack_trace *trace) +static int add_lock_to_list(struct lock_class *this, struct list_head *head, + unsigned long ip, int distance, + struct stack_trace *trace) { struct lock_list *entry; /* @@ -1868,14 +1870,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * Ok, all validations passed, add the new lock * to the previous lock's dependency list: */ - ret = add_lock_to_list(hlock_class(prev), hlock_class(next), + ret = add_lock_to_list(hlock_class(next), &hlock_class(prev)->locks_after, next->acquire_ip, distance, &trace); if (!ret) return 0; - ret = add_lock_to_list(hlock_class(next), hlock_class(prev), + ret = add_lock_to_list(hlock_class(prev), &hlock_class(next)->locks_before, next->acquire_ip, distance, &trace); if (!ret) @@ -2203,7 +2205,7 @@ cache_hit: * Important for check_no_collision(). */ if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) { - if (debug_locks_off_graph_unlock()) + if (!debug_locks_off_graph_unlock()) return 0; print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); @@ -3191,7 +3193,7 @@ print_lock_nested_lock_not_held(struct task_struct *curr, return 0; } -static int __lock_is_held(struct lockdep_map *lock); +static int __lock_is_held(struct lockdep_map *lock, int read); /* * This gets called for every mutex_lock*()/spin_lock*() operation. @@ -3332,7 +3334,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, } chain_key = iterate_chain_key(chain_key, class_idx); - if (nest_lock && !__lock_is_held(nest_lock)) + if (nest_lock && !__lock_is_held(nest_lock, -1)) return print_lock_nested_lock_not_held(curr, hlock, ip); if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) @@ -3579,7 +3581,7 @@ found_it: return 1; } -static int __lock_is_held(struct lockdep_map *lock) +static int __lock_is_held(struct lockdep_map *lock, int read) { struct task_struct *curr = current; int i; @@ -3587,8 +3589,12 @@ static int __lock_is_held(struct lockdep_map *lock) for (i = 0; i < curr->lockdep_depth; i++) { struct held_lock *hlock = curr->held_locks + i; - if (match_held_lock(hlock, lock)) - return 1; + if (match_held_lock(hlock, lock)) { + if (read == -1 || hlock->read == read) + return 1; + + return 0; + } } return 0; @@ -3772,7 +3778,7 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); -int lock_is_held(struct lockdep_map *lock) +int lock_is_held_type(struct lockdep_map *lock, int read) { unsigned long flags; int ret = 0; @@ -3784,13 +3790,13 @@ int lock_is_held(struct lockdep_map *lock) check_flags(flags); current->lockdep_recursion = 1; - ret = __lock_is_held(lock); + ret = __lock_is_held(lock, read); current->lockdep_recursion = 0; raw_local_irq_restore(flags); return ret; } -EXPORT_SYMBOL_GPL(lock_is_held); +EXPORT_SYMBOL_GPL(lock_is_held_type); struct pin_cookie lock_pin_lock(struct lockdep_map *lock) { @@ -4408,13 +4414,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ /* Note: the following can be executed concurrently, so be careful. */ printk("\n"); - printk("===============================\n"); - printk("[ INFO: suspicious RCU usage. ]\n"); + pr_err("===============================\n"); + pr_err("[ ERR: suspicious RCU usage. ]\n"); print_kernel_ident(); - printk("-------------------------------\n"); - printk("%s:%d %s!\n", file, line, s); - printk("\nother info that might help us debug this:\n\n"); - printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", + pr_err("-------------------------------\n"); + pr_err("%s:%d %s!\n", file, line, s); + pr_err("\nother info that might help us debug this:\n\n"); + pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" : !rcu_is_watching() diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index a0f61effad25..6d1fcc786081 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -18,7 +18,7 @@ #include <linux/debug_locks.h> #include <linux/vmalloc.h> #include <linux/sort.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/div64.h> #include "lockdep_internals.h" diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index f8c5af52a131..f24582d4dad3 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -32,6 +32,8 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/sched.h> +#include <uapi/linux/sched/types.h> +#include <linux/rtmutex.h> #include <linux/atomic.h> #include <linux/moduleparam.h> #include <linux/delay.h> @@ -372,6 +374,78 @@ static struct lock_torture_ops mutex_lock_ops = { .name = "mutex_lock" }; +#include <linux/ww_mutex.h> +static DEFINE_WW_CLASS(torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class); +static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class); + +static int torture_ww_mutex_lock(void) +__acquires(torture_ww_mutex_0) +__acquires(torture_ww_mutex_1) +__acquires(torture_ww_mutex_2) +{ + LIST_HEAD(list); + struct reorder_lock { + struct list_head link; + struct ww_mutex *lock; + } locks[3], *ll, *ln; + struct ww_acquire_ctx ctx; + + locks[0].lock = &torture_ww_mutex_0; + list_add(&locks[0].link, &list); + + locks[1].lock = &torture_ww_mutex_1; + list_add(&locks[1].link, &list); + + locks[2].lock = &torture_ww_mutex_2; + list_add(&locks[2].link, &list); + + ww_acquire_init(&ctx, &torture_ww_class); + + list_for_each_entry(ll, &list, link) { + int err; + + err = ww_mutex_lock(ll->lock, &ctx); + if (!err) + continue; + + ln = ll; + list_for_each_entry_continue_reverse(ln, &list, link) + ww_mutex_unlock(ln->lock); + + if (err != -EDEADLK) + return err; + + ww_mutex_lock_slow(ll->lock, &ctx); + list_move(&ll->link, &list); + } + + ww_acquire_fini(&ctx); + return 0; +} + +static void torture_ww_mutex_unlock(void) +__releases(torture_ww_mutex_0) +__releases(torture_ww_mutex_1) +__releases(torture_ww_mutex_2) +{ + ww_mutex_unlock(&torture_ww_mutex_0); + ww_mutex_unlock(&torture_ww_mutex_1); + ww_mutex_unlock(&torture_ww_mutex_2); +} + +static struct lock_torture_ops ww_mutex_lock_ops = { + .writelock = torture_ww_mutex_lock, + .write_delay = torture_mutex_delay, + .task_boost = torture_boost_dummy, + .writeunlock = torture_ww_mutex_unlock, + .readlock = NULL, + .read_delay = NULL, + .readunlock = NULL, + .name = "ww_mutex_lock" +}; + #ifdef CONFIG_RT_MUTEXES static DEFINE_RT_MUTEX(torture_rtmutex); @@ -780,6 +854,10 @@ static void lock_torture_cleanup(void) else lock_torture_print_module_parms(cxt.cur_ops, "End of test: SUCCESS"); + + kfree(cxt.lwsa); + kfree(cxt.lrsa); + end: torture_cleanup_end(); } @@ -793,6 +871,7 @@ static int __init lock_torture_init(void) &spin_lock_ops, &spin_lock_irq_ops, &rw_lock_ops, &rw_lock_irq_ops, &mutex_lock_ops, + &ww_mutex_lock_ops, #ifdef CONFIG_RT_MUTEXES &rtmutex_lock_ops, #endif @@ -924,6 +1003,8 @@ static int __init lock_torture_init(void) GFP_KERNEL); if (reader_tasks == NULL) { VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory"); + kfree(writer_tasks); + writer_tasks = NULL; firsterr = -ENOMEM; goto unwind; } diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index c835270f0c2f..6a385aabcce7 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -28,7 +28,7 @@ struct mcs_spinlock { #define arch_mcs_spin_lock_contended(l) \ do { \ while (!(smp_load_acquire(l))) \ - cpu_relax_lowlatency(); \ + cpu_relax(); \ } while (0) #endif @@ -108,7 +108,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; /* Wait until the next pointer is set */ while (!(next = READ_ONCE(node->next))) - cpu_relax_lowlatency(); + cpu_relax(); } /* Pass lock to next waiter. */ diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9c951fade415..9aa713629387 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -73,21 +73,8 @@ void debug_mutex_unlock(struct mutex *lock) { if (likely(debug_locks)) { DEBUG_LOCKS_WARN_ON(lock->magic != lock); - - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); } - - /* - * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug - * mutexes so that we can do it here after we've verified state. - */ - mutex_clear_owner(lock); - atomic_set(&lock->count, 1); } void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 57a871ae3c81..4174417d5309 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -26,30 +26,3 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); - -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} - -#define spin_lock_mutex(lock, flags) \ - do { \ - struct mutex *l = container_of(lock, struct mutex, wait_lock); \ - \ - DEBUG_LOCKS_WARN_ON(in_interrupt()); \ - local_irq_save(flags); \ - arch_spin_lock(&(lock)->rlock.raw_lock);\ - DEBUG_LOCKS_WARN_ON(l->magic != l); \ - } while (0) - -#define spin_unlock_mutex(lock, flags) \ - do { \ - arch_spin_unlock(&(lock)->rlock.raw_lock); \ - local_irq_restore(flags); \ - preempt_check_resched(); \ - } while (0) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index a70b90db3909..198527a62149 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -19,49 +19,190 @@ */ #include <linux/mutex.h> #include <linux/ww_mutex.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/export.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/osq_lock.h> -/* - * In the DEBUG case we are using the "NULL fastpath" for mutexes, - * which forces all calls into the slowpath: - */ #ifdef CONFIG_DEBUG_MUTEXES # include "mutex-debug.h" -# include <asm-generic/mutex-null.h> -/* - * Must be 0 for the debug case so we do not do the unlock outside of the - * wait_lock region. debug_mutex_unlock() will do the actual unlock in this - * case. - */ -# undef __mutex_slowpath_needs_to_unlock -# define __mutex_slowpath_needs_to_unlock() 0 #else # include "mutex.h" -# include <asm/mutex.h> #endif void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { - atomic_set(&lock->count, 1); + atomic_long_set(&lock->owner, 0); spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); - mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif debug_mutex_init(lock, name, key); } - EXPORT_SYMBOL(__mutex_init); +/* + * @owner: contains: 'struct task_struct *' to the current lock owner, + * NULL means not owned. Since task_struct pointers are aligned at + * at least L1_CACHE_BYTES, we have low bits to store extra state. + * + * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. + * Bit1 indicates unlock needs to hand the lock to the top-waiter + * Bit2 indicates handoff has been done and we're waiting for pickup. + */ +#define MUTEX_FLAG_WAITERS 0x01 +#define MUTEX_FLAG_HANDOFF 0x02 +#define MUTEX_FLAG_PICKUP 0x04 + +#define MUTEX_FLAGS 0x07 + +static inline struct task_struct *__owner_task(unsigned long owner) +{ + return (struct task_struct *)(owner & ~MUTEX_FLAGS); +} + +static inline unsigned long __owner_flags(unsigned long owner) +{ + return owner & MUTEX_FLAGS; +} + +/* + * Trylock variant that retuns the owning task on failure. + */ +static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock) +{ + unsigned long owner, curr = (unsigned long)current; + + owner = atomic_long_read(&lock->owner); + for (;;) { /* must loop, can race against a flag */ + unsigned long old, flags = __owner_flags(owner); + unsigned long task = owner & ~MUTEX_FLAGS; + + if (task) { + if (likely(task != curr)) + break; + + if (likely(!(flags & MUTEX_FLAG_PICKUP))) + break; + + flags &= ~MUTEX_FLAG_PICKUP; + } else { +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP); +#endif + } + + /* + * We set the HANDOFF bit, we must make sure it doesn't live + * past the point where we acquire it. This would be possible + * if we (accidentally) set the bit on an unlocked mutex. + */ + flags &= ~MUTEX_FLAG_HANDOFF; + + old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags); + if (old == owner) + return NULL; + + owner = old; + } + + return __owner_task(owner); +} + +/* + * Actual trylock that will work on any unlocked state. + */ +static inline bool __mutex_trylock(struct mutex *lock) +{ + return !__mutex_trylock_or_owner(lock); +} + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +/* + * Lockdep annotations are contained to the slow paths for simplicity. + * There is nothing that would stop spreading the lockdep annotations outwards + * except more code. + */ + +/* + * Optimistic trylock that only works in the uncontended case. Make sure to + * follow with a __mutex_trylock() before failing. + */ +static __always_inline bool __mutex_trylock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr)) + return true; + + return false; +} + +static __always_inline bool __mutex_unlock_fast(struct mutex *lock) +{ + unsigned long curr = (unsigned long)current; + + if (atomic_long_cmpxchg_release(&lock->owner, curr, 0UL) == curr) + return true; + + return false; +} +#endif + +static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_or(flag, &lock->owner); +} + +static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) +{ + atomic_long_andnot(flag, &lock->owner); +} + +static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) +{ + return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; +} + +/* + * Give up ownership to a specific task, when @task = NULL, this is equivalent + * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves + * WAITERS. Provides RELEASE semantics like a regular unlock, the + * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff. + */ +static void __mutex_handoff(struct mutex *lock, struct task_struct *task) +{ + unsigned long owner = atomic_long_read(&lock->owner); + + for (;;) { + unsigned long old, new; + +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); +#endif + + new = (owner & MUTEX_FLAG_WAITERS); + new |= (unsigned long)task; + if (task) + new |= MUTEX_FLAG_PICKUP; + + old = atomic_long_cmpxchg_release(&lock->owner, owner, new); + if (old == owner) + break; + + owner = old; + } +} + #ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and @@ -69,7 +210,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); +static void __sched __mutex_lock_slowpath(struct mutex *lock); /** * mutex_lock - acquire the mutex @@ -95,19 +236,15 @@ __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); void __sched mutex_lock(struct mutex *lock) { might_sleep(); - /* - * The locking fastpath is the 1->0 transition from - * 'unlocked' into 'locked' state. - */ - __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); - mutex_set_owner(lock); -} + if (!__mutex_trylock_fast(lock)) + __mutex_lock_slowpath(lock); +} EXPORT_SYMBOL(mutex_lock); #endif -static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, - struct ww_acquire_ctx *ww_ctx) +static __always_inline void +ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx) { #ifdef CONFIG_DEBUG_MUTEXES /* @@ -146,20 +283,50 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, ww_ctx->acquired++; } +static inline bool __sched +__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) +{ + return a->stamp - b->stamp <= LONG_MAX && + (a->stamp != b->stamp || a > b); +} + /* - * After acquiring lock with fastpath or when we lost out in contested - * slowpath, set ctx and wake up any waiters so they can recheck. + * Wake up any waiters that may have to back off when the lock is held by the + * given context. + * + * Due to the invariants on the wait list, this can only affect the first + * waiter with a context. * - * This function is never called when CONFIG_DEBUG_LOCK_ALLOC is set, - * as the fastpath and opportunistic spinning are disabled in that case. + * The current task must not be on the wait list. */ -static __always_inline void -ww_mutex_set_context_fastpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) +static void __sched +__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) { - unsigned long flags; struct mutex_waiter *cur; + lockdep_assert_held(&lock->wait_lock); + + list_for_each_entry(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + if (cur->ww_ctx->acquired > 0 && + __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } + + break; + } +} + +/* + * After acquiring lock with fastpath or when we lost out in contested + * slowpath, set ctx and wake up any waiters so they can recheck. + */ +static __always_inline void +ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +{ ww_mutex_lock_acquired(lock, ctx); lock->ctx = ctx; @@ -176,58 +343,91 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, /* * Check if lock is contended, if not there is nobody to wake up */ - if (likely(atomic_read(&lock->base.count) == 0)) + if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) return; /* * Uh oh, we raced in fastpath, wake up everyone in this case, * so they can see the new lock->ctx. */ - spin_lock_mutex(&lock->base.wait_lock, flags); - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } - spin_unlock_mutex(&lock->base.wait_lock, flags); + spin_lock(&lock->base.wait_lock); + __ww_mutex_wakeup_for_backoff(&lock->base, ctx); + spin_unlock(&lock->base.wait_lock); } /* - * After acquiring lock in the slowpath set ctx and wake up any - * waiters so they can recheck. + * After acquiring lock in the slowpath set ctx. + * + * Unlike for the fast path, the caller ensures that waiters are woken up where + * necessary. * * Callers must hold the mutex wait_lock. */ static __always_inline void -ww_mutex_set_context_slowpath(struct ww_mutex *lock, - struct ww_acquire_ctx *ctx) +ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - struct mutex_waiter *cur; - ww_mutex_lock_acquired(lock, ctx); lock->ctx = ctx; +} + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + +static inline +bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + struct mutex_waiter *waiter) +{ + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + + /* + * If ww->ctx is set the contents are undefined, only + * by acquiring wait_lock there is a guarantee that + * they are not invalid when reading. + * + * As such, when deadlock detection needs to be + * performed the optimistic spinning cannot be done. + * + * Check this in every inner iteration because we may + * be racing against another thread's ww_mutex_lock. + */ + if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx)) + return false; /* - * Give any possible sleeping processes the chance to wake up, - * so they can recheck if they have to back off. + * If we aren't on the wait list yet, cancel the spin + * if there are waiters. We want to avoid stealing the + * lock from a waiter with an earlier stamp, since the + * other thread may already own a lock that we also + * need. */ - list_for_each_entry(cur, &lock->base.wait_list, list) { - debug_mutex_wake_waiter(&lock->base, cur); - wake_up_process(cur->task); - } + if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS)) + return false; + + /* + * Similarly, stop spinning if we are no longer the + * first waiter. + */ + if (waiter && !__mutex_waiter_is_first(lock, waiter)) + return false; + + return true; } -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* - * Look out! "owner" is an entirely speculative pointer - * access and not reliable. + * Look out! "owner" is an entirely speculative pointer access and not + * reliable. + * + * "noinline" so that this function shows up on perf profiles. */ static noinline -bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, + struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter) { bool ret = true; rcu_read_lock(); - while (lock->owner == owner) { + while (__mutex_owner(lock) == owner) { /* * Ensure we emit the owner->on_cpu, dereference _after_ * checking lock->owner still matches owner. If that fails, @@ -236,12 +436,21 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) */ barrier(); - if (!owner->on_cpu || need_resched()) { + /* + * Use vcpu_is_preempted to detect lock holder preemption issue. + */ + if (!owner->on_cpu || need_resched() || + vcpu_is_preempted(task_cpu(owner))) { + ret = false; + break; + } + + if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) { ret = false; break; } - cpu_relax_lowlatency(); + cpu_relax(); } rcu_read_unlock(); @@ -260,27 +469,25 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = READ_ONCE(lock->owner); + owner = __mutex_owner(lock); + + /* + * As lock holder preemption issue, we both skip spinning if task is not + * on cpu or its cpu is preempted + */ if (owner) - retval = owner->on_cpu; + retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); rcu_read_unlock(); + /* - * if lock->owner is not set, the mutex owner may have just acquired - * it and not set the owner yet or the mutex has been released. + * If lock->owner is not set, the mutex has been released. Return true + * such that we'll trylock in the spin path, which is a faster option + * than the blocking slow path. */ return retval; } /* - * Atomically try to take the lock when it is available - */ -static inline bool mutex_try_to_acquire(struct mutex *lock) -{ - return !mutex_is_locked(lock) && - (atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1); -} - -/* * Optimistic spinning. * * We try to spin for acquisition when we find that the lock owner @@ -288,13 +495,6 @@ static inline bool mutex_try_to_acquire(struct mutex *lock) * need to reschedule. The rationale is that if the lock owner is * running, it is likely to release the lock soon. * - * Since this needs the lock owner, and this mutex implementation - * doesn't track the owner atomically in the lock field, we need to - * track it non-atomically. - * - * We can't do this for DEBUG_MUTEXES because that relies on wait_lock - * to serialize everything. - * * The mutex spinners are queued up using MCS lock so that only one * spinner can compete for the mutex. However, if mutex spinning isn't * going to happen, there is no point in going through the lock/unlock @@ -302,74 +502,50 @@ static inline bool mutex_try_to_acquire(struct mutex *lock) * * Returns true when the lock was taken, otherwise false, indicating * that we need to jump to the slowpath and sleep. + * + * The waiter flag is set to true if the spinner is a waiter in the wait + * queue. The waiter-spinner will spin on the lock directly and concurrently + * with the spinner at the head of the OSQ, if present, until the owner is + * changed to itself. */ -static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +static __always_inline bool +mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, struct mutex_waiter *waiter) { - struct task_struct *task = current; - - if (!mutex_can_spin_on_owner(lock)) - goto done; - - /* - * In order to avoid a stampede of mutex spinners trying to - * acquire the mutex all at once, the spinners need to take a - * MCS (queued) lock first before spinning on the owner field. - */ - if (!osq_lock(&lock->osq)) - goto done; - - while (true) { - struct task_struct *owner; - - if (use_ww_ctx && ww_ctx->acquired > 0) { - struct ww_mutex *ww; - - ww = container_of(lock, struct ww_mutex, base); - /* - * If ww->ctx is set the contents are undefined, only - * by acquiring wait_lock there is a guarantee that - * they are not invalid when reading. - * - * As such, when deadlock detection needs to be - * performed the optimistic spinning cannot be done. - */ - if (READ_ONCE(ww->ctx)) - break; - } - + if (!waiter) { /* - * If there's an owner, wait for it to either - * release the lock or go to sleep. + * The purpose of the mutex_can_spin_on_owner() function is + * to eliminate the overhead of osq_lock() and osq_unlock() + * in case spinning isn't possible. As a waiter-spinner + * is not going to take OSQ lock anyway, there is no need + * to call mutex_can_spin_on_owner(). */ - owner = READ_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) - break; + if (!mutex_can_spin_on_owner(lock)) + goto fail; - /* Try to acquire the mutex if it is unlocked. */ - if (mutex_try_to_acquire(lock)) { - lock_acquired(&lock->dep_map, ip); - - if (use_ww_ctx) { - struct ww_mutex *ww; - ww = container_of(lock, struct ww_mutex, base); + /* + * In order to avoid a stampede of mutex spinners trying to + * acquire the mutex all at once, the spinners need to take a + * MCS (queued) lock first before spinning on the owner field. + */ + if (!osq_lock(&lock->osq)) + goto fail; + } - ww_mutex_set_context_fastpath(ww, ww_ctx); - } + for (;;) { + struct task_struct *owner; - mutex_set_owner(lock); - osq_unlock(&lock->osq); - return true; - } + /* Try to acquire the mutex... */ + owner = __mutex_trylock_or_owner(lock); + if (!owner) + break; /* - * When there's no owner, we might have preempted between the - * owner acquiring the lock and setting the owner field. If - * we're an RT task that will live-lock because we won't let - * the owner complete. + * There's an owner, wait for it to either + * release the lock or go to sleep. */ - if (!owner && (need_resched() || rt_task(task))) - break; + if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter)) + goto fail_unlock; /* * The cpu_relax() call is a compiler barrier which forces @@ -377,11 +553,20 @@ static bool mutex_optimistic_spin(struct mutex *lock, * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax_lowlatency(); + cpu_relax(); } - osq_unlock(&lock->osq); -done: + if (!waiter) + osq_unlock(&lock->osq); + + return true; + + +fail_unlock: + if (!waiter) + osq_unlock(&lock->osq); + +fail: /* * If we fell out of the spin path because of need_resched(), * reschedule now, before we try-lock the mutex. This avoids getting @@ -399,15 +584,15 @@ done: return false; } #else -static bool mutex_optimistic_spin(struct mutex *lock, - struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) +static __always_inline bool +mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, + const bool use_ww_ctx, struct mutex_waiter *waiter) { return false; } #endif -__visible __used noinline -void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); /** * mutex_unlock - release the mutex @@ -422,21 +607,12 @@ void __sched __mutex_unlock_slowpath(atomic_t *lock_count); */ void __sched mutex_unlock(struct mutex *lock) { - /* - * The unlocking fastpath is the 0->1 transition from 'locked' - * into 'unlocked' state: - */ -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(lock); +#ifndef CONFIG_DEBUG_LOCK_ALLOC + if (__mutex_unlock_fast(lock)) + return; #endif - __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); + __mutex_unlock_slowpath(lock, _RET_IP_); } - EXPORT_SYMBOL(mutex_unlock); /** @@ -465,36 +641,93 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) lock->ctx = NULL; } -#ifndef CONFIG_DEBUG_MUTEXES - /* - * When debugging is enabled we must not clear the owner before time, - * the slow path will always be taken, and that clears the owner field - * after verifying that it was indeed current. - */ - mutex_clear_owner(&lock->base); -#endif - __mutex_fastpath_unlock(&lock->base.count, __mutex_unlock_slowpath); + mutex_unlock(&lock->base); } EXPORT_SYMBOL(ww_mutex_unlock); static inline int __sched -__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) +__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter, + struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); + struct mutex_waiter *cur; + + if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) + goto deadlock; + + /* + * If there is a waiter in front of us that has a context, then its + * stamp is earlier than ours and we must back off. + */ + cur = waiter; + list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) { + if (cur->ww_ctx) + goto deadlock; + } - if (!hold_ctx) + return 0; + +deadlock: +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); + ctx->contending_lock = ww; +#endif + return -EDEADLK; +} + +static inline int __sched +__ww_mutex_add_waiter(struct mutex_waiter *waiter, + struct mutex *lock, + struct ww_acquire_ctx *ww_ctx) +{ + struct mutex_waiter *cur; + struct list_head *pos; + + if (!ww_ctx) { + list_add_tail(&waiter->list, &lock->wait_list); return 0; + } - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && - (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { + /* + * Add the waiter before the first waiter with a higher stamp. + * Waiters without a context are skipped to avoid starving + * them. + */ + pos = &lock->wait_list; + list_for_each_entry_reverse(cur, &lock->wait_list, list) { + if (!cur->ww_ctx) + continue; + + if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) { + /* Back off immediately if necessary. */ + if (ww_ctx->acquired > 0) { #ifdef CONFIG_DEBUG_MUTEXES - DEBUG_LOCKS_WARN_ON(ctx->contending_lock); - ctx->contending_lock = ww; + struct ww_mutex *ww; + + ww = container_of(lock, struct ww_mutex, base); + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock); + ww_ctx->contending_lock = ww; #endif - return -EDEADLK; + return -EDEADLK; + } + + break; + } + + pos = &cur->list; + + /* + * Wake up the waiter so that it gets a chance to back + * off. + */ + if (cur->ww_ctx->acquired > 0) { + debug_mutex_wake_waiter(lock, cur); + wake_up_process(cur->task); + } } + list_add_tail(&waiter->list, pos); return 0; } @@ -506,13 +739,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { - struct task_struct *task = current; struct mutex_waiter waiter; - unsigned long flags; + bool first = false; + struct ww_mutex *ww; int ret; - if (use_ww_ctx) { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + might_sleep(); + + ww = container_of(lock, struct ww_mutex, base); + if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; } @@ -520,106 +755,157 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); - if (mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx)) { + if (__mutex_trylock(lock) || + mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) { /* got the lock, yay! */ + lock_acquired(&lock->dep_map, ip); + if (use_ww_ctx && ww_ctx) + ww_mutex_set_context_fastpath(ww, ww_ctx); preempt_enable(); return 0; } - spin_lock_mutex(&lock->wait_lock, flags); - + spin_lock(&lock->wait_lock); /* - * Once more, try to acquire the lock. Only try-lock the mutex if - * it is unlocked to reduce unnecessary xchg() operations. + * After waiting to acquire the wait_lock, try again. */ - if (!mutex_is_locked(lock) && - (atomic_xchg_acquire(&lock->count, 0) == 1)) + if (__mutex_trylock(lock)) { + if (use_ww_ctx && ww_ctx) + __ww_mutex_wakeup_for_backoff(lock, ww_ctx); + goto skip_wait; + } debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task); - - /* add waiting tasks to the end of the waitqueue (FIFO): */ - list_add_tail(&waiter.list, &lock->wait_list); - waiter.task = task; + debug_mutex_add_waiter(lock, &waiter, current); lock_contended(&lock->dep_map, ip); + if (!use_ww_ctx) { + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + +#ifdef CONFIG_DEBUG_MUTEXES + waiter.ww_ctx = MUTEX_POISON_WW_CTX; +#endif + } else { + /* Add in stamp order, waking up waiters that must back off. */ + ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); + if (ret) + goto err_early_backoff; + + waiter.ww_ctx = ww_ctx; + } + + waiter.task = current; + + if (__mutex_waiter_is_first(lock, &waiter)) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + + set_current_state(state); for (;;) { /* - * Lets try to take the lock again - this is needed even if - * we get here for the first time (shortly after failing to - * acquire the lock), to make sure that we get a wakeup once - * it's unlocked. Later on, if we sleep, this is the - * operation that gives us the lock. We xchg it to -1, so - * that when we release the lock, we properly wake up the - * other waiters. We only attempt the xchg if the count is - * non-negative in order to avoid unnecessary xchg operations: + * Once we hold wait_lock, we're serialized against + * mutex_unlock() handing the lock off to us, do a trylock + * before testing the error conditions to make sure we pick up + * the handoff. */ - if (atomic_read(&lock->count) >= 0 && - (atomic_xchg_acquire(&lock->count, -1) == 1)) - break; + if (__mutex_trylock(lock)) + goto acquired; /* - * got a signal? (This code gets eliminated in the - * TASK_UNINTERRUPTIBLE case.) + * Check for signals and wound conditions while holding + * wait_lock. This ensures the lock cancellation is ordered + * against mutex_unlock() and wake-ups do not go missing. */ - if (unlikely(signal_pending_state(state, task))) { + if (unlikely(signal_pending_state(state, current))) { ret = -EINTR; goto err; } - if (use_ww_ctx && ww_ctx->acquired > 0) { - ret = __ww_mutex_lock_check_stamp(lock, ww_ctx); + if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) { + ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx); if (ret) goto err; } - __set_task_state(task, state); - - /* didn't get the lock, go to sleep: */ - spin_unlock_mutex(&lock->wait_lock, flags); + spin_unlock(&lock->wait_lock); schedule_preempt_disabled(); - spin_lock_mutex(&lock->wait_lock, flags); + + /* + * ww_mutex needs to always recheck its position since its waiter + * list is not FIFO ordered. + */ + if ((use_ww_ctx && ww_ctx) || !first) { + first = __mutex_waiter_is_first(lock, &waiter); + if (first) + __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF); + } + + set_current_state(state); + /* + * Here we order against unlock; we must either see it change + * state back to RUNNING and fall through the next schedule(), + * or we must see its unlock and acquire. + */ + if (__mutex_trylock(lock) || + (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter))) + break; + + spin_lock(&lock->wait_lock); } - __set_task_state(task, TASK_RUNNING); + spin_lock(&lock->wait_lock); +acquired: + __set_current_state(TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, task); - /* set it to 0 if there are no waiters left: */ + mutex_remove_waiter(lock, &waiter, current); if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); + __mutex_clear_flag(lock, MUTEX_FLAGS); + debug_mutex_free_waiter(&waiter); skip_wait: /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); - mutex_set_owner(lock); - if (use_ww_ctx) { - struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + if (use_ww_ctx && ww_ctx) ww_mutex_set_context_slowpath(ww, ww_ctx); - } - spin_unlock_mutex(&lock->wait_lock, flags); + spin_unlock(&lock->wait_lock); preempt_enable(); return 0; err: - mutex_remove_waiter(lock, &waiter, task); - spin_unlock_mutex(&lock->wait_lock, flags); + __set_current_state(TASK_RUNNING); + mutex_remove_waiter(lock, &waiter, current); +err_early_backoff: + spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); preempt_enable(); return ret; } +static int __sched +__mutex_lock(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip) +{ + return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); +} + +static int __sched +__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass, + struct lockdep_map *nest_lock, unsigned long ip, + struct ww_acquire_ctx *ww_ctx) +{ + return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true); +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -627,32 +913,38 @@ EXPORT_SYMBOL_GPL(mutex_lock_nested); void __sched _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { - might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL, 0); + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); } - EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); int __sched mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { - might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_); } - EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); +void __sched +mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) +{ + int token; + + might_sleep(); + + token = io_schedule_prepare(); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, + subclass, NULL, _RET_IP_, NULL, 0); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io_nested); + static inline int ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { @@ -680,89 +972,102 @@ ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) } int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); - if (!ret && ctx->acquired > 1) + ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, + 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, + ctx); + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock); +EXPORT_SYMBOL_GPL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; might_sleep(); - ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx, 1); + ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, + 0, ctx ? &ctx->dep_map : NULL, _RET_IP_, + ctx); - if (!ret && ctx->acquired > 1) + if (!ret && ctx && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); return ret; } -EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); #endif /* * Release the lock, slowpath: */ -static inline void -__mutex_unlock_common_slowpath(struct mutex *lock, int nested) +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) { - unsigned long flags; - WAKE_Q(wake_q); + struct task_struct *next = NULL; + DEFINE_WAKE_Q(wake_q); + unsigned long owner; + + mutex_release(&lock->dep_map, 1, ip); /* - * As a performance measurement, release the lock before doing other - * wakeup related duties to follow. This allows other tasks to acquire - * the lock sooner, while still handling cleanups in past unlock calls. - * This can be done as we do not enforce strict equivalence between the - * mutex counter and wait_list. - * + * Release the lock before (potentially) taking the spinlock such that + * other contenders can get on with things ASAP. * - * Some architectures leave the lock unlocked in the fastpath failure - * case, others need to leave it locked. In the later case we have to - * unlock it here - as the lock counter is currently 0 or negative. + * Except when HANDOFF, in that case we must not clear the owner field, + * but instead set it to the top waiter. */ - if (__mutex_slowpath_needs_to_unlock()) - atomic_set(&lock->count, 1); + owner = atomic_long_read(&lock->owner); + for (;;) { + unsigned long old; - spin_lock_mutex(&lock->wait_lock, flags); - mutex_release(&lock->dep_map, nested, _RET_IP_); - debug_mutex_unlock(lock); +#ifdef CONFIG_DEBUG_MUTEXES + DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current); + DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP); +#endif + if (owner & MUTEX_FLAG_HANDOFF) + break; + + old = atomic_long_cmpxchg_release(&lock->owner, owner, + __owner_flags(owner)); + if (old == owner) { + if (owner & MUTEX_FLAG_WAITERS) + break; + + return; + } + + owner = old; + } + + spin_lock(&lock->wait_lock); + debug_mutex_unlock(lock); if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ struct mutex_waiter *waiter = - list_entry(lock->wait_list.next, - struct mutex_waiter, list); + list_first_entry(&lock->wait_list, + struct mutex_waiter, list); + + next = waiter->task; debug_mutex_wake_waiter(lock, waiter); - wake_q_add(&wake_q, waiter->task); + wake_q_add(&wake_q, next); } - spin_unlock_mutex(&lock->wait_lock, flags); - wake_up_q(&wake_q); -} + if (owner & MUTEX_FLAG_HANDOFF) + __mutex_handoff(lock, next); -/* - * Release the lock, slowpath: - */ -__visible void -__mutex_unlock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); + spin_unlock(&lock->wait_lock); - __mutex_unlock_common_slowpath(lock, 1); + wake_up_q(&wake_q); } #ifndef CONFIG_DEBUG_LOCK_ALLOC @@ -789,104 +1094,72 @@ __mutex_lock_interruptible_slowpath(struct mutex *lock); */ int __sched mutex_lock_interruptible(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_interruptible_slowpath(lock); + + return __mutex_lock_interruptible_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_interruptible); int __sched mutex_lock_killable(struct mutex *lock) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->count); - if (likely(!ret)) { - mutex_set_owner(lock); + + if (__mutex_trylock_fast(lock)) return 0; - } else - return __mutex_lock_killable_slowpath(lock); + + return __mutex_lock_killable_slowpath(lock); } EXPORT_SYMBOL(mutex_lock_killable); -__visible void __sched -__mutex_lock_slowpath(atomic_t *lock_count) +void __sched mutex_lock_io(struct mutex *lock) { - struct mutex *lock = container_of(lock_count, struct mutex, count); + int token; - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); + token = io_schedule_prepare(); + mutex_lock(lock); + io_schedule_finish(token); +} +EXPORT_SYMBOL_GPL(mutex_lock_io); + +static noinline void __sched +__mutex_lock_slowpath(struct mutex *lock) +{ + __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched __mutex_lock_killable_slowpath(struct mutex *lock) { - return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); } static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock) { - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL, 0); + return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); + return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL, + _RET_IP_, ctx); } static noinline int __sched __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx, 1); + return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL, + _RET_IP_, ctx); } #endif -/* - * Spinlock based trylock, we take the spinlock and check whether we - * can get the lock: - */ -static inline int __mutex_trylock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - unsigned long flags; - int prev; - - /* No need to trylock if the mutex is locked. */ - if (mutex_is_locked(lock)) - return 0; - - spin_lock_mutex(&lock->wait_lock, flags); - - prev = atomic_xchg_acquire(&lock->count, -1); - if (likely(prev == 1)) { - mutex_set_owner(lock); - mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - } - - /* Set it back to 0 if there are no waiters: */ - if (likely(list_empty(&lock->wait_list))) - atomic_set(&lock->count, 0); - - spin_unlock_mutex(&lock->wait_lock, flags); - - return prev == 1; -} - /** * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired @@ -903,52 +1176,45 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) */ int __sched mutex_trylock(struct mutex *lock) { - int ret; + bool locked = __mutex_trylock(lock); - ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); - if (ret) - mutex_set_owner(lock); + if (locked) + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return ret; + return locked; } EXPORT_SYMBOL(mutex_trylock); #ifndef CONFIG_DEBUG_LOCK_ALLOC int __sched -__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->base.count); + if (__mutex_trylock_fast(&lock->base)) { + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); + return 0; + } - if (likely(!ret)) { - ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_slowpath(lock, ctx); - return ret; + return __ww_mutex_lock_slowpath(lock, ctx); } -EXPORT_SYMBOL(__ww_mutex_lock); +EXPORT_SYMBOL(ww_mutex_lock); int __sched -__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { - int ret; - might_sleep(); - ret = __mutex_fastpath_lock_retval(&lock->base.count); + if (__mutex_trylock_fast(&lock->base)) { + if (ctx) + ww_mutex_set_context_fastpath(lock, ctx); + return 0; + } - if (likely(!ret)) { - ww_mutex_set_context_fastpath(lock, ctx); - mutex_set_owner(&lock->base); - } else - ret = __ww_mutex_lock_interruptible_slowpath(lock, ctx); - return ret; + return __ww_mutex_lock_interruptible_slowpath(lock, ctx); } -EXPORT_SYMBOL(__ww_mutex_lock_interruptible); +EXPORT_SYMBOL(ww_mutex_lock_interruptible); #endif diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 6cd6b8e9efd7..6ebc1902f779 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -9,39 +9,9 @@ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs: */ -#define spin_lock_mutex(lock, flags) \ - do { spin_lock(lock); (void)(flags); } while (0) -#define spin_unlock_mutex(lock, flags) \ - do { spin_unlock(lock); (void)(flags); } while (0) #define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_MUTEX_SPIN_ON_OWNER -/* - * The mutex owner can get read and written to locklessly. - * We should use WRITE_ONCE when writing the owner value to - * avoid store tearing, otherwise, a thread could potentially - * read a partially written and incomplete owner value. - */ -static inline void mutex_set_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, current); -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ - WRITE_ONCE(lock->owner, NULL); -} -#else -static inline void mutex_set_owner(struct mutex *lock) -{ -} - -static inline void mutex_clear_owner(struct mutex *lock) -{ -} -#endif - #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 05a37857ab55..a3167941093b 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -21,6 +21,11 @@ static inline int encode_cpu(int cpu_nr) return cpu_nr + 1; } +static inline int node_cpu(struct optimistic_spin_node *node) +{ + return node->cpu - 1; +} + static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) { int cpu_nr = encoded_cpu_val - 1; @@ -75,7 +80,7 @@ osq_wait_next(struct optimistic_spin_queue *lock, break; } - cpu_relax_lowlatency(); + cpu_relax(); } return next; @@ -118,11 +123,13 @@ bool osq_lock(struct optimistic_spin_queue *lock) while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. + * Use vcpu_is_preempted() to avoid waiting for a preempted + * lock holder: */ - if (need_resched()) + if (need_resched() || vcpu_is_preempted(node_cpu(node->prev))) goto unqueue; - cpu_relax_lowlatency(); + cpu_relax(); } return true; @@ -148,7 +155,7 @@ unqueue: if (smp_load_acquire(&node->locked)) return true; - cpu_relax_lowlatency(); + cpu_relax(); /* * Or we race against a concurrent unqueue()'s step-B, in which diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index ce182599cf2e..883cf1b92d90 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -1,7 +1,6 @@ #include <linux/atomic.h> #include <linux/rwsem.h> #include <linux/percpu.h> -#include <linux/wait.h> #include <linux/lockdep.h> #include <linux/percpu-rwsem.h> #include <linux/rcupdate.h> @@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); __init_rwsem(&sem->rw_sem, name, rwsem_key); - init_waitqueue_head(&sem->writer); + rcuwait_init(&sem->writer); sem->readers_block = 0; return 0; } @@ -103,7 +102,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) __this_cpu_dec(*sem->read_count); /* Prod writer to recheck readers_active */ - wake_up(&sem->writer); + rcuwait_wake_up(&sem->writer); } EXPORT_SYMBOL_GPL(__percpu_up_read); @@ -160,7 +159,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) */ /* Wait for all now active readers to complete. */ - wait_event(sem->writer, readers_active_check(sem)); + rcuwait_wait_event(&sem->writer, readers_active_check(sem)); } EXPORT_SYMBOL_GPL(percpu_down_write); diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 19248ddf37ce..cc3ed0ccdfa2 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -54,7 +54,7 @@ static __always_inline void rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) { while ((cnts & _QW_WMASK) == _QW_LOCKED) { - cpu_relax_lowlatency(); + cpu_relax(); cnts = atomic_read_acquire(&lock->cnts); } } @@ -130,7 +130,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0)) break; - cpu_relax_lowlatency(); + cpu_relax(); } /* When no more readers, set the locked flag */ @@ -141,7 +141,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) _QW_LOCKED) == _QW_WAITING)) break; - cpu_relax_lowlatency(); + cpu_relax(); } unlock: arch_spin_unlock(&lock->wait_lock); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index e3b5520005db..e6b2f7ad3e51 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -263,7 +263,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running; + return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu); } /* diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index eb0a599fcf58..4a30ef63c607 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include <linux/debugfs.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/fs.h> static const char * const qstat_names[qstat_num + 1] = { @@ -108,11 +109,7 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, /* * Get the counter ID stored in file->f_inode->i_private */ - if (!file->f_inode) { - WARN_ON_ONCE(1); - return -EBADF; - } - counter = (long)(file->f_inode->i_private); + counter = (long)file_inode(file)->i_private; if (counter >= qstat_num) return -EBADF; @@ -177,11 +174,7 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf, /* * Get the counter ID stored in file->f_inode->i_private */ - if (!file->f_inode) { - WARN_ON_ONCE(1); - return -EBADF; - } - if ((long)(file->f_inode->i_private) != qstat_reset_cnts) + if ((long)file_inode(file)->i_private != qstat_reset_cnts) return count; for_each_possible_cpu(cpu) { diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 62b6cee8ea7f..97ee9df32e0f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -18,6 +18,7 @@ */ #include <linux/sched.h> #include <linux/sched/rt.h> +#include <linux/sched/debug.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/spinlock.h> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2c49d76f96c3..6edc32ecd9c5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -12,9 +12,11 @@ */ #include <linux/spinlock.h> #include <linux/export.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> #include <linux/sched/deadline.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/timer.h> #include "rtmutex_common.h" @@ -1179,7 +1181,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, * TASK_INTERRUPTIBLE checks for signals and * timeout. Ignored otherwise. */ - if (unlikely(state == TASK_INTERRUPTIBLE)) { + if (likely(state == TASK_INTERRUPTIBLE)) { /* Signal pending? */ if (signal_pending(current)) ret = -EINTR; @@ -1446,7 +1448,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, struct wake_q_head *wqh)) { - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { rt_mutex_deadlock_account_unlock(current); @@ -1619,11 +1621,15 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a * proxy owner * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * @proxy_owner:the task to set as owner * * No locking. Caller has to do serializing itself - * Special API call for PI-futex support + * + * Special API call for PI-futex support. This initializes the rtmutex and + * assigns it to @proxy_owner. Concurrent operations on the rtmutex are not + * possible at this point because the pi_state which contains the rtmutex + * is not yet visible to other tasks. */ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) @@ -1637,10 +1643,14 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock, /** * rt_mutex_proxy_unlock - release a lock on behalf of owner * - * @lock: the rt_mutex to be locked + * @lock: the rt_mutex to be locked * * No locking. Caller has to do serializing itself - * Special API call for PI-futex support + * + * Special API call for PI-futex support. This merrily cleans up the rtmutex + * (debugging) state. Concurrent operations on this rt_mutex are not + * possible because it belongs to the pi_state which is about to be freed + * and it is not longer visible to other tasks. */ void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner) diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index e317e1cbb3eb..856dfff5c33a 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -13,6 +13,7 @@ #define __KERNEL_RTMUTEX_COMMON_H #include <linux/rtmutex.h> +#include <linux/sched/wake_q.h> /* * This is the control structure for tasks blocked on a rt_mutex, @@ -71,13 +72,12 @@ task_top_pi_waiter(struct task_struct *p) * lock->owner state tracking: */ #define RT_MUTEX_HAS_WAITERS 1UL -#define RT_MUTEX_OWNER_MASKALL 1UL static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) { unsigned long owner = (unsigned long) READ_ONCE(lock->owner); - return (struct task_struct *) (owner & ~RT_MUTEX_OWNER_MASKALL); + return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); } /* diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 1591f6b3539f..7bc24d477805 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -6,7 +6,8 @@ * - Derived also from comments by Linus */ #include <linux/rwsem.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/export.h> enum rwsem_waiter_type { @@ -128,7 +129,6 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem) void __sched __down_read(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; unsigned long flags; raw_spin_lock_irqsave(&sem->wait_lock, flags); @@ -140,13 +140,12 @@ void __sched __down_read(struct rw_semaphore *sem) goto out; } - tsk = current; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); /* set up my own style of waitqueue */ - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); + get_task_struct(current); list_add_tail(&waiter.list, &sem->wait_list); @@ -158,10 +157,10 @@ void __sched __down_read(struct rw_semaphore *sem) if (!waiter.task) break; schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); out: ; } @@ -194,15 +193,13 @@ int __down_read_trylock(struct rw_semaphore *sem) int __sched __down_write_common(struct rw_semaphore *sem, int state) { struct rwsem_waiter waiter; - struct task_struct *tsk; unsigned long flags; int ret = 0; raw_spin_lock_irqsave(&sem->wait_lock, flags); /* set up my own style of waitqueue */ - tsk = current; - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_WRITE; list_add_tail(&waiter.list, &sem->wait_list); @@ -220,7 +217,7 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state) ret = -EINTR; goto out; } - set_task_state(tsk, state); + set_current_state(state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); raw_spin_lock_irqsave(&sem->wait_lock, flags); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2337b4bb2366..34e727f18e49 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -10,10 +10,12 @@ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes. */ #include <linux/rwsem.h> -#include <linux/sched.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/sched/signal.h> #include <linux/sched/rt.h> +#include <linux/sched/wake_q.h> +#include <linux/sched/debug.h> #include <linux/osq_lock.h> #include "rwsem.h" @@ -224,10 +226,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; - struct task_struct *tsk = current; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); - waiter.task = tsk; + waiter.task = current; waiter.type = RWSEM_WAITING_FOR_READ; raw_spin_lock_irq(&sem->wait_lock); @@ -254,13 +255,13 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* wait to be given the lock */ while (true) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!waiter.task) break; schedule(); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); return sem; } EXPORT_SYMBOL(rwsem_down_read_failed); @@ -336,7 +337,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) goto done; } - ret = owner->on_cpu; + /* + * As lock holder preemption issue, we both skip spinning if task is not + * on cpu or its cpu is preempted + */ + ret = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); done: rcu_read_unlock(); return ret; @@ -362,13 +367,17 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) */ barrier(); - /* abort spinning when need_resched or owner is not running */ - if (!owner->on_cpu || need_resched()) { + /* + * abort spinning when need_resched or owner is not running or + * owner's cpu is preempted. + */ + if (!owner->on_cpu || need_resched() || + vcpu_is_preempted(task_cpu(owner))) { rcu_read_unlock(); return false; } - cpu_relax_lowlatency(); + cpu_relax(); } rcu_read_unlock(); out: @@ -423,7 +432,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * memory barriers as we'll eventually observe the right * values at the cost of a few extra spins. */ - cpu_relax_lowlatency(); + cpu_relax(); } osq_unlock(&sem->osq); done: @@ -461,7 +470,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; struct rw_semaphore *ret = sem; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); @@ -495,8 +504,6 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * wake any read locks that were queued ahead of us. */ if (count > RWSEM_WAITING_BIAS) { - WAKE_Q(wake_q); - __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); /* * The wakeup is normally called _after_ the wait_lock @@ -506,6 +513,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) * for attempting rwsem_try_write_lock(). */ wake_up_q(&wake_q); + + /* + * Reinitialize wake_q after use. + */ + wake_q_init(&wake_q); } } else @@ -571,7 +583,7 @@ __visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); /* * If a spinner is present, it is not necessary to do the wakeup. @@ -625,7 +637,7 @@ __visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; - WAKE_Q(wake_q); + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->wait_lock, flags); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 45ba475d4be3..90a74ccd85a4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/export.h> #include <linux/rwsem.h> #include <linux/atomic.h> diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index b8120abe594b..561acdd39960 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include <linux/kernel.h> #include <linux/export.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/semaphore.h> #include <linux/spinlock.h> #include <linux/ftrace.h> @@ -204,19 +205,18 @@ struct semaphore_waiter { static inline int __sched __down_common(struct semaphore *sem, long state, long timeout) { - struct task_struct *task = current; struct semaphore_waiter waiter; list_add_tail(&waiter.list, &sem->wait_list); - waiter.task = task; + waiter.task = current; waiter.up = false; for (;;) { - if (signal_pending_state(state, task)) + if (signal_pending_state(state, current)) goto interrupted; if (unlikely(timeout <= 0)) goto timed_out; - __set_task_state(task, state); + __set_current_state(state); raw_spin_unlock_irq(&sem->lock); timeout = schedule_timeout(timeout); raw_spin_lock_irq(&sem->lock); diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index db3ccb1dd614..4b082b5cac9e 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -363,14 +363,6 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) } EXPORT_SYMBOL(_raw_spin_lock_nested); -void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) -{ - __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); -} -EXPORT_SYMBOL(_raw_spin_lock_bh_nested); - unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) { diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 0374a596cffa..9aa0fccd5d43 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -103,38 +103,14 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) lock->owner_cpu = -1; } -static void __spin_lock_debug(raw_spinlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - - for (i = 0; i < loops; i++) { - if (arch_spin_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - spin_dump(lock, "lockup suspected"); -#ifdef CONFIG_SMP - trigger_all_cpu_backtrace(); -#endif - - /* - * The trylock above was causing a livelock. Give the lower level arch - * specific lock code a chance to acquire the lock. We have already - * printed a warning/backtrace at this point. The non-debug arch - * specific code might actually succeed in acquiring the lock. If it is - * not successful, the end-result is the same - there is no forward - * progress. - */ - arch_spin_lock(&lock->raw_lock); -} - +/* + * We are now relying on the NMI watchdog to detect lockup instead of doing + * the detection here with an unfair lock which can cause problem of its own. + */ void do_raw_spin_lock(raw_spinlock_t *lock) { debug_spin_lock_before(lock); - if (unlikely(!arch_spin_trylock(&lock->raw_lock))) - __spin_lock_debug(lock); + arch_spin_lock(&lock->raw_lock); debug_spin_lock_after(lock); } @@ -172,32 +148,6 @@ static void rwlock_bug(rwlock_t *lock, const char *msg) #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) -#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ -static void __read_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_read_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_read_lock(rwlock_t *lock) { RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); @@ -247,32 +197,6 @@ static inline void debug_write_unlock(rwlock_t *lock) lock->owner_cpu = -1; } -#if 0 /* This can cause lockups */ -static void __write_lock_debug(rwlock_t *lock) -{ - u64 i; - u64 loops = loops_per_jiffy * HZ; - int print_once = 1; - - for (;;) { - for (i = 0; i < loops; i++) { - if (arch_write_trylock(&lock->raw_lock)) - return; - __delay(1); - } - /* lockup suspected: */ - if (print_once) { - print_once = 0; - printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " - "%s/%d, %p\n", - raw_smp_processor_id(), current->comm, - current->pid, lock); - dump_stack(); - } - } -} -#endif - void do_raw_write_lock(rwlock_t *lock) { debug_write_lock_before(lock); diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c new file mode 100644 index 000000000000..da6c9a34f62f --- /dev/null +++ b/kernel/locking/test-ww_mutex.c @@ -0,0 +1,646 @@ +/* + * Module-based API test facility for ww_mutexes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + */ + +#include <linux/kernel.h> + +#include <linux/completion.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/ww_mutex.h> + +static DEFINE_WW_CLASS(ww_class); +struct workqueue_struct *wq; + +struct test_mutex { + struct work_struct work; + struct ww_mutex mutex; + struct completion ready, go, done; + unsigned int flags; +}; + +#define TEST_MTX_SPIN BIT(0) +#define TEST_MTX_TRY BIT(1) +#define TEST_MTX_CTX BIT(2) +#define __TEST_MTX_LAST BIT(3) + +static void test_mutex_work(struct work_struct *work) +{ + struct test_mutex *mtx = container_of(work, typeof(*mtx), work); + + complete(&mtx->ready); + wait_for_completion(&mtx->go); + + if (mtx->flags & TEST_MTX_TRY) { + while (!ww_mutex_trylock(&mtx->mutex)) + cpu_relax(); + } else { + ww_mutex_lock(&mtx->mutex, NULL); + } + complete(&mtx->done); + ww_mutex_unlock(&mtx->mutex); +} + +static int __test_mutex(unsigned int flags) +{ +#define TIMEOUT (HZ / 16) + struct test_mutex mtx; + struct ww_acquire_ctx ctx; + int ret; + + ww_mutex_init(&mtx.mutex, &ww_class); + ww_acquire_init(&ctx, &ww_class); + + INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); + init_completion(&mtx.ready); + init_completion(&mtx.go); + init_completion(&mtx.done); + mtx.flags = flags; + + schedule_work(&mtx.work); + + wait_for_completion(&mtx.ready); + ww_mutex_lock(&mtx.mutex, (flags & TEST_MTX_CTX) ? &ctx : NULL); + complete(&mtx.go); + if (flags & TEST_MTX_SPIN) { + unsigned long timeout = jiffies + TIMEOUT; + + ret = 0; + do { + if (completion_done(&mtx.done)) { + ret = -EINVAL; + break; + } + cpu_relax(); + } while (time_before(jiffies, timeout)); + } else { + ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); + } + ww_mutex_unlock(&mtx.mutex); + ww_acquire_fini(&ctx); + + if (ret) { + pr_err("%s(flags=%x): mutual exclusion failure\n", + __func__, flags); + ret = -EINVAL; + } + + flush_work(&mtx.work); + destroy_work_on_stack(&mtx.work); + return ret; +#undef TIMEOUT +} + +static int test_mutex(void) +{ + int ret; + int i; + + for (i = 0; i < __TEST_MTX_LAST; i++) { + ret = __test_mutex(i); + if (ret) + return ret; + } + + return 0; +} + +static int test_aa(void) +{ + struct ww_mutex mutex; + struct ww_acquire_ctx ctx; + int ret; + + ww_mutex_init(&mutex, &ww_class); + ww_acquire_init(&ctx, &ww_class); + + ww_mutex_lock(&mutex, &ctx); + + if (ww_mutex_trylock(&mutex)) { + pr_err("%s: trylocked itself!\n", __func__); + ww_mutex_unlock(&mutex); + ret = -EINVAL; + goto out; + } + + ret = ww_mutex_lock(&mutex, &ctx); + if (ret != -EALREADY) { + pr_err("%s: missed deadlock for recursing, ret=%d\n", + __func__, ret); + if (!ret) + ww_mutex_unlock(&mutex); + ret = -EINVAL; + goto out; + } + + ret = 0; +out: + ww_mutex_unlock(&mutex); + ww_acquire_fini(&ctx); + return ret; +} + +struct test_abba { + struct work_struct work; + struct ww_mutex a_mutex; + struct ww_mutex b_mutex; + struct completion a_ready; + struct completion b_ready; + bool resolve; + int result; +}; + +static void test_abba_work(struct work_struct *work) +{ + struct test_abba *abba = container_of(work, typeof(*abba), work); + struct ww_acquire_ctx ctx; + int err; + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba->b_mutex, &ctx); + + complete(&abba->b_ready); + wait_for_completion(&abba->a_ready); + + err = ww_mutex_lock(&abba->a_mutex, &ctx); + if (abba->resolve && err == -EDEADLK) { + ww_mutex_unlock(&abba->b_mutex); + ww_mutex_lock_slow(&abba->a_mutex, &ctx); + err = ww_mutex_lock(&abba->b_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(&abba->a_mutex); + ww_mutex_unlock(&abba->b_mutex); + ww_acquire_fini(&ctx); + + abba->result = err; +} + +static int test_abba(bool resolve) +{ + struct test_abba abba; + struct ww_acquire_ctx ctx; + int err, ret; + + ww_mutex_init(&abba.a_mutex, &ww_class); + ww_mutex_init(&abba.b_mutex, &ww_class); + INIT_WORK_ONSTACK(&abba.work, test_abba_work); + init_completion(&abba.a_ready); + init_completion(&abba.b_ready); + abba.resolve = resolve; + + schedule_work(&abba.work); + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&abba.a_mutex, &ctx); + + complete(&abba.a_ready); + wait_for_completion(&abba.b_ready); + + err = ww_mutex_lock(&abba.b_mutex, &ctx); + if (resolve && err == -EDEADLK) { + ww_mutex_unlock(&abba.a_mutex); + ww_mutex_lock_slow(&abba.b_mutex, &ctx); + err = ww_mutex_lock(&abba.a_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(&abba.b_mutex); + ww_mutex_unlock(&abba.a_mutex); + ww_acquire_fini(&ctx); + + flush_work(&abba.work); + destroy_work_on_stack(&abba.work); + + ret = 0; + if (resolve) { + if (err || abba.result) { + pr_err("%s: failed to resolve ABBA deadlock, A err=%d, B err=%d\n", + __func__, err, abba.result); + ret = -EINVAL; + } + } else { + if (err != -EDEADLK && abba.result != -EDEADLK) { + pr_err("%s: missed ABBA deadlock, A err=%d, B err=%d\n", + __func__, err, abba.result); + ret = -EINVAL; + } + } + return ret; +} + +struct test_cycle { + struct work_struct work; + struct ww_mutex a_mutex; + struct ww_mutex *b_mutex; + struct completion *a_signal; + struct completion b_signal; + int result; +}; + +static void test_cycle_work(struct work_struct *work) +{ + struct test_cycle *cycle = container_of(work, typeof(*cycle), work); + struct ww_acquire_ctx ctx; + int err; + + ww_acquire_init(&ctx, &ww_class); + ww_mutex_lock(&cycle->a_mutex, &ctx); + + complete(cycle->a_signal); + wait_for_completion(&cycle->b_signal); + + err = ww_mutex_lock(cycle->b_mutex, &ctx); + if (err == -EDEADLK) { + ww_mutex_unlock(&cycle->a_mutex); + ww_mutex_lock_slow(cycle->b_mutex, &ctx); + err = ww_mutex_lock(&cycle->a_mutex, &ctx); + } + + if (!err) + ww_mutex_unlock(cycle->b_mutex); + ww_mutex_unlock(&cycle->a_mutex); + ww_acquire_fini(&ctx); + + cycle->result = err; +} + +static int __test_cycle(unsigned int nthreads) +{ + struct test_cycle *cycles; + unsigned int n, last = nthreads - 1; + int ret; + + cycles = kmalloc_array(nthreads, sizeof(*cycles), GFP_KERNEL); + if (!cycles) + return -ENOMEM; + + for (n = 0; n < nthreads; n++) { + struct test_cycle *cycle = &cycles[n]; + + ww_mutex_init(&cycle->a_mutex, &ww_class); + if (n == last) + cycle->b_mutex = &cycles[0].a_mutex; + else + cycle->b_mutex = &cycles[n + 1].a_mutex; + + if (n == 0) + cycle->a_signal = &cycles[last].b_signal; + else + cycle->a_signal = &cycles[n - 1].b_signal; + init_completion(&cycle->b_signal); + + INIT_WORK(&cycle->work, test_cycle_work); + cycle->result = 0; + } + + for (n = 0; n < nthreads; n++) + queue_work(wq, &cycles[n].work); + + flush_workqueue(wq); + + ret = 0; + for (n = 0; n < nthreads; n++) { + struct test_cycle *cycle = &cycles[n]; + + if (!cycle->result) + continue; + + pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n", + n, nthreads, cycle->result); + ret = -EINVAL; + break; + } + + for (n = 0; n < nthreads; n++) + ww_mutex_destroy(&cycles[n].a_mutex); + kfree(cycles); + return ret; +} + +static int test_cycle(unsigned int ncpus) +{ + unsigned int n; + int ret; + + for (n = 2; n <= ncpus + 1; n++) { + ret = __test_cycle(n); + if (ret) + return ret; + } + + return 0; +} + +struct stress { + struct work_struct work; + struct ww_mutex *locks; + int nlocks; + int nloops; +}; + +static int *get_random_order(int count) +{ + int *order; + int n, r, tmp; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (n = 0; n < count; n++) + order[n] = n; + + for (n = count - 1; n > 1; n--) { + r = get_random_int() % (n + 1); + if (r != n) { + tmp = order[n]; + order[n] = order[r]; + order[r] = tmp; + } + } + + return order; +} + +static void dummy_load(struct stress *stress) +{ + usleep_range(1000, 2000); +} + +static void stress_inorder_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + const int nlocks = stress->nlocks; + struct ww_mutex *locks = stress->locks; + struct ww_acquire_ctx ctx; + int *order; + + order = get_random_order(nlocks); + if (!order) + return; + + ww_acquire_init(&ctx, &ww_class); + + do { + int contended = -1; + int n, err; + +retry: + err = 0; + for (n = 0; n < nlocks; n++) { + if (n == contended) + continue; + + err = ww_mutex_lock(&locks[order[n]], &ctx); + if (err < 0) + break; + } + if (!err) + dummy_load(stress); + + if (contended > n) + ww_mutex_unlock(&locks[order[contended]]); + contended = n; + while (n--) + ww_mutex_unlock(&locks[order[n]]); + + if (err == -EDEADLK) { + ww_mutex_lock_slow(&locks[order[contended]], &ctx); + goto retry; + } + + if (err) { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + } while (--stress->nloops); + + ww_acquire_fini(&ctx); + + kfree(order); + kfree(stress); +} + +struct reorder_lock { + struct list_head link; + struct ww_mutex *lock; +}; + +static void stress_reorder_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + LIST_HEAD(locks); + struct ww_acquire_ctx ctx; + struct reorder_lock *ll, *ln; + int *order; + int n, err; + + order = get_random_order(stress->nlocks); + if (!order) + return; + + for (n = 0; n < stress->nlocks; n++) { + ll = kmalloc(sizeof(*ll), GFP_KERNEL); + if (!ll) + goto out; + + ll->lock = &stress->locks[order[n]]; + list_add(&ll->link, &locks); + } + kfree(order); + order = NULL; + + ww_acquire_init(&ctx, &ww_class); + + do { + list_for_each_entry(ll, &locks, link) { + err = ww_mutex_lock(ll->lock, &ctx); + if (!err) + continue; + + ln = ll; + list_for_each_entry_continue_reverse(ln, &locks, link) + ww_mutex_unlock(ln->lock); + + if (err != -EDEADLK) { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + + ww_mutex_lock_slow(ll->lock, &ctx); + list_move(&ll->link, &locks); /* restarts iteration */ + } + + dummy_load(stress); + list_for_each_entry(ll, &locks, link) + ww_mutex_unlock(ll->lock); + } while (--stress->nloops); + + ww_acquire_fini(&ctx); + +out: + list_for_each_entry_safe(ll, ln, &locks, link) + kfree(ll); + kfree(order); + kfree(stress); +} + +static void stress_one_work(struct work_struct *work) +{ + struct stress *stress = container_of(work, typeof(*stress), work); + const int nlocks = stress->nlocks; + struct ww_mutex *lock = stress->locks + (get_random_int() % nlocks); + int err; + + do { + err = ww_mutex_lock(lock, NULL); + if (!err) { + dummy_load(stress); + ww_mutex_unlock(lock); + } else { + pr_err_once("stress (%s) failed with %d\n", + __func__, err); + break; + } + } while (--stress->nloops); + + kfree(stress); +} + +#define STRESS_INORDER BIT(0) +#define STRESS_REORDER BIT(1) +#define STRESS_ONE BIT(2) +#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE) + +static int stress(int nlocks, int nthreads, int nloops, unsigned int flags) +{ + struct ww_mutex *locks; + int n; + + locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); + if (!locks) + return -ENOMEM; + + for (n = 0; n < nlocks; n++) + ww_mutex_init(&locks[n], &ww_class); + + for (n = 0; nthreads; n++) { + struct stress *stress; + void (*fn)(struct work_struct *work); + + fn = NULL; + switch (n & 3) { + case 0: + if (flags & STRESS_INORDER) + fn = stress_inorder_work; + break; + case 1: + if (flags & STRESS_REORDER) + fn = stress_reorder_work; + break; + case 2: + if (flags & STRESS_ONE) + fn = stress_one_work; + break; + } + + if (!fn) + continue; + + stress = kmalloc(sizeof(*stress), GFP_KERNEL); + if (!stress) + break; + + INIT_WORK(&stress->work, fn); + stress->locks = locks; + stress->nlocks = nlocks; + stress->nloops = nloops; + + queue_work(wq, &stress->work); + nthreads--; + } + + flush_workqueue(wq); + + for (n = 0; n < nlocks; n++) + ww_mutex_destroy(&locks[n]); + kfree(locks); + + return 0; +} + +static int __init test_ww_mutex_init(void) +{ + int ncpus = num_online_cpus(); + int ret; + + wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0); + if (!wq) + return -ENOMEM; + + ret = test_mutex(); + if (ret) + return ret; + + ret = test_aa(); + if (ret) + return ret; + + ret = test_abba(false); + if (ret) + return ret; + + ret = test_abba(true); + if (ret) + return ret; + + ret = test_cycle(ncpus); + if (ret) + return ret; + + ret = stress(16, 2*ncpus, 1<<10, STRESS_INORDER); + if (ret) + return ret; + + ret = stress(16, 2*ncpus, 1<<10, STRESS_REORDER); + if (ret) + return ret; + + ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + if (ret) + return ret; + + return 0; +} + +static void __exit test_ww_mutex_exit(void) +{ + destroy_workqueue(wq); +} + +module_init(test_ww_mutex_init); +module_exit(test_ww_mutex_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); |