From c935cd62d3fe985d7f0ebea185d2759e8992e96f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Jun 2020 17:17:19 +1000 Subject: lockdep: Split header file into lockdep and lockdep_types There is a header file inclusion loop between asm-generic/bug.h and linux/kernel.h. This causes potential compile failurs depending on the which file is included first. One way of breaking this loop is to stop spinlock_types.h from including lockdep.h. This patch splits lockdep.h into two files for this purpose. Signed-off-by: Herbert Xu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sergey Senozhatsky Reviewed-by: Andy Shevchenko Acked-by: Petr Mladek Acked-by: Steven Rostedt (VMware) Link: https://lkml.kernel.org/r/E1jlSJz-0003hE-8g@fornost.hmeau.com --- include/linux/lockdep.h | 178 +------------------------------------ include/linux/lockdep_types.h | 196 +++++++++++++++++++++++++++++++++++++++++ include/linux/spinlock.h | 1 + include/linux/spinlock_types.h | 2 +- 4 files changed, 200 insertions(+), 177 deletions(-) create mode 100644 include/linux/lockdep_types.h (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8fce5c98a4b0..3b73cf84f77d 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -10,181 +10,20 @@ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H +#include + struct task_struct; -struct lockdep_map; /* for sysctl */ extern int prove_locking; extern int lock_stat; -#define MAX_LOCKDEP_SUBCLASSES 8UL - -#include - -enum lockdep_wait_type { - LD_WAIT_INV = 0, /* not checked, catch all */ - - LD_WAIT_FREE, /* wait free, rcu etc.. */ - LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ - -#ifdef CONFIG_PROVE_RAW_LOCK_NESTING - LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ -#else - LD_WAIT_CONFIG = LD_WAIT_SPIN, -#endif - LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ - - LD_WAIT_MAX, /* must be last */ -}; - #ifdef CONFIG_LOCKDEP #include -#include #include #include -/* - * We'd rather not expose kernel/lockdep_states.h this wide, but we do need - * the total number of states... :-( - */ -#define XXX_LOCK_USAGE_STATES (1+2*4) - -/* - * NR_LOCKDEP_CACHING_CLASSES ... Number of classes - * cached in the instance of lockdep_map - * - * Currently main class (subclass == 0) and signle depth subclass - * are cached in lockdep_map. This optimization is mainly targeting - * on rq->lock. double_rq_lock() acquires this highly competitive with - * single depth. - */ -#define NR_LOCKDEP_CACHING_CLASSES 2 - -/* - * A lockdep key is associated with each lock object. For static locks we use - * the lock address itself as the key. Dynamically allocated lock objects can - * have a statically or dynamically allocated key. Dynamically allocated lock - * keys must be registered before being used and must be unregistered before - * the key memory is freed. - */ -struct lockdep_subclass_key { - char __one_byte; -} __attribute__ ((__packed__)); - -/* hash_entry is used to keep track of dynamically allocated keys. */ -struct lock_class_key { - union { - struct hlist_node hash_entry; - struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; - }; -}; - -extern struct lock_class_key __lockdep_no_validate__; - -struct lock_trace; - -#define LOCKSTAT_POINTS 4 - -/* - * The lock-class itself. The order of the structure members matters. - * reinit_class() zeroes the key member and all subsequent members. 
- */ -struct lock_class { - /* - * class-hash: - */ - struct hlist_node hash_entry; - - /* - * Entry in all_lock_classes when in use. Entry in free_lock_classes - * when not in use. Instances that are being freed are on one of the - * zapped_classes lists. - */ - struct list_head lock_entry; - - /* - * These fields represent a directed graph of lock dependencies, - * to every node we attach a list of "forward" and a list of - * "backward" graph nodes. - */ - struct list_head locks_after, locks_before; - - const struct lockdep_subclass_key *key; - unsigned int subclass; - unsigned int dep_gen_id; - - /* - * IRQ/softirq usage tracking bits: - */ - unsigned long usage_mask; - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; - - /* - * Generation counter, when doing certain classes of graph walking, - * to ensure that we check one node only once: - */ - int name_version; - const char *name; - - short wait_type_inner; - short wait_type_outer; - -#ifdef CONFIG_LOCK_STAT - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; -#endif -} __no_randomize_layout; - -#ifdef CONFIG_LOCK_STAT -struct lock_time { - s64 min; - s64 max; - s64 total; - unsigned long nr; -}; - -enum bounce_type { - bounce_acquired_write, - bounce_acquired_read, - bounce_contended_write, - bounce_contended_read, - nr_bounce_types, - - bounce_acquired = bounce_acquired_write, - bounce_contended = bounce_contended_write, -}; - -struct lock_class_stats { - unsigned long contention_point[LOCKSTAT_POINTS]; - unsigned long contending_point[LOCKSTAT_POINTS]; - struct lock_time read_waittime; - struct lock_time write_waittime; - struct lock_time read_holdtime; - struct lock_time write_holdtime; - unsigned long bounces[nr_bounce_types]; -}; - -struct lock_class_stats lock_stats(struct lock_class *class); -void clear_lock_stats(struct lock_class *class); -#endif - -/* - * Map the lock object (the lock instance) to the lock-class object. 
- * This is embedded into specific lock instances: - */ -struct lockdep_map { - struct lock_class_key *key; - struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; - const char *name; - short wait_type_outer; /* can be taken in this context */ - short wait_type_inner; /* presents this context */ -#ifdef CONFIG_LOCK_STAT - int cpu; - unsigned long ip; -#endif -}; - static inline void lockdep_copy_map(struct lockdep_map *to, struct lockdep_map *from) { @@ -440,8 +279,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock, extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); -struct pin_cookie { unsigned int val; }; - #define NIL_COOKIE (struct pin_cookie){ .val = 0U, } extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); @@ -520,10 +357,6 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) # define lockdep_sys_exit() do { } while (0) -/* - * The class key takes no space if lockdep is disabled: - */ -struct lock_class_key { }; static inline void lockdep_register_key(struct lock_class_key *key) { @@ -533,11 +366,6 @@ static inline void lockdep_unregister_key(struct lock_class_key *key) { } -/* - * The lockdep_map takes no space if lockdep is disabled: - */ -struct lockdep_map { }; - #define lockdep_depth(tsk) (0) #define lockdep_is_held_type(l, r) (1) @@ -549,8 +377,6 @@ struct lockdep_map { }; #define lockdep_recursing(tsk) (0) -struct pin_cookie { }; - #define NIL_COOKIE (struct pin_cookie){ } #define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h new file mode 100644 index 000000000000..7b9350624577 --- /dev/null +++ b/include/linux/lockdep_types.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime locking correctness validator + * + * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra + * + * see Documentation/locking/lockdep-design.rst for more details. + */ +#ifndef __LINUX_LOCKDEP_TYPES_H +#define __LINUX_LOCKDEP_TYPES_H + +#include + +#define MAX_LOCKDEP_SUBCLASSES 8UL + +enum lockdep_wait_type { + LD_WAIT_INV = 0, /* not checked, catch all */ + + LD_WAIT_FREE, /* wait free, rcu etc.. */ + LD_WAIT_SPIN, /* spin loops, raw_spinlock_t etc.. */ + +#ifdef CONFIG_PROVE_RAW_LOCK_NESTING + LD_WAIT_CONFIG, /* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */ +#else + LD_WAIT_CONFIG = LD_WAIT_SPIN, +#endif + LD_WAIT_SLEEP, /* sleeping locks, mutex_t etc.. */ + + LD_WAIT_MAX, /* must be last */ +}; + +#ifdef CONFIG_LOCKDEP + +#include + +/* + * We'd rather not expose kernel/lockdep_states.h this wide, but we do need + * the total number of states... :-( + */ +#define XXX_LOCK_USAGE_STATES (1+2*4) + +/* + * NR_LOCKDEP_CACHING_CLASSES ... Number of classes + * cached in the instance of lockdep_map + * + * Currently main class (subclass == 0) and signle depth subclass + * are cached in lockdep_map. This optimization is mainly targeting + * on rq->lock. double_rq_lock() acquires this highly competitive with + * single depth. + */ +#define NR_LOCKDEP_CACHING_CLASSES 2 + +/* + * A lockdep key is associated with each lock object. For static locks we use + * the lock address itself as the key. Dynamically allocated lock objects can + * have a statically or dynamically allocated key. 
Dynamically allocated lock + * keys must be registered before being used and must be unregistered before + * the key memory is freed. + */ +struct lockdep_subclass_key { + char __one_byte; +} __attribute__ ((__packed__)); + +/* hash_entry is used to keep track of dynamically allocated keys. */ +struct lock_class_key { + union { + struct hlist_node hash_entry; + struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; + }; +}; + +extern struct lock_class_key __lockdep_no_validate__; + +struct lock_trace; + +#define LOCKSTAT_POINTS 4 + +/* + * The lock-class itself. The order of the structure members matters. + * reinit_class() zeroes the key member and all subsequent members. + */ +struct lock_class { + /* + * class-hash: + */ + struct hlist_node hash_entry; + + /* + * Entry in all_lock_classes when in use. Entry in free_lock_classes + * when not in use. Instances that are being freed are on one of the + * zapped_classes lists. + */ + struct list_head lock_entry; + + /* + * These fields represent a directed graph of lock dependencies, + * to every node we attach a list of "forward" and a list of + * "backward" graph nodes. + */ + struct list_head locks_after, locks_before; + + const struct lockdep_subclass_key *key; + unsigned int subclass; + unsigned int dep_gen_id; + + /* + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; + const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; + + /* + * Generation counter, when doing certain classes of graph walking, + * to ensure that we check one node only once: + */ + int name_version; + const char *name; + + short wait_type_inner; + short wait_type_outer; + +#ifdef CONFIG_LOCK_STAT + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; +#endif +} __no_randomize_layout; + +#ifdef CONFIG_LOCK_STAT +struct lock_time { + s64 min; + s64 max; + s64 total; + unsigned long nr; +}; + +enum bounce_type { + bounce_acquired_write, + bounce_acquired_read, + bounce_contended_write, + bounce_contended_read, + nr_bounce_types, + + bounce_acquired = bounce_acquired_write, + bounce_contended = bounce_contended_write, +}; + +struct lock_class_stats { + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; + struct lock_time read_waittime; + struct lock_time write_waittime; + struct lock_time read_holdtime; + struct lock_time write_holdtime; + unsigned long bounces[nr_bounce_types]; +}; + +struct lock_class_stats lock_stats(struct lock_class *class); +void clear_lock_stats(struct lock_class *class); +#endif + +/* + * Map the lock object (the lock instance) to the lock-class object. 
+ * This is embedded into specific lock instances: + */ +struct lockdep_map { + struct lock_class_key *key; + struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES]; + const char *name; + short wait_type_outer; /* can be taken in this context */ + short wait_type_inner; /* presents this context */ +#ifdef CONFIG_LOCK_STAT + int cpu; + unsigned long ip; +#endif +}; + +struct pin_cookie { unsigned int val; }; + +#else /* !CONFIG_LOCKDEP */ + +/* + * The class key takes no space if lockdep is disabled: + */ +struct lock_class_key { }; + +/* + * The lockdep_map takes no space if lockdep is disabled: + */ +struct lockdep_map { }; + +struct pin_cookie { }; + +#endif /* !LOCKDEP */ + +#endif /* __LINUX_LOCKDEP_TYPES_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d3770b3f9d9a..f2f12d746dbd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 6102e6bff3ae..b981caafe8bf 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -15,7 +15,7 @@ # include #endif -#include +#include typedef struct raw_spinlock { arch_spinlock_t raw_lock; -- cgit v1.2.3 From ba1f2b2eaa2a529dba722507c55ff3d761d325dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2020 15:50:29 +0200 Subject: x86/entry: Fix NMI vs IRQ state tracking While the nmi_enter() users did trace_hardirqs_{off_prepare,on_finish}() there was no matching lockdep_hardirqs_*() calls to complete the picture. Introduce idtentry_{enter,exit}_nmi() to enable proper IRQ state tracking across the NMIs. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.216740948@infradead.org --- arch/x86/entry/common.c | 42 +++++++++++++++++++++++++++++++++++++---- arch/x86/include/asm/idtentry.h | 3 +++ arch/x86/kernel/nmi.c | 9 ++++----- arch/x86/kernel/traps.c | 17 ++++++----------- include/linux/hardirq.h | 28 ++++++++++++++++++--------- 5 files changed, 70 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 0521546022cb..63c607dd6c52 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -592,7 +592,7 @@ SYSCALL_DEFINE0(ni_syscall) * The return value must be fed into the state argument of * idtentry_exit(). */ -idtentry_state_t noinstr idtentry_enter(struct pt_regs *regs) +noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs) { idtentry_state_t ret = { .exit_rcu = false, @@ -687,7 +687,7 @@ static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched) * Counterpart to idtentry_enter(). The return value of the entry * function must be fed into the @state argument. */ -void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state) +noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state) { lockdep_assert_irqs_disabled(); @@ -731,7 +731,7 @@ void noinstr idtentry_exit(struct pt_regs *regs, idtentry_state_t state) * Invokes enter_from_user_mode() to establish the proper context for * NOHZ_FULL. Otherwise scheduling on exit would not be possible. */ -void noinstr idtentry_enter_user(struct pt_regs *regs) +noinstr void idtentry_enter_user(struct pt_regs *regs) { check_user_regs(regs); enter_from_user_mode(); @@ -749,13 +749,47 @@ void noinstr idtentry_enter_user(struct pt_regs *regs) * * Counterpart to idtentry_enter_user(). 
*/ -void noinstr idtentry_exit_user(struct pt_regs *regs) +noinstr void idtentry_exit_user(struct pt_regs *regs) { lockdep_assert_irqs_disabled(); prepare_exit_to_usermode(regs); } +noinstr bool idtentry_enter_nmi(struct pt_regs *regs) +{ + bool irq_state = lockdep_hardirqs_enabled(current); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); + instrumentation_end(); + + return irq_state; +} + +noinstr void idtentry_exit_nmi(struct pt_regs *regs, bool restore) +{ + instrumentation_begin(); + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + instrumentation_end(); + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + #ifdef CONFIG_XEN_PV #ifndef CONFIG_PREEMPTION /* diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 7227225cf45d..2b0497486525 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -20,6 +20,9 @@ typedef struct idtentry_state { idtentry_state_t idtentry_enter(struct pt_regs *regs); void idtentry_exit(struct pt_regs *regs, idtentry_state_t state); +bool idtentry_enter_nmi(struct pt_regs *regs); +void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state); + /** * DECLARE_IDTENTRY - Declare functions for simple IDT entry points * No error code pushed by hardware diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d7c5e44b26f7..4fc9954a9560 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -330,7 +330,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs) __this_cpu_write(last_nmi_rip, regs->ip); instrumentation_begin(); - trace_hardirqs_off_finish(); handled = nmi_handle(NMI_LOCAL, regs); __this_cpu_add(nmi_stats.normal, handled); @@ -417,8 +416,6 @@ static noinstr void default_do_nmi(struct pt_regs *regs) unknown_nmi_error(reason, regs); out: - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); instrumentation_end(); } @@ -478,6 +475,8 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7); DEFINE_IDTENTRY_RAW(exc_nmi) { + bool irq_state; + if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id())) return; @@ -491,14 +490,14 @@ nmi_restart: this_cpu_write(nmi_dr7, local_db_save()); - nmi_enter(); + irq_state = idtentry_enter_nmi(regs); inc_irq_stat(__nmi_count); if (!ignore_nmis) default_do_nmi(regs); - nmi_exit(); + idtentry_exit_nmi(regs, irq_state); local_db_restore(this_cpu_read(nmi_dr7)); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 4627f826fb57..cdd73829e637 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -403,7 +403,7 @@ DEFINE_IDTENTRY_DF(exc_double_fault) } #endif - nmi_enter(); + idtentry_enter_nmi(regs); instrumentation_begin(); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -649,15 +649,12 @@ DEFINE_IDTENTRY_RAW(exc_int3) instrumentation_end(); idtentry_exit_user(regs); } else { - nmi_enter(); + bool irq_state = idtentry_enter_nmi(regs); instrumentation_begin(); - trace_hardirqs_off_finish(); if (!do_int3(regs)) die("int3", regs, 0); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); instrumentation_end(); - nmi_exit(); + idtentry_exit_nmi(regs, irq_state); } } @@ -865,9 +862,8 @@ out: static __always_inline void exc_debug_kernel(struct pt_regs *regs, unsigned long dr6) { - nmi_enter(); + bool irq_state 
= idtentry_enter_nmi(regs); instrumentation_begin(); - trace_hardirqs_off_finish(); /* * If something gets miswired and we end up here for a user mode @@ -884,10 +880,8 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, handle_debug(regs, dr6, false); - if (regs->flags & X86_EFLAGS_IF) - trace_hardirqs_on_prepare(); instrumentation_end(); - nmi_exit(); + idtentry_exit_nmi(regs, irq_state); } static __always_inline void exc_debug_user(struct pt_regs *regs, @@ -903,6 +897,7 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, instrumentation_begin(); handle_debug(regs, dr6, true); + instrumentation_end(); idtentry_exit_user(regs); } diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 03c9fece7d43..754f67ac4326 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -111,32 +111,42 @@ extern void rcu_nmi_exit(void); /* * nmi_enter() can nest up to 15 times; see NMI_BITS. */ -#define nmi_enter() \ +#define __nmi_enter() \ do { \ + lockdep_off(); \ arch_nmi_enter(); \ printk_nmi_enter(); \ - lockdep_off(); \ BUG_ON(in_nmi() == NMI_MASK); \ __preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \ - rcu_nmi_enter(); \ + } while (0) + +#define nmi_enter() \ + do { \ + __nmi_enter(); \ lockdep_hardirq_enter(); \ + rcu_nmi_enter(); \ instrumentation_begin(); \ ftrace_nmi_enter(); \ instrumentation_end(); \ } while (0) +#define __nmi_exit() \ + do { \ + BUG_ON(!in_nmi()); \ + __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ + printk_nmi_exit(); \ + arch_nmi_exit(); \ + lockdep_on(); \ + } while (0) + #define nmi_exit() \ do { \ instrumentation_begin(); \ ftrace_nmi_exit(); \ instrumentation_end(); \ - lockdep_hardirq_exit(); \ rcu_nmi_exit(); \ - BUG_ON(!in_nmi()); \ - __preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ - lockdep_on(); \ - printk_nmi_exit(); \ - arch_nmi_exit(); \ + lockdep_hardirq_exit(); \ + __nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From a21ee6055c30ce68c4e201c6496f0ed2a1936230 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 May 2020 12:22:41 +0200 Subject: lockdep: Change hardirq{s_enabled,_context} to per-cpu variables Currently all IRQ-tracking state is in task_struct, this means that task_struct needs to be defined before we use it. Especially for lockdep_assert_irq*() this can lead to header-hell. Move the hardirq state into per-cpu variables to avoid the task_struct dependency. 
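As a rough sketch of the resulting shape (simplified, and assuming the usual DECLARE_PER_CPU()/this_cpu_read() helpers from <linux/percpu-defs.h>; the complete definitions are in the diff below):

#include <linux/percpu-defs.h>

DECLARE_PER_CPU(int, hardirqs_enabled);
DECLARE_PER_CPU(int, hardirq_context);

/* Headers can now test IRQ-tracking state without struct task_struct. */
#define lockdep_hardirqs_enabled(p)	this_cpu_read(hardirqs_enabled)
#define lockdep_hardirq_context(p)	this_cpu_read(hardirq_context)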
Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.512673481@infradead.org --- include/linux/irqflags.h | 19 ++++++++++++------- include/linux/lockdep.h | 34 ++++++++++++++++++---------------- include/linux/sched.h | 2 -- kernel/fork.c | 4 +--- kernel/locking/lockdep.c | 30 +++++++++++++++--------------- kernel/softirq.c | 6 ++++++ 6 files changed, 52 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 6384d2813ded..255444fe4609 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -14,6 +14,7 @@ #include #include +#include /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING @@ -31,18 +32,22 @@ #endif #ifdef CONFIG_TRACE_IRQFLAGS + +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); + extern void trace_hardirqs_on_prepare(void); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) ((p)->hardirq_context) +# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) ((p)->hardirqs_enabled) +# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) -# define lockdep_hardirq_enter() \ -do { \ - if (!current->hardirq_context++) \ - current->hardirq_threaded = 0; \ +# define lockdep_hardirq_enter() \ +do { \ + if (this_cpu_inc_return(hardirq_context) == 1) \ + current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ do { \ @@ -50,7 +55,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - current->hardirq_context--; \ + this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 3b73cf84f77d..be6cb17a8879 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -11,6 +11,7 @@ #define __LINUX_LOCKDEP_H #include +#include struct task_struct; @@ -529,28 +530,29 @@ do { \ lock_release(&(lock)->dep_map, _THIS_IP_); \ } while (0) -#define lockdep_assert_irqs_enabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirqs_enabled, \ - "IRQs not enabled as expected\n"); \ - } while (0) +DECLARE_PER_CPU(int, hardirqs_enabled); +DECLARE_PER_CPU(int, hardirq_context); -#define lockdep_assert_irqs_disabled() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirqs_enabled, \ - "IRQs not disabled as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_enabled() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ +} while (0) -#define lockdep_assert_in_irq() do { \ - WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - !current->hardirq_context, \ - "Not in hardirq as expected\n"); \ - } while (0) +#define lockdep_assert_irqs_disabled() \ +do { \ + WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ +} while (0) + +#define lockdep_assert_in_irq() \ +do { \ + WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ +} while (0) #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) # define might_lock_nested(lock, subclass) do { } while (0) + # define lockdep_assert_irqs_enabled() do { } while (0) # define 
lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) @@ -560,7 +562,7 @@ do { \ # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - current->hardirq_context && \ + lockdep_hardirq_context(current) && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 692e327d7455..3903a9500926 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -990,8 +990,6 @@ struct task_struct { unsigned long hardirq_disable_ip; unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int hardirqs_enabled; - int hardirq_context; u64 hardirq_chain_key; unsigned long softirq_disable_ip; unsigned long softirq_enable_ip; diff --git a/kernel/fork.c b/kernel/fork.c index efc5493203ae..70d9d0a4de2a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1954,8 +1954,8 @@ static __latent_entropy struct task_struct *copy_process( rt_mutex_init_task(p); + lockdep_assert_irqs_enabled(); #ifdef CONFIG_PROVE_LOCKING - DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; @@ -2036,7 +2036,6 @@ static __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; - p->hardirqs_enabled = 0; p->hardirq_enable_ip = 0; p->hardirq_enable_event = 0; p->hardirq_disable_ip = _THIS_IP_; @@ -2046,7 +2045,6 @@ static __latent_entropy struct task_struct *copy_process( p->softirq_enable_event = 0; p->softirq_disable_ip = 0; p->softirq_disable_event = 0; - p->hardirq_context = 0; p->softirq_context = 0; #endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index d595623c4b34..ab4ffbe0e9e9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn("-----------------------------------------------------\n"); pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), - curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - curr->hardirqs_enabled, + lockdep_hardirqs_enabled(curr), curr->softirqs_enabled); print_lock(next); @@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (unlikely(current->hardirqs_enabled)) { + if (unlikely(lockdep_hardirqs_enabled(current))) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) * Can't allow enabling interrupts while in an interrupt handler, * that's general bad form and such. Recursion, limited stack etc.. 
*/ - if (DEBUG_LOCKS_WARN_ON(current->hardirq_context)) + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current))) return; current->hardirq_chain_key = current->curr_chain_key; @@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (curr->hardirqs_enabled) { + if (lockdep_hardirqs_enabled(curr)) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3751,7 +3751,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) skip_checks: /* we'll do an OFF -> ON transition: */ - curr->hardirqs_enabled = 1; + this_cpu_write(hardirqs_enabled, 1); curr->hardirq_enable_ip = ip; curr->hardirq_enable_event = ++curr->irq_events; debug_atomic_inc(hardirqs_on_events); @@ -3783,11 +3783,11 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (curr->hardirqs_enabled) { + if (lockdep_hardirqs_enabled(curr)) { /* * We have done an ON -> OFF transition: */ - curr->hardirqs_enabled = 0; + this_cpu_write(hardirqs_enabled, 0); curr->hardirq_disable_ip = ip; curr->hardirq_disable_event = ++curr->irq_events; debug_atomic_inc(hardirqs_off_events); @@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip) * usage bit for all held locks, if hardirqs are * enabled too: */ - if (curr->hardirqs_enabled) + if (lockdep_hardirqs_enabled(curr)) mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); lockdep_recursion_finish(); } @@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) */ if (!hlock->trylock) { if (hlock->read) { - if (curr->hardirq_context) + if (lockdep_hardirq_context(curr)) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ_READ)) return 0; @@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) LOCK_USED_IN_SOFTIRQ_READ)) return 0; } else { - if (curr->hardirq_context) + if (lockdep_hardirq_context(curr)) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) return 0; if (curr->softirq_context) @@ -3928,7 +3928,7 @@ lock_used: static inline unsigned int task_irq_context(struct task_struct *task) { - return LOCK_CHAIN_HARDIRQ_CONTEXT * !!task->hardirq_context + + return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) + LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context; } @@ -4021,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr) * Set appropriate wait type for the context; for IRQs we have to take * into account force_irqthread as that is implied by PREEMPT_RT. */ - if (curr->hardirq_context) { + if (lockdep_hardirq_context(curr)) { /* * Check if force_irqthreads will run us threaded. 
*/ @@ -4864,11 +4864,11 @@ static void check_flags(unsigned long flags) return; if (irqs_disabled_flags(flags)) { - if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) { + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) { printk("possible reason: unannotated irqs-off.\n"); } } else { - if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) { + if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) { printk("possible reason: unannotated irqs-on.\n"); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index c4201b7f42b1..342c53feaa7a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -107,6 +107,12 @@ static bool ksoftirqd_running(unsigned long pending) * where hardirqs are disabled legitimately: */ #ifdef CONFIG_TRACE_IRQFLAGS + +DEFINE_PER_CPU(int, hardirqs_enabled); +DEFINE_PER_CPU(int, hardirq_context); +EXPORT_PER_CPU_SYMBOL_GPL(hardirqs_enabled); +EXPORT_PER_CPU_SYMBOL_GPL(hardirq_context); + void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long flags; -- cgit v1.2.3 From f9ad4a5f3f20bee022b1bdde94e5ece6dc0b0edc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 May 2020 13:03:26 +0200 Subject: lockdep: Remove lockdep_hardirq{s_enabled,_context}() argument Now that the macros use per-cpu data, we no longer need the argument. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200623083721.571835311@infradead.org --- arch/x86/entry/common.c | 2 +- include/linux/irqflags.h | 8 ++++---- include/linux/lockdep.h | 2 +- kernel/locking/lockdep.c | 30 +++++++++++++++--------------- kernel/softirq.c | 2 +- tools/include/linux/irqflags.h | 4 ++-- 6 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 63c607dd6c52..4ea640363f5d 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -758,7 +758,7 @@ noinstr void idtentry_exit_user(struct pt_regs *regs) noinstr bool idtentry_enter_nmi(struct pt_regs *regs) { - bool irq_state = lockdep_hardirqs_enabled(current); + bool irq_state = lockdep_hardirqs_enabled(); __nmi_enter(); lockdep_hardirqs_off(CALLER_ADDR0); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 255444fe4609..5811ee8a5cd8 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -40,9 +40,9 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context(p) (this_cpu_read(hardirq_context)) +# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) -# define lockdep_hardirqs_enabled(p) (this_cpu_read(hardirqs_enabled)) +# define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ @@ -109,9 +109,9 @@ do { \ # define trace_hardirqs_off_finish() do { } while (0) # define trace_hardirqs_on() do { } while (0) # define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_threaded() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 
be6cb17a8879..fd04b9e96091 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -562,7 +562,7 @@ do { \ # define lockdep_assert_RT_in_threaded_ctx() do { \ WARN_ONCE(debug_locks && !current->lockdep_recursion && \ - lockdep_hardirq_context(current) && \ + lockdep_hardirq_context() && \ !(current->hardirq_threaded || current->irq_config), \ "Not in threaded context on PREEMPT_RT as expected\n"); \ } while (0) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ab4ffbe0e9e9..c9ea05edce25 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2062,9 +2062,9 @@ print_bad_irq_dependency(struct task_struct *curr, pr_warn("-----------------------------------------------------\n"); pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), curr->softirqs_enabled); print_lock(next); @@ -3331,9 +3331,9 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", curr->comm, task_pid_nr(curr), - lockdep_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, + lockdep_hardirq_context(), hardirq_count() >> HARDIRQ_SHIFT, lockdep_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, - lockdep_hardirqs_enabled(curr), + lockdep_hardirqs_enabled(), lockdep_softirqs_enabled(curr)); print_lock(this); @@ -3658,7 +3658,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (unlikely(lockdep_hardirqs_enabled(current))) { + if (unlikely(lockdep_hardirqs_enabled())) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3686,7 +3686,7 @@ void lockdep_hardirqs_on_prepare(unsigned long ip) * Can't allow enabling interrupts while in an interrupt handler, * that's general bad form and such. Recursion, limited stack etc.. 
*/ - if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context(current))) + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirq_context())) return; current->hardirq_chain_key = current->curr_chain_key; @@ -3724,7 +3724,7 @@ void noinstr lockdep_hardirqs_on(unsigned long ip) if (unlikely(current->lockdep_recursion & LOCKDEP_RECURSION_MASK)) return; - if (lockdep_hardirqs_enabled(curr)) { + if (lockdep_hardirqs_enabled()) { /* * Neither irq nor preemption are disabled here * so this is racy by nature but losing one hit @@ -3783,7 +3783,7 @@ void noinstr lockdep_hardirqs_off(unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return; - if (lockdep_hardirqs_enabled(curr)) { + if (lockdep_hardirqs_enabled()) { /* * We have done an ON -> OFF transition: */ @@ -3832,7 +3832,7 @@ void lockdep_softirqs_on(unsigned long ip) * usage bit for all held locks, if hardirqs are * enabled too: */ - if (lockdep_hardirqs_enabled(curr)) + if (lockdep_hardirqs_enabled()) mark_held_locks(curr, LOCK_ENABLED_SOFTIRQ); lockdep_recursion_finish(); } @@ -3881,7 +3881,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) */ if (!hlock->trylock) { if (hlock->read) { - if (lockdep_hardirq_context(curr)) + if (lockdep_hardirq_context()) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ_READ)) return 0; @@ -3890,7 +3890,7 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) LOCK_USED_IN_SOFTIRQ_READ)) return 0; } else { - if (lockdep_hardirq_context(curr)) + if (lockdep_hardirq_context()) if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ)) return 0; if (curr->softirq_context) @@ -3928,7 +3928,7 @@ lock_used: static inline unsigned int task_irq_context(struct task_struct *task) { - return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context(task) + + return LOCK_CHAIN_HARDIRQ_CONTEXT * !!lockdep_hardirq_context() + LOCK_CHAIN_SOFTIRQ_CONTEXT * !!task->softirq_context; } @@ -4021,7 +4021,7 @@ static inline short task_wait_context(struct task_struct *curr) * Set appropriate wait type for the context; for IRQs we have to take * into account force_irqthread as that is implied by PREEMPT_RT. */ - if (lockdep_hardirq_context(curr)) { + if (lockdep_hardirq_context()) { /* * Check if force_irqthreads will run us threaded. 
*/ @@ -4864,11 +4864,11 @@ static void check_flags(unsigned long flags) return; if (irqs_disabled_flags(flags)) { - if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled(current))) { + if (DEBUG_LOCKS_WARN_ON(lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-off.\n"); } } else { - if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled(current))) { + if (DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())) { printk("possible reason: unannotated irqs-on.\n"); } } diff --git a/kernel/softirq.c b/kernel/softirq.c index 342c53feaa7a..5e9aaa648a74 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -230,7 +230,7 @@ static inline bool lockdep_softirq_start(void) { bool in_hardirq = false; - if (lockdep_hardirq_context(current)) { + if (lockdep_hardirq_context()) { in_hardirq = true; lockdep_hardirq_exit(); } diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h index 67e01bbadbfe..501262aee8ff 100644 --- a/tools/include/linux/irqflags.h +++ b/tools/include/linux/irqflags.h @@ -2,9 +2,9 @@ #ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ #define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_ -# define lockdep_hardirq_context(p) 0 +# define lockdep_hardirq_context() 0 # define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled(p) 0 +# define lockdep_hardirqs_enabled() 0 # define lockdep_softirqs_enabled(p) 0 # define lockdep_hardirq_enter() do { } while (0) # define lockdep_hardirq_exit() do { } while (0) -- cgit v1.2.3 From 5be542e945cb39a2457aa2cfe8b84aac95ef0f2d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Jul 2020 16:36:50 +1000 Subject: lockdep: Move list.h inclusion into lockdep.h Currently lockdep_types.h includes list.h without actually using any of its macros or functions. All it needs are the type definitions which were moved into types.h long ago. This potentially causes inclusion loops because both are included by many core header files. This patch moves the list.h inclusion into lockdep.h. Note that we could probably remove it completely but that could potentially result in compile failures should any end users not include list.h directly and also be unlucky enough to not get list.h via some other header file. Reported-by: Petr Mladek Signed-off-by: Herbert Xu Signed-off-by: Peter Zijlstra (Intel) Tested-by: Petr Mladek Link: https://lkml.kernel.org/r/20200716063649.GA23065@gondor.apana.org.au --- include/linux/lockdep.h | 1 + include/linux/lockdep_types.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fd04b9e96091..7aafba0ddcf9 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -22,6 +22,7 @@ extern int lock_stat; #ifdef CONFIG_LOCKDEP #include +#include #include #include diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 7b9350624577..bb35b449f533 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -32,8 +32,6 @@ enum lockdep_wait_type { #ifdef CONFIG_LOCKDEP -#include - /* * We'd rather not expose kernel/lockdep_states.h this wide, but we do need * the total number of states... :-( -- cgit v1.2.3 From a9232dc5607dbada801f2fe83ea307cda762969a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 11 Jul 2020 17:59:54 +0300 Subject: rwsem: fix commas in initialisation Leading comma prevents arbitrary reordering of initialisation clauses. The whole point of C99 initialisation is to allow any such reordering. 
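A minimal illustration of the difference, using made-up FOO names rather than the rwsem macros themselves:

struct foo {
	int a;
	int b;
};

/*
 * Leading comma: the clause is only legal after another initialiser,
 * so clauses cannot be freely reordered or placed first.
 */
#define FOO_B_INIT_OLD		, .b = 2

/*
 * Trailing comma: the clause is self-contained, may expand to nothing,
 * and may appear anywhere among the other designated initialisers.
 */
#ifdef CONFIG_FOO_B
# define FOO_B_INIT		.b = 2,
#else
# define FOO_B_INIT
#endif

static struct foo f = {
	FOO_B_INIT
	.a = 1,
};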
Signed-off-by: Alexey Dobriyan Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200711145954.GA1178171@localhost.localdomain --- include/linux/rwsem.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7e5b2a4eb560..25e3fde85617 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -60,39 +60,39 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } #define RWSEM_UNLOCKED_VALUE 0L -#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) +#define __RWSEM_COUNT_INIT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) /* Common initializer macros and functions */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __RWSEM_DEP_MAP_INIT(lockname) \ - , .dep_map = { \ + .dep_map = { \ .name = #lockname, \ .wait_type_inner = LD_WAIT_SLEEP, \ - } + }, #else # define __RWSEM_DEP_MAP_INIT(lockname) #endif #ifdef CONFIG_DEBUG_RWSEMS -# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname +# define __RWSEM_DEBUG_INIT(lockname) .magic = &lockname, #else -# define __DEBUG_RWSEM_INITIALIZER(lockname) +# define __RWSEM_DEBUG_INIT(lockname) #endif #ifdef CONFIG_RWSEM_SPIN_ON_OWNER -#define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED +#define __RWSEM_OPT_INIT(lockname) .osq = OSQ_LOCK_UNLOCKED, #else #define __RWSEM_OPT_INIT(lockname) #endif #define __RWSEM_INITIALIZER(name) \ - { __RWSEM_INIT_COUNT(name), \ + { __RWSEM_COUNT_INIT(name), \ .owner = ATOMIC_LONG_INIT(0), \ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ - .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ - __DEBUG_RWSEM_INITIALIZER(name) \ + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } #define DECLARE_RWSEM(name) \ -- cgit v1.2.3 From e885d5d94793ef342e49d55672baabbc16e32bb1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 16 Jul 2020 16:36:50 +1000 Subject: lockdep: Move list.h inclusion into lockdep.h Currently lockdep_types.h includes list.h without actually using any of its macros or functions. All it needs are the type definitions which were moved into types.h long ago. This potentially causes inclusion loops because both are included by many core header files. This patch moves the list.h inclusion into lockdep.h. Note that we could probably remove it completely but that could potentially result in compile failures should any end users not include list.h directly and also be unlucky enough to not get list.h via some other header file. 
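A hypothetical example of the failure mode described above (my_obj is illustrative only, assuming lockdep_types.h keeps including <linux/types.h>): the structure below still builds because the list types live in linux/types.h, but the list helpers come from linux/list.h, which such a file may today be receiving only indirectly:

#include <linux/lockdep_types.h>	/* types only; no longer pulls in list.h */

struct my_obj {
	struct lock_class_key	key;	/* only the type is needed */
	struct list_head	node;	/* type is provided by <linux/types.h> */
};

static void my_obj_init(struct my_obj *o)
{
	INIT_LIST_HEAD(&o->node);	/* needs an explicit <linux/list.h> */
}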
Reported-by: Petr Mladek Signed-off-by: Herbert Xu Signed-off-by: Peter Zijlstra (Intel) Tested-by: Petr Mladek Link: https://lkml.kernel.org/r/20200716063649.GA23065@gondor.apana.org.au --- include/linux/lockdep.h | 1 + include/linux/lockdep_types.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 3b73cf84f77d..b1ad5c045353 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -21,6 +21,7 @@ extern int lock_stat; #ifdef CONFIG_LOCKDEP #include +#include #include #include diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 7b9350624577..bb35b449f533 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -32,8 +32,6 @@ enum lockdep_wait_type { #ifdef CONFIG_LOCKDEP -#include - /* * We'd rather not expose kernel/lockdep_states.h this wide, but we do need * the total number of states... :-( -- cgit v1.2.3 From 7ca8cf5347f720b07a0b32a924b768f5710547e7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jul 2020 22:31:05 +1000 Subject: locking/atomic: Move ATOMIC_INIT into linux/types.h This patch moves ATOMIC_INIT from asm/atomic.h into linux/types.h. This allows users of atomic_t to use ATOMIC_INIT without having to include atomic.h as that way may lead to header loops. Signed-off-by: Herbert Xu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lkml.kernel.org/r/20200729123105.GB7047@gondor.apana.org.au --- arch/alpha/include/asm/atomic.h | 1 - arch/arc/include/asm/atomic.h | 2 -- arch/arm/include/asm/atomic.h | 2 -- arch/arm64/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/ia64/include/asm/atomic.h | 1 - arch/m68k/include/asm/atomic.h | 2 -- arch/mips/include/asm/atomic.h | 1 - arch/parisc/include/asm/atomic.h | 2 -- arch/powerpc/include/asm/atomic.h | 2 -- arch/riscv/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic.h | 2 -- arch/sparc/include/asm/atomic_32.h | 2 -- arch/sparc/include/asm/atomic_64.h | 1 - arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 2 -- include/linux/types.h | 2 ++ 20 files changed, 2 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 2144530d1428..e2093994fd0d 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -24,7 +24,6 @@ #define __atomic_acquire_fence() #define __atomic_post_full_fence() -#define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 7298ce84762e..c614857eb209 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -14,8 +14,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - #ifndef CONFIG_ARC_PLAT_EZNPS #define atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 75bb2c543e59..455eb19a5ac1 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -15,8 +15,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - #ifdef __KERNEL__ /* diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index a08890da696c..015ddffaf6ca 100644 --- a/arch/arm64/include/asm/atomic.h +++ 
b/arch/arm64/include/asm/atomic.h @@ -99,8 +99,6 @@ static inline long arch_atomic64_dec_if_positive(atomic64_t *v) return __lse_ll_sc_body(atomic64_dec_if_positive, v); } -#define ATOMIC_INIT(i) { (i) } - #define arch_atomic_read(v) __READ_ONCE((v)->counter) #define arch_atomic_set(v, i) __WRITE_ONCE(((v)->counter), (i)) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index c6b6a06231b2..a990d151f163 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -12,8 +12,6 @@ * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - #define atomic_read(v) READ_ONCE((v)->counter) #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 0231d69c8bf2..4ab895d7111f 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -12,8 +12,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - /* Normal writes in our arch don't clear lock reservations */ static inline void atomic_set(atomic_t *v, int new) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 50440f3ddc43..f267d956458f 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -19,7 +19,6 @@ #include -#define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 47228b0d4163..756c5cc58f94 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -16,8 +16,6 @@ * We do not have SMP m68k systems, so we don't have to deal with that. */ -#define ATOMIC_INIT(i) { (i) } - #define atomic_read(v) READ_ONCE((v)->counter) #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index e5ac88392d1f..f904084fcb1f 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -45,7 +45,6 @@ static __always_inline type pfx##_xchg(pfx##_t *v, type n) \ return xchg(&v->counter, n); \ } -#define ATOMIC_INIT(i) { (i) } ATOMIC_OPS(atomic, int) #ifdef CONFIG_64BIT diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 118953d41763..f960e2f32b1b 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -136,8 +136,6 @@ ATOMIC_OPS(xor, ^=) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -#define ATOMIC_INIT(i) { (i) } - #ifdef CONFIG_64BIT #define ATOMIC64_INIT(i) { (i) } diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 498785ffc25f..0311c3c42960 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -11,8 +11,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - /* * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with * a "bne-" instruction at the end, so an isync is enough as a acquire barrier diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index 96f95c9ebd97..400a8c8b6de7 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -19,8 +19,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - #define __atomic_acquire_fence() \ __asm__ __volatile__(RISCV_ACQUIRE_BARRIER "" ::: "memory") diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 491ad53a0d4e..cae473a7b6f7 100644 --- a/arch/s390/include/asm/atomic.h +++ 
b/arch/s390/include/asm/atomic.h @@ -15,8 +15,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - static inline int atomic_read(const atomic_t *v) { int c; diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index f37b95a80232..7c2a8a703b9a 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -19,8 +19,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - #define atomic_read(v) READ_ONCE((v)->counter) #define atomic_set(v,i) WRITE_ONCE((v)->counter, (i)) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 94c930f0bc62..efad5532f169 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -18,8 +18,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - int atomic_add_return(int, atomic_t *); int atomic_fetch_add(int, atomic_t *); int atomic_fetch_and(int, atomic_t *); diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index b60448397d4f..6b235d3d1d9d 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -12,7 +12,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } #define atomic_read(v) READ_ONCE((v)->counter) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index bf35e476a776..b6cac6e9bb70 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -14,8 +14,6 @@ * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - /** * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 3e7c6134ed32..744c2f463845 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -19,8 +19,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - /* * This Xtensa implementation assumes that the right mechanism * for exclusion is for locking interrupts to level EXCM_LEVEL. diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 286867f593d2..11f96f40f4a7 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -159,8 +159,6 @@ ATOMIC_OP(xor, ^) * resource counting etc.. */ -#define ATOMIC_INIT(i) { (i) } - /** * atomic_read - read atomic variable * @v: pointer of type atomic_t diff --git a/include/linux/types.h b/include/linux/types.h index d3021c879179..a147977602b5 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -167,6 +167,8 @@ typedef struct { int counter; } atomic_t; +#define ATOMIC_INIT(i) { (i) } + #ifdef CONFIG_64BIT typedef struct { s64 counter; -- cgit v1.2.3 From 459e39538e612b8dd130d34b93c9bfc89ecc836c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jul 2020 22:33:16 +1000 Subject: locking/qspinlock: Do not include atomic.h from qspinlock_types.h This patch breaks a header loop involving qspinlock_types.h. The issue is that qspinlock_types.h includes atomic.h, which then eventually includes kernel.h which could lead back to the original file via spinlock_types.h. As ATOMIC_INIT is now defined by linux/types.h, there is no longer any need to include atomic.h from qspinlock_types.h. This also allows the CONFIG_PARAVIRT hack to be removed since it was trying to prevent exactly this loop. 
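As a rough illustration of what the ATOMIC_INIT move buys (my_lock_types is a made-up name, not code from this series), a *_types.h style header can now provide a static initialiser for an atomic_t with nothing heavier than <linux/types.h>:

#include <linux/types.h>	/* atomic_t and, after the previous patch, ATOMIC_INIT */

struct my_lock_types {
	atomic_t	val;
};

#define MY_LOCK_TYPES_INIT	{ .val = ATOMIC_INIT(0) }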
Signed-off-by: Herbert Xu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lkml.kernel.org/r/20200729123316.GC7047@gondor.apana.org.au --- include/asm-generic/qspinlock.h | 1 + include/asm-generic/qspinlock_types.h | 8 -------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index fde943d180e0..2b26cd729b94 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -11,6 +11,7 @@ #define __ASM_GENERIC_QSPINLOCK_H #include +#include /** * queued_spin_is_locked - is the spinlock locked? diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 56d1309d32f8..2fd1fb89ec36 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -9,15 +9,7 @@ #ifndef __ASM_GENERIC_QSPINLOCK_TYPES_H #define __ASM_GENERIC_QSPINLOCK_TYPES_H -/* - * Including atomic.h with PARAVIRT on will cause compilation errors because - * of recursive header file incluson via paravirt_types.h. So don't include - * it if PARAVIRT is on. - */ -#ifndef CONFIG_PARAVIRT #include -#include -#endif typedef struct qspinlock { union { -- cgit v1.2.3 From 0d24f65e933ca89d55d17f6dbdb2a72ca88f0992 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:07 +0200 Subject: Documentation: locking: Describe seqlock design and usage Proper documentation for the design and usage of sequence counters and sequential locks does not exist. Complete the seqlock.h documentation as follows: - Divide all documentation on a seqcount_t vs. seqlock_t basis. The description for both mechanisms was intermingled, which is incorrect since the usage constrains for each type are vastly different. - Add an introductory paragraph describing the internal design of, and rationale for, sequence counters. - Document seqcount_t writer non-preemptibility requirement, which was not previously documented anywhere, and provide a clear rationale. - Provide template code for seqcount_t and seqlock_t initialization and reader/writer critical sections. - Recommend using seqlock_t by default. It implicitly handles the serialization and non-preemptibility requirements of writers. At seqlock.h: - Remove references to brlocks as they've long been removed from the kernel. - Remove references to gcc-3.x since the kernel's minimum supported gcc version is 4.9. References: 0f6ed63b1707 ("no need to keep brlock macros anymore...") References: 6ec4476ac825 ("Raise gcc version requirement to 4.9") Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-2-a.darwish@linutronix.de --- Documentation/locking/index.rst | 1 + Documentation/locking/seqlock.rst | 170 ++++++++++++++++++++++++++++++++++++++ include/linux/seqlock.h | 85 +++++++++---------- 3 files changed, 211 insertions(+), 45 deletions(-) create mode 100644 Documentation/locking/seqlock.rst (limited to 'include') diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst index d785878cad65..7003bd5aeff4 100644 --- a/Documentation/locking/index.rst +++ b/Documentation/locking/index.rst @@ -14,6 +14,7 @@ locking mutex-design rt-mutex-design rt-mutex + seqlock spinlocks ww-mutex-design preempt-locking diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst new file mode 100644 index 000000000000..366dd368d90a --- /dev/null +++ b/Documentation/locking/seqlock.rst @@ -0,0 +1,170 @@ +====================================== +Sequence counters and sequential locks +====================================== + +Introduction +============ + +Sequence counters are a reader-writer consistency mechanism with +lockless readers (read-only retry loops), and no writer starvation. They +are used for data that's rarely written to (e.g. system time), where the +reader wants a consistent set of information and is willing to retry if +that information changes. + +A data set is consistent when the sequence count at the beginning of the +read side critical section is even and the same sequence count value is +read again at the end of the critical section. The data in the set must +be copied out inside the read side critical section. If the sequence +count has changed between the start and the end of the critical section, +the reader must retry. + +Writers increment the sequence count at the start and the end of their +critical section. After starting the critical section the sequence count +is odd and indicates to the readers that an update is in progress. At +the end of the write side critical section the sequence count becomes +even again which lets readers make progress. + +A sequence counter write side critical section must never be preempted +or interrupted by read side sections. Otherwise the reader will spin for +the entire scheduler tick due to the odd sequence count value and the +interrupted writer. If that reader belongs to a real-time scheduling +class, it can spin forever and the kernel will livelock. + +This mechanism cannot be used if the protected data contains pointers, +as the writer can invalidate a pointer that the reader is following. + + +.. _seqcount_t: + +Sequence counters (``seqcount_t``) +================================== + +This is the the raw counting mechanism, which does not protect against +multiple writers. Write side critical sections must thus be serialized +by an external lock. + +If the write serialization primitive is not implicitly disabling +preemption, preemption must be explicitly disabled before entering the +write side section. If the read section can be invoked from hardirq or +softirq contexts, interrupts or bottom halves must also be respectively +disabled before entering the write section. + +If it's desired to automatically handle the sequence counter +requirements of writer serialization and non-preemptibility, use +:ref:`seqlock_t` instead. 
+ +Initialization:: + + /* dynamic */ + seqcount_t foo_seqcount; + seqcount_init(&foo_seqcount); + + /* static */ + static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount); + + /* C99 struct init */ + struct { + .seq = SEQCNT_ZERO(foo.seq), + } foo; + +Write path:: + + /* Serialized context with disabled preemption */ + + write_seqcount_begin(&foo_seqcount); + + /* ... [[write-side critical section]] ... */ + + write_seqcount_end(&foo_seqcount); + +Read path:: + + do { + seq = read_seqcount_begin(&foo_seqcount); + + /* ... [[read-side critical section]] ... */ + + } while (read_seqcount_retry(&foo_seqcount, seq)); + + +.. _seqlock_t: + +Sequential locks (``seqlock_t``) +================================ + +This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an +embedded spinlock for writer serialization and non-preemptibility. + +If the read side section can be invoked from hardirq or softirq context, +use the write side function variants which disable interrupts or bottom +halves respectively. + +Initialization:: + + /* dynamic */ + seqlock_t foo_seqlock; + seqlock_init(&foo_seqlock); + + /* static */ + static DEFINE_SEQLOCK(foo_seqlock); + + /* C99 struct init */ + struct { + .seql = __SEQLOCK_UNLOCKED(foo.seql) + } foo; + +Write path:: + + write_seqlock(&foo_seqlock); + + /* ... [[write-side critical section]] ... */ + + write_sequnlock(&foo_seqlock); + +Read path, three categories: + +1. Normal Sequence readers which never block a writer but they must + retry if a writer is in progress by detecting change in the sequence + number. Writers do not wait for a sequence reader:: + + do { + seq = read_seqbegin(&foo_seqlock); + + /* ... [[read-side critical section]] ... */ + + } while (read_seqretry(&foo_seqlock, seq)); + +2. Locking readers which will wait if a writer or another locking reader + is in progress. A locking reader in progress will also block a writer + from entering its critical section. This read lock is + exclusive. Unlike rwlock_t, only one locking reader can acquire it:: + + read_seqlock_excl(&foo_seqlock); + + /* ... [[read-side critical section]] ... */ + + read_sequnlock_excl(&foo_seqlock); + +3. Conditional lockless reader (as in 1), or locking reader (as in 2), + according to a passed marker. This is used to avoid lockless readers + starvation (too much retry loops) in case of a sharp spike in write + activity. First, a lockless read is tried (even marker passed). If + that trial fails (odd sequence counter is returned, which is used as + the next iteration marker), the lockless read is transformed to a + full locking read and no retry loop is necessary:: + + /* marker; even initialization */ + int seq = 0; + do { + read_seqbegin_or_lock(&foo_seqlock, &seq); + + /* ... [[read-side critical section]] ... */ + + } while (need_seqretry(&foo_seqlock, seq)); + done_seqretry(&foo_seqlock, seq); + + +API documentation +================= + +.. kernel-doc:: include/linux/seqlock.h diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8b97204f35a7..299d68f10325 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -1,36 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H + /* - * Reader/writer consistent mechanism without starving writers. This type of - * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. There are two types - * of readers: - * 1. 
Sequence readers which never block a writer but they may have to retry - * if a writer is in progress by detecting change in sequence number. - * Writers do not wait for a sequence reader. - * 2. Locking readers which will wait if a writer or another locking reader - * is in progress. A locking reader in progress will also block a writer - * from going forward. Unlike the regular rwlock, the read lock here is - * exclusive so that only one locking reader can get it. - * - * This is not as cache friendly as brlock. Also, this may not work well - * for data that contains pointers, because any writer could - * invalidate a pointer that a reader was following. - * - * Expected non-blocking reader usage: - * do { - * seq = read_seqbegin(&foo); - * ... - * } while (read_seqretry(&foo, seq)); - * - * - * On non-SMP the spin locks disappear but the writer still needs - * to increment the sequence variables because an interrupt routine could - * change the state of the data. - * - * Based on x86_64 vsyscall gettimeofday - * by Keith Owens and Andrea Arcangeli + * seqcount_t / seqlock_t - a reader-writer consistency mechanism with + * lockless readers (read-only retry loops), and no writer starvation. + * + * See Documentation/locking/seqlock.rst + * + * Copyrights: + * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli */ #include @@ -41,8 +20,8 @@ #include /* - * The seqlock interface does not prescribe a precise sequence of read - * begin/retry/end. For readers, typically there is a call to + * The seqlock seqcount_t interface does not prescribe a precise sequence of + * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * @@ -50,16 +29,30 @@ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following - * memory operations are considered atomic. Usage of seqlocks via seqlock_t - * interface is not affected. + * memory operations are considered atomic. Usage of the seqlock_t interface + * is not affected. */ #define KCSAN_SEQLOCK_REGION_MAX 1000 /* - * Version using sequence counter only. - * This can be used when code has its own mutex protecting the - * updating starting before the write_seqcountbeqin() and ending - * after the write_seqcount_end(). + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. + * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst */ typedef struct seqcount { unsigned sequence; @@ -398,10 +391,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -/* - * Sequence counter only version assumes that callers are using their - * own mutexing. 
- */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { raw_write_seqcount_begin(s); @@ -434,15 +423,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } +/* + * Sequential locks (seqlock_t) + * + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst + */ typedef struct { struct seqcount seqcount; spinlock_t lock; } seqlock_t; -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_ZERO(lockname), \ -- cgit v1.2.3 From 15cbe67bbd3adeb4854c42713dbeaf2ff876beee Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:08 +0200 Subject: seqlock: Properly format kernel-doc code samples Align the code samples and note sections inside kernel-doc comments with tabs. This way they can be properly parsed and rendered by Sphinx. It also makes the code samples easier to read from text editors. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-3-a.darwish@linutronix.de --- include/linux/seqlock.h | 108 +++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 299d68f10325..6c4f68ef1393 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -263,32 +263,32 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before and after the barrier are enclosed in a seq-writer - * critical section that would ensure readers are aware of ongoing writes. + * critical section that would ensure readers are aware of ongoing writes:: * - * seqcount_t seq; - * bool X = true, Y = false; + * seqcount_t seq; + * bool X = true, Y = false; * - * void read(void) - * { - * bool x, y; + * void read(void) + * { + * bool x, y; * - * do { - * int s = read_seqcount_begin(&seq); + * do { + * int s = read_seqcount_begin(&seq); * - * x = X; y = Y; + * x = X; y = Y; * - * } while (read_seqcount_retry(&seq, s)); + * } while (read_seqcount_retry(&seq, s)); * - * BUG_ON(!x && !y); + * BUG_ON(!x && !y); * } * * void write(void) * { - * WRITE_ONCE(Y, true); + * WRITE_ONCE(Y, true); * - * raw_write_seqcount_barrier(seq); + * raw_write_seqcount_barrier(seq); * - * WRITE_ONCE(X, false); + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) @@ -325,64 +325,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * - * The basic form is a data structure like: + * The basic form is a data structure like:: * - * struct latch_struct { - * seqcount_t seq; - * struct data_struct data[2]; - * }; + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; * * Where a modification, which is assumed to be externally serialized, does the - * following: + * following:: * - * void latch_modify(struct latch_struct *latch, ...) 
- * { - * smp_wmb(); <- Ensure that the last data[1] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); // Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[0], ...); + * modify(latch->data[0], ...); * - * smp_wmb(); <- Ensure that the data[0] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * smp_wmb(); // Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[1], ...); - * } + * modify(latch->data[1], ...); + * } * - * The query will have a form like: + * The query will have a form like:: * - * struct entry *latch_query(struct latch_struct *latch, ...) - * { - * struct entry *entry; - * unsigned seq, idx; + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; * - * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * do { + * seq = raw_read_seqcount_latch(&latch->seq); * - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * smp_rmb(); + * } while (seq != latch->seq); * - * return entry; - * } + * return entry; + * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * - * NOTE: The non-requirement for atomic modifications does _NOT_ include - * the publishing of new entries in the case where data is a dynamic - * data structure. + * NOTE: + * + * The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. * - * An iteration might start in data[0] and get suspended long enough - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. + * NOTE: * - * NOTE: When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. + * When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { -- cgit v1.2.3 From d3b35b87f436c1b226a8061bee9c8875ba6658bd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:09 +0200 Subject: seqlock: seqcount_t latch: End read sections with read_seqcount_retry() The seqcount_t latch reader example at the raw_write_seqcount_latch() kernel-doc comment ends the latch read section with a manual smp memory barrier and sequence counter comparison. This is technically correct, but it is suboptimal: read_seqcount_retry() already contains the same logic of an smp memory barrier and sequence counter comparison. End the latch read critical section example with read_seqcount_retry(). Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-4-a.darwish@linutronix.de --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 6c4f68ef1393..d724b5e5408d 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -363,8 +363,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * // read_seqcount_retry() includes needed smp_rmb() + * } while (read_seqcount_retry(&latch->seq, seq)); * * return entry; * } -- cgit v1.2.3 From f4a27cbcec90ac04ee60e04b222e1449dcdba0bd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:10 +0200 Subject: seqlock: Reorder seqcount_t and seqlock_t API definitions The seqlock.h seqcount_t and seqlock_t API definitions are presented in the chronological order of their development rather than the order that makes most sense to readers. This makes it hard to follow and understand the header file code. Group and reorder all of the exported seqlock.h functions according to their function. First, group together the seqcount_t standard read path functions: - __read_seqcount_begin() - raw_read_seqcount_begin() - read_seqcount_begin() since each function is implemented exactly in terms of the one above it. Then, group the special-case seqcount_t readers on their own as: - raw_read_seqcount() - raw_seqcount_begin() since the only difference between the two functions is that the second one masks the sequence counter LSB while the first one does not. Note that raw_seqcount_begin() can actually be implemented in terms of raw_read_seqcount(), which will be done in a follow-up commit. Then, group the seqcount_t write path functions, instead of injecting unrelated seqcount_t latch functions between them, and order them as: - raw_write_seqcount_begin() - raw_write_seqcount_end() - write_seqcount_begin_nested() - write_seqcount_begin() - write_seqcount_end() - raw_write_seqcount_barrier() - write_seqcount_invalidate() which is the expected natural order. This also isolates the seqcount_t latch functions into their own area, at the end of the sequence counters section, and before jumping to the next one: sequential locks (seqlock_t). Do a similar grouping and reordering for seqlock_t "locking" readers vs. the "conditionally locking or lockless" ones. No implementation code was changed in any of the reordering above. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-5-a.darwish@linutronix.de --- include/linux/seqlock.h | 158 ++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d724b5e5408d..4c1456008d89 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -128,23 +128,6 @@ repeat: return ret; } -/** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry - * - * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. 
- */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) -{ - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} - /** * raw_read_seqcount_begin - start seq-read critical section w/o lockdep * @s: pointer to seqcount_t @@ -176,6 +159,23 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) return raw_read_seqcount_begin(s); } +/** + * raw_read_seqcount - Read the raw seqcount + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_read_seqcount opens a read critical section of the given + * seqcount without any lockdep checking and without checking or + * masking the LSB. Calling code is responsible for handling that. + */ +static inline unsigned raw_read_seqcount(const seqcount_t *s) +{ + unsigned ret = READ_ONCE(s->sequence); + smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); + return ret; +} + /** * raw_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t @@ -234,8 +234,6 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } - - static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -250,6 +248,23 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); +} + +static inline void write_seqcount_end(seqcount_t *s) +{ + seqcount_release(&s->dep_map, _RET_IP_); + raw_write_seqcount_end(s); +} + /** * raw_write_seqcount_barrier - do a seq write barrier * @s: pointer to seqcount_t @@ -300,6 +315,21 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_invalidate - invalidate in-progress read-side seq operations + * @s: pointer to seqcount_t + * + * After write_seqcount_invalidate, no read-side seq operations will complete + * successfully and see data older than this. + */ +static inline void write_seqcount_invalidate(seqcount_t *s) +{ + smp_wmb(); + kcsan_nestable_atomic_begin(); + s->sequence+=2; + kcsan_nestable_atomic_end(); +} + static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -395,38 +425,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - -static inline void write_seqcount_begin(seqcount_t *s) -{ - write_seqcount_begin_nested(s, 0); -} - -static inline void write_seqcount_end(seqcount_t *s) -{ - seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); -} - -/** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t - * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. 
- */ -static inline void write_seqcount_invalidate(seqcount_t *s) -{ - smp_wmb(); - kcsan_nestable_atomic_begin(); - s->sequence+=2; - kcsan_nestable_atomic_end(); -} - /* * Sequential locks (seqlock_t) * @@ -555,35 +553,6 @@ static inline void read_sequnlock_excl(seqlock_t *sl) spin_unlock(&sl->lock); } -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. - */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); @@ -621,6 +590,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * @lock: sequence lock + * @seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { -- cgit v1.2.3 From 89b88845e05752b3d684eaf147f457c8dfa99c5f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:11 +0200 Subject: seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs seqlock.h is now included by kernel's RST documentation, but a small number of the the exported seqlock.h functions are kernel-doc annotated. Add kernel-doc for all seqlock.h exported APIs. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-6-a.darwish@linutronix.de --- include/linux/seqlock.h | 425 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 348 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4c1456008d89..85fb3ac93ffb 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -75,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } \ +/** + * seqcount_init() - runtime initializer for seqcount_t + * @s: Pointer to the seqcount_t instance + */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ @@ -98,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) # define seqcount_lockdep_reader_access(x) #endif -#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} - +/** + * SEQCNT_ZERO() - static initializer for seqcount_t + * @name: Name of the seqcount_t instance + */ +#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /** - * __read_seqcount_begin - begin a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -113,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned __read_seqcount_begin(const seqcount_t *s) { @@ -129,13 +137,10 @@ repeat: } /** - * raw_read_seqcount_begin - start seq-read critical section w/o lockdep - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep + * @s: Pointer to seqcount_t * - * raw_read_seqcount_begin opens a read critical section of the given - * seqcount, but without any lockdep checking. Validity of the critical - * section is tested by checking read_seqcount_retry function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) { @@ -145,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) } /** - * read_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * read_seqcount_begin() - begin a seqcount_t read critical section + * @s: Pointer to seqcount_t * - * read_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. 
+ * Return: count to be passed to read_seqcount_retry() */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { @@ -160,13 +162,15 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) } /** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount() - read the raw seqcount_t counter value + * @s: Pointer to seqcount_t * * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. + * seqcount_t, without any lockdep checking, and without checking or + * masking the sequence counter LSB. Calling code is responsible for + * handling that. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount(const seqcount_t *s) { @@ -177,18 +181,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) } /** - * raw_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_seqcount_begin() - begin a seqcount_t read critical section w/o + * lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t * - * raw_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * raw_seqcount_begin opens a read critical section of the given + * seqcount_t. Unlike read_seqcount_begin(), this function will not wait + * for the count to stabilize. If a writer is active when it begins, it + * will fail the read_seqcount_retry() at the end of the read critical + * section instead of stabilizing at the beginning of it. * - * Unlike read_seqcount_begin(), this function will not wait for the count - * to stabilize. If a writer is active when we begin, we will fail the - * read_seqcount_retry() instead of stabilizing at the beginning of the - * critical section. + * Use this only in special kernel hot paths where the read section is + * small and has a high probability of success through other external + * means. It will save a single branching instruction. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { @@ -199,10 +206,9 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) } /** - * __read_seqcount_retry - end a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * __read_seqcount_retry() - end a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -211,6 +217,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. 
+ * + * Return: true if a read section retry is required, else false */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -219,14 +227,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) } /** - * read_seqcount_retry - end a seq-read critical section - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * read_seqcount_retry() - end a seqcount_t read critical section + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * - * read_seqcount_retry closes a read critical section of the given seqcount. - * If the critical section was invalid, it must be ignored (and typically - * retried). + * read_seqcount_retry closes the read critical section of given + * seqcount_t. If the critical section was invalid, it must be ignored + * (and typically retried). + * + * Return: true if a read section retry is required, else false */ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -234,6 +243,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } +/** + * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -241,6 +254,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) smp_wmb(); } +/** + * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); @@ -248,17 +265,42 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_begin_nested() - start a seqcount_t write section with + * custom lockdep nesting level + * @s: Pointer to seqcount_t + * @subclass: lockdep nesting level + * + * See Documentation/locking/lockdep-design.rst + */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } +/** + * write_seqcount_begin() - start a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * write_seqcount_begin opens a write side critical section of the given + * seqcount_t. + * + * Context: seqcount_t write side critical sections must be serialized and + * non-preemptible. If readers can be invoked from hardirq or softirq + * context, interrupts or bottom halves must be respectively disabled. + */ static inline void write_seqcount_begin(seqcount_t *s) { write_seqcount_begin_nested(s, 0); } +/** + * write_seqcount_end() - end a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * The write section must've been opened with write_seqcount_begin(). + */ static inline void write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); @@ -266,12 +308,12 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * raw_write_seqcount_barrier - do a seq write barrier - * @s: pointer to seqcount_t + * raw_write_seqcount_barrier() - do a seqcount_t write barrier + * @s: Pointer to seqcount_t * - * This can be used to provide an ordering guarantee instead of the - * usual consistency guarantee. It is one wmb cheaper, because we can - * collapse the two back-to-back wmb()s. + * This can be used to provide an ordering guarantee instead of the usual + * consistency guarantee. 
It is one wmb cheaper, because it can collapse + * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads @@ -316,11 +358,12 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) } /** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t + * write_seqcount_invalidate() - invalidate in-progress seqcount_t read + * side operations + * @s: Pointer to seqcount_t * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. + * After write_seqcount_invalidate, no seqcount_t read side operations + * will complete successfully and see data older than this. */ static inline void write_seqcount_invalidate(seqcount_t *s) { @@ -330,6 +373,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy + * @s: Pointer to seqcount_t + * + * Use seqcount_t latching to switch between two storage places protected + * by a sequence counter. Doing so allows having interruptible, preemptible, + * seqcount_t write side critical sections. + * + * Check raw_write_seqcount_latch() for more details and a full reader and + * writer usage example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter value must then be + * checked with read_seqcount_retry(). + */ static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -338,8 +396,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) } /** - * raw_write_seqcount_latch - redirect readers to even/odd copy - * @s: pointer to seqcount_t + * raw_write_seqcount_latch() - redirect readers to even/odd copy + * @s: Pointer to seqcount_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -446,17 +504,28 @@ typedef struct { .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } -#define seqlock_init(x) \ +/** + * seqlock_init() - dynamic initializer for seqlock_t + * @sl: Pointer to the seqlock_t instance + */ +#define seqlock_init(sl) \ do { \ - seqcount_init(&(x)->seqcount); \ - spin_lock_init(&(x)->lock); \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) +/** + * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t + * @sl: Name of the seqlock_t instance + */ +#define DEFINE_SEQLOCK(sl) \ + seqlock_t sl = __SEQLOCK_UNLOCKED(sl) -/* - * Read side functions for starting and finalizing a read side section. +/** + * read_seqbegin() - start a seqlock_t read side critical section + * @sl: Pointer to seqlock_t + * + * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { @@ -467,6 +536,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) return ret; } +/** + * read_seqretry() - end a seqlock_t read side section + * @sl: Pointer to seqlock_t + * @start: count, from read_seqbegin() + * + * read_seqretry closes the read side critical section of given seqlock_t. + * If the critical section was invalid, it must be ignored (and typically + * retried). 
+ * + * Return: true if a read section retry is required, else false + */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* @@ -478,10 +558,18 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) return read_seqcount_retry(&sl->seqcount, start); } -/* - * Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * write_seqlock() - start a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_seqlock opens a write side critical section for the given + * seqlock_t. It also implicitly acquires the spinlock_t embedded inside + * that sequential lock. All seqlock_t write side sections are thus + * automatically serialized and non-preemptible. + * + * Context: if the seqlock_t read section, or other write side critical + * sections, can be invoked from hardirq or softirq contexts, use the + * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { @@ -489,30 +577,66 @@ static inline void write_seqlock(seqlock_t *sl) write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock() - end a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_sequnlock closes the (serialized and non-preemptible) write side + * critical section of given seqlock_t. + */ static inline void write_sequnlock(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } +/** + * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _bh variant of write_seqlock(). Use only if the read side section, or + * other write side sections, can be invoked from softirq contexts. + */ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_bh closes the serialized, non-preemptible, and + * softirqs-disabled, seqlock_t write side critical section opened with + * write_seqlock_bh(). + */ static inline void write_sequnlock_bh(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } +/** + * write_seqlock_irq() - start a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _irq variant of write_seqlock(). Use only if the read side section, or + * other write sections, can be invoked from hardirq contexts. + */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_irq() - end a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_irq closes the serialized and non-interruptible + * seqlock_t write side section opened with write_seqlock_irq(). + */ static inline void write_sequnlock_irq(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); @@ -528,9 +652,28 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) return flags; } +/** + * write_seqlock_irqsave() - start a non-interruptible seqlock_t write + * section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to write_sequnlock_irqrestore(). + * + * _irqsave variant of write_seqlock(). Use it only if the read side + * section, or other write sections, can be invoked from hardirq context. 
+ */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) +/** + * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write + * section + * @sl: Pointer to seqlock_t + * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() + * + * write_sequnlock_irqrestore closes the serialized and non-interruptible + * seqlock_t write section previously opened with write_seqlock_irqsave(). + */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -538,36 +681,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } -/* - * A locking reader exclusively locks out other writers and locking readers, - * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * read_seqlock_excl() - begin a seqlock_t locking reader section + * @sl: Pointer to seqlock_t + * + * read_seqlock_excl opens a seqlock_t locking reader critical section. A + * locking reader exclusively locks out *both* other writers *and* other + * locking readers, but it does not update the embedded sequence number. + * + * Locking readers act like a normal spin_lock()/spin_unlock(). + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } +/** + * read_sequnlock_excl() - end a seqlock_t locking reader critical section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } +/** + * read_seqlock_excl_bh() - start a seqlock_t locking reader section with + * softirqs disabled + * @sl: Pointer to seqlock_t + * + * _bh variant of read_seqlock_excl(). Use this variant only if the + * seqlock_t write side section, *or other read sections*, can be invoked + * from softirq contexts. + */ static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } +/** + * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking + * reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } +/** + * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking + * reader section + * @sl: Pointer to seqlock_t + * + * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } +/** + * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); @@ -581,9 +767,26 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) return flags; } +/** + * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t + * locking reader section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to read_sequnlock_excl_irqrestore(). + * + * _irqsave variant of read_seqlock_excl(). 
Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) +/** + * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() + */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -591,14 +794,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) } /** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. + * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader + * @lock: Pointer to seqlock_t + * @seq : Marker and return parameter. If the passed value is even, the + * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). + * If the passed value is odd, the reader will become a *locking* reader + * as in read_seqlock_excl(). In the first call to this function, the + * caller *must* initialize and pass an even value to @seq; this way, a + * lockless read can be optimistically tried first. + * + * read_seqbegin_or_lock is an API designed to optimistically try a normal + * lockless seqlock_t read section first. If an odd counter is found, the + * lockless read trial has failed, and the next read iteration transforms + * itself into a full seqlock_t locking reader. + * + * This is typically used to avoid seqlock_t lockless readers starvation + * (too much retry loops) in the case of a sharp spike in write side + * activity. + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * Check Documentation/locking/seqlock.rst for template example code. + * + * Return: the encountered sequence counter value, through the @seq + * parameter, which is overloaded as a return parameter. This returned + * value must be checked with need_seqretry(). If the read section need to + * be retried, this returned value must also be passed as the @seq + * parameter of the next read_seqbegin_or_lock() iteration. */ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) { @@ -608,17 +832,52 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) read_seqlock_excl(lock); } +/** + * need_seqretry() - validate seqlock_t "locking or lockless" read section + * @lock: Pointer to seqlock_t + * @seq: sequence count, from read_seqbegin_or_lock() + * + * Return: true if a read section retry is required, false otherwise + */ static inline int need_seqretry(seqlock_t *lock, int seq) { return !(seq & 1) && read_seqretry(lock, seq); } +/** + * done_seqretry() - end seqlock_t "locking or lockless" reader section + * @lock: Pointer to seqlock_t + * @seq: count, from read_seqbegin_or_lock() + * + * done_seqretry finishes the seqlock_t read side critical section started + * with read_seqbegin_or_lock() and validated by need_seqretry(). 
+ */ static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) read_sequnlock_excl(lock); } +/** + * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or + * a non-interruptible locking reader + * @lock: Pointer to seqlock_t + * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). + * + * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if + * the seqlock_t write section, *or other read sections*, can be invoked + * from hardirq context. + * + * Note: Interrupts will be disabled only for "locking reader" mode. + * + * Return: + * + * 1. The saved local interrupts state in case of a locking reader, to + * be passed to done_seqretry_irqrestore(). + * + * 2. The encountered sequence counter value, returned through @seq + * overloaded as a return parameter. Check read_seqbegin_or_lock(). + */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { @@ -632,6 +891,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) return flags; } +/** + * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a + * non-interruptible locking reader section + * @lock: Pointer to seqlock_t + * @seq: Count, from read_seqbegin_or_lock_irqsave() + * @flags: Caller's saved local interrupt state in case of a locking + * reader, also from read_seqbegin_or_lock_irqsave() + * + * This is the _irqrestore variant of done_seqretry(). The read section + * must've been opened with read_seqbegin_or_lock_irqsave(), and validated + * by need_seqretry(). + */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { -- cgit v1.2.3 From 932e46365226324d2cf26d8bdec8b51ceb296948 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:12 +0200 Subject: seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount() raw_seqcount_begin() has the same code as raw_read_seqcount(), with the exception of masking the sequence counter's LSB before returning it to the caller. Note, raw_seqcount_begin() masks the counter's LSB before returning it to the caller so that read_seqcount_retry() can fail if the counter is odd -- without the overhead of an extra branching instruction. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-7-a.darwish@linutronix.de --- include/linux/seqlock.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 85fb3ac93ffb..e885702d8b82 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -199,10 +199,11 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret & ~1; + /* + * If the counter is odd, let read_seqcount_retry() fail + * by decrementing the counter. + */ + return raw_read_seqcount(s) & ~1; } /** -- cgit v1.2.3 From 8fd8ad5c5dfcb09cf62abadd4043eaf1afbbd0ce Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:13 +0200 Subject: lockdep: Add preemption enabled/disabled assertion APIs Asserting that preemption is enabled or disabled is a critical sanity check. Developers are usually reluctant to add such a check in a fastpath as reading the preemption count can be costly. 
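As a sketch of how the assertion macros added below can be used (illustrative only; the helper and the per-CPU struct are made up), a fast path that relies on preemption being disabled can now document and verify that requirement, at no cost when lockdep is off or the architecture has no kernel preemption:

	/* Hypothetical per-CPU accounting helper: callers must have preemption off. */
	static void example_stats_account(struct example_stats __percpu *stats,
					  u64 bytes)
	{
		lockdep_assert_preemption_disabled();

		__this_cpu_add(stats->bytes, bytes);
		__this_cpu_inc(stats->packets);
	}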
Extend the lockdep API with macros asserting that preemption is disabled or enabled. If lockdep is disabled, or if the underlying architecture does not support kernel preemption, this assert has no runtime overhead. References: f54bb2ec02c8 ("locking/lockdep: Add IRQs disabled/enabled assertion APIs: ...") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-8-a.darwish@linutronix.de --- include/linux/lockdep.h | 19 +++++++++++++++++++ lib/Kconfig.debug | 1 + 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 7aafba0ddcf9..39a35699d0d6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -549,6 +549,22 @@ do { \ WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ } while (0) +#define lockdep_assert_preemption_enabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() != 0 || \ + !this_cpu_read(hardirqs_enabled))); \ +} while (0) + +#define lockdep_assert_preemption_disabled() \ +do { \ + WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ + debug_locks && \ + (preempt_count() == 0 && \ + this_cpu_read(hardirqs_enabled))); \ +} while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) @@ -557,6 +573,9 @@ do { \ # define lockdep_assert_irqs_enabled() do { } while (0) # define lockdep_assert_irqs_disabled() do { } while (0) # define lockdep_assert_in_irq() do { } while (0) + +# define lockdep_assert_preemption_enabled() do { } while (0) +# define lockdep_assert_preemption_disabled() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9ad9210d70a1..5379931ba3b5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1117,6 +1117,7 @@ config PROVE_LOCKING select DEBUG_RWSEMS select DEBUG_WW_MUTEX_SLOWPATH select DEBUG_LOCK_ALLOC + select PREEMPT_COUNT if !ARCH_NO_PREEMPT select TRACE_IRQFLAGS default n help -- cgit v1.2.3 From 859247d39fb008ea812e8f0c398a58a20c12899e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:14 +0200 Subject: seqlock: lockdep assert non-preemptibility on seqcount_t write Preemption must be disabled before entering a sequence count write side critical section. Failing to do so, the seqcount read side can preempt the write side section and spin for the entire scheduler tick. If that reader belongs to a real-time scheduling class, it can spin forever and the kernel will livelock. Assert through lockdep that preemption is disabled for seqcount writers. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-9-a.darwish@linutronix.de --- include/linux/seqlock.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e885702d8b82..54bc20496392 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,6 +266,12 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level @@ -276,8 +282,19 @@ static inline void raw_write_seqcount_end(seqcount_t *s) */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + lockdep_assert_preemption_disabled(); + __write_seqcount_begin_nested(s, subclass); +} + +/* + * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. + * + * Use for internal seqlock.h code where it's known that preemption is + * already disabled. For example, seqlock_t write side functions. + */ +static inline void __write_seqcount_begin(seqcount_t *s) +{ + __write_seqcount_begin_nested(s, 0); } /** @@ -575,7 +592,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -601,7 +618,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -628,7 +645,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -649,7 +666,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); return flags; } -- cgit v1.2.3 From 55f3560df975f557c48aa6afc636808f31ecb87a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:15 +0200 Subject: seqlock: Extend seqcount API with associated locks A sequence counter write side critical section must be protected by some form of locking to serialize writers. If the serialization primitive is not disabling preemption implicitly, preemption has to be explicitly disabled before entering the write side critical section. There is no built-in debugging mechanism to verify that the lock used for writer serialization is held and preemption is disabled. Some usage sites like dma-buf have explicit lockdep checks for the writer-side lock, but this covers only a small portion of the sequence counter usage in the kernel. Add new sequence counter types which allows to associate a lock to the sequence counter at initialization time. The seqcount API functions are extended to provide appropriate lockdep assertions depending on the seqcount/lock type. 
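A usage sketch for the spinlock-associated variant introduced below (all names are illustrative, not taken from the patch): the plain seqcount read/write calls dispatch to the seqcount_spinlock_t implementation at compile time, and lockdep checks that the associated lock is held on the write side.

	struct example_data {
		spinlock_t		lock;
		seqcount_spinlock_t	seq;
		u64			a, b;
	};

	static void example_init(struct example_data *d)
	{
		spin_lock_init(&d->lock);
		seqcount_spinlock_init(&d->seq, &d->lock);
	}

	static void example_update(struct example_data *d, u64 a, u64 b)
	{
		spin_lock(&d->lock);
		write_seqcount_begin(&d->seq);	/* lockdep: d->lock must be held */
		d->a = a;
		d->b = b;
		write_seqcount_end(&d->seq);
		spin_unlock(&d->lock);
	}

	static u64 example_read(struct example_data *d)
	{
		unsigned int seq;
		u64 sum;

		do {
			seq = read_seqcount_begin(&d->seq);
			sum = d->a + d->b;
		} while (read_seqcount_retry(&d->seq, seq));

		return sum;
	}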
For sequence counters with associated locks that do not implicitly disable preemption, preemption protection is enforced in the sequence counter write side functions. This removes the need to explicitly add preempt_disable/enable() around the write side critical sections: the write_begin/end() functions for these new sequence counter types automatically do this. Introduce the following seqcount types with associated locks: seqcount_spinlock_t seqcount_raw_spinlock_t seqcount_rwlock_t seqcount_mutex_t seqcount_ww_mutex_t Extend the seqcount read and write functions to branch out to the specific seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel API explosion per each new seqcount_LOCKTYPE_t added. Add such compile-time type detection logic into a new, internal, seqlock header. Document the proper seqcount_LOCKTYPE_t usage, and rationale, at Documentation/locking/seqlock.rst. If lockdep is disabled, this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-10-a.darwish@linutronix.de --- Documentation/locking/seqlock.rst | 52 +++++ include/linux/seqlock.h | 464 ++++++++++++++++++++++++++++++++------ 2 files changed, 447 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index 366dd368d90a..62c5ad98c11c 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -87,6 +87,58 @@ Read path:: } while (read_seqcount_retry(&foo_seqcount, seq)); +.. _seqcount_locktype_t: + +Sequence counters with associated locks (``seqcount_LOCKTYPE_t``) +----------------------------------------------------------------- + +As discussed at :ref:`seqcount_t`, sequence count write side critical +sections must be serialized and non-preemptible. This variant of +sequence counters associate the lock used for writer serialization at +initialization time, which enables lockdep to validate that the write +side critical sections are properly serialized. + +This lock association is a NOOP if lockdep is disabled and has neither +storage nor runtime overhead. If lockdep is enabled, the lock pointer is +stored in struct seqcount and lockdep's "lock is held" assertions are +injected at the beginning of the write side critical section to validate +that it is properly protected. + +For lock types which do not implicitly disable preemption, preemption +protection is enforced in the write side function. + +The following sequence counters with associated locks are defined: + + - ``seqcount_spinlock_t`` + - ``seqcount_raw_spinlock_t`` + - ``seqcount_rwlock_t`` + - ``seqcount_mutex_t`` + - ``seqcount_ww_mutex_t`` + +The plain seqcount read and write APIs branch out to the specific +seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel +API explosion per each new seqcount LOCKTYPE. + +Initialization (replace "LOCKTYPE" with one of the supported locks):: + + /* dynamic */ + seqcount_LOCKTYPE_t foo_seqcount; + seqcount_LOCKTYPE_init(&foo_seqcount, &lock); + + /* static */ + static seqcount_LOCKTYPE_t foo_seqcount = + SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock); + + /* C99 struct init */ + struct { + .seq = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock), + } foo; + +Write path: same as in :ref:`seqcount_t`, while running from a context +with the associated LOCKTYPE lock acquired. + +Read path: same as in :ref:`seqcount_t`. + .. 
_seqlock_t: Sequential locks (``seqlock_t``) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 54bc20496392..8c16a494c968 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -10,13 +10,17 @@ * * Copyrights: * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli + * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH */ -#include -#include -#include #include #include +#include +#include +#include +#include +#include + #include /* @@ -48,6 +52,10 @@ * This mechanism can't be used if the protected data contains pointers, * as the writer can invalidate a pointer that a reader is following. * + * If the write serialization mechanism is one of the common kernel + * locking primitives, use a sequence counter with associated lock + * (seqcount_LOCKTYPE_t) instead. + * * If it's desired to automatically handle the sequence counter writer * serialization and non-preemptibility requirements, use a sequential * lock (seqlock_t) instead. @@ -108,9 +116,267 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } +/* + * Sequence counters with associated locks (seqcount_LOCKTYPE_t) + * + * A sequence counter which associates the lock used for writer + * serialization at initialization time. This enables lockdep to validate + * that the write side critical section is properly serialized. + * + * For associated locks which do not implicitly disable preemption, + * preemption protection is enforced in the write side function. + * + * Lockdep is never used in any for the raw write variants. + * + * See Documentation/locking/seqlock.rst + */ + +#ifdef CONFIG_LOCKDEP +#define __SEQ_LOCKDEP(expr) expr +#else +#define __SEQ_LOCKDEP(expr) +#endif + +#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ + __SEQ_LOCKDEP(.lock = (assoc_lock)) \ +} + +#define seqcount_locktype_init(s, assoc_lock) \ +do { \ + seqcount_init(&(s)->seqcount); \ + __SEQ_LOCKDEP((s)->lock = (assoc_lock)); \ +} while (0) + +/** + * typedef seqcount_spinlock_t - sequence counter with spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated spinlock + * + * A plain sequence counter with external writer synchronization by a + * spinlock. The spinlock is associated to the sequence count in the + * static initializer or init function. This enables lockdep to validate + * that the write side critical section is properly serialized. + */ +typedef struct seqcount_spinlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(spinlock_t *lock); +} seqcount_spinlock_t; + +/** + * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t + * @name: Name of the seqcount_spinlock_t instance + * @lock: Pointer to the associated spinlock + */ +#define SEQCNT_SPINLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t + * @s: Pointer to the seqcount_spinlock_t instance + * @lock: Pointer to the associated spinlock + */ +#define seqcount_spinlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated raw spinlock + * + * A plain sequence counter with external writer synchronization by a + * raw spinlock. 
The raw spinlock is associated to the sequence count in + * the static initializer or init function. This enables lockdep to + * validate that the write side critical section is properly serialized. + */ +typedef struct seqcount_raw_spinlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(raw_spinlock_t *lock); +} seqcount_raw_spinlock_t; + +/** + * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t + * @name: Name of the seqcount_raw_spinlock_t instance + * @lock: Pointer to the associated raw_spinlock + */ +#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t + * @s: Pointer to the seqcount_raw_spinlock_t instance + * @lock: Pointer to the associated raw_spinlock + */ +#define seqcount_raw_spinlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_rwlock_t - sequence count with rwlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated rwlock + * + * A plain sequence counter with external writer synchronization by a + * rwlock. The rwlock is associated to the sequence count in the static + * initializer or init function. This enables lockdep to validate that + * the write side critical section is properly serialized. + */ +typedef struct seqcount_rwlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(rwlock_t *lock); +} seqcount_rwlock_t; + +/** + * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t + * @name: Name of the seqcount_rwlock_t instance + * @lock: Pointer to the associated rwlock + */ +#define SEQCNT_RWLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t + * @s: Pointer to the seqcount_rwlock_t instance + * @lock: Pointer to the associated rwlock + */ +#define seqcount_rwlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_mutex_t - sequence count with mutex associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated mutex + * + * A plain sequence counter with external writer synchronization by a + * mutex. The mutex is associated to the sequence counter in the static + * initializer or init function. This enables lockdep to validate that + * the write side critical section is properly serialized. + * + * The write side API functions write_seqcount_begin()/end() automatically + * disable and enable preemption when used with seqcount_mutex_t. + */ +typedef struct seqcount_mutex { + seqcount_t seqcount; + __SEQ_LOCKDEP(struct mutex *lock); +} seqcount_mutex_t; + +/** + * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t + * @name: Name of the seqcount_mutex_t instance + * @lock: Pointer to the associated mutex + */ +#define SEQCNT_MUTEX_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_mutex_init - runtime initializer for seqcount_mutex_t + * @s: Pointer to the seqcount_mutex_t instance + * @lock: Pointer to the associated mutex + */ +#define seqcount_mutex_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated ww_mutex + * + * A plain sequence counter with external writer synchronization by a + * ww_mutex. The ww_mutex is associated to the sequence counter in the static + * initializer or init function. 
This enables lockdep to validate that + * the write side critical section is properly serialized. + * + * The write side API functions write_seqcount_begin()/end() automatically + * disable and enable preemption when used with seqcount_ww_mutex_t. + */ +typedef struct seqcount_ww_mutex { + seqcount_t seqcount; + __SEQ_LOCKDEP(struct ww_mutex *lock); +} seqcount_ww_mutex_t; + +/** + * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t + * @name: Name of the seqcount_ww_mutex_t instance + * @lock: Pointer to the associated ww_mutex + */ +#define SEQCNT_WW_MUTEX_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t + * @s: Pointer to the seqcount_ww_mutex_t instance + * @lock: Pointer to the associated ww_mutex + */ +#define seqcount_ww_mutex_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/* + * @preempt: Is the associated write serialization lock preemtpible? + */ +#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember) \ +static inline seqcount_t * \ +__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s) \ +{ \ + return &s->seqcount; \ +} \ + \ +static inline bool \ +__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ +{ \ + return preempt; \ +} \ + \ +static inline void \ +__seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ +{ \ + __SEQ_LOCKDEP(lockdep_assert_held(lockmember)); \ +} + +/* + * Similar hooks, but for plain seqcount_t + */ + +static inline seqcount_t *__seqcount_ptr(seqcount_t *s) +{ + return s; +} + +static inline bool __seqcount_preemptible(seqcount_t *s) +{ + return false; +} + +static inline void __seqcount_assert(seqcount_t *s) +{ + lockdep_assert_preemption_disabled(); +} + +/* + * @s: Pointer to seqcount_locktype_t, generated hooks first parameter. + */ +SEQCOUNT_LOCKTYPE(raw_spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(rwlock, false, s->lock) +SEQCOUNT_LOCKTYPE(mutex, true, s->lock) +SEQCOUNT_LOCKTYPE(ww_mutex, true, &s->lock->base) + +#define __seqprop_case(s, locktype, prop) \ + seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s)) + +#define __seqprop(s, prop) _Generic(*(s), \ + seqcount_t: __seqcount_##prop((void *)(s)), \ + __seqprop_case((s), raw_spinlock, prop), \ + __seqprop_case((s), spinlock, prop), \ + __seqprop_case((s), rwlock, prop), \ + __seqprop_case((s), mutex, prop), \ + __seqprop_case((s), ww_mutex, prop)) + +#define __to_seqcount_t(s) __seqprop(s, ptr) +#define __associated_lock_exists_and_is_preemptible(s) __seqprop(s, preemptible) +#define __assert_write_section_is_protected(s) __seqprop(s, assert) + /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. 
Callers should ensure that smp_rmb() or equivalent ordering is @@ -122,7 +388,10 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned __read_seqcount_begin(const seqcount_t *s) +#define __read_seqcount_begin(s) \ + __read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) { unsigned ret; @@ -138,32 +407,38 @@ repeat: /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) +#define raw_read_seqcount_begin(s) \ + raw_read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); + unsigned ret = __read_seqcount_t_begin(s); smp_rmb(); return ret; } /** * read_seqcount_begin() - begin a seqcount_t read critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned read_seqcount_begin(const seqcount_t *s) +#define read_seqcount_begin(s) \ + read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned read_seqcount_t_begin(const seqcount_t *s) { seqcount_lockdep_reader_access(s); - return raw_read_seqcount_begin(s); + return raw_read_seqcount_t_begin(s); } /** * raw_read_seqcount() - read the raw seqcount_t counter value - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or @@ -172,7 +447,10 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) +#define raw_read_seqcount(s) \ + raw_read_seqcount_t(__to_seqcount_t(s)) + +static inline unsigned raw_read_seqcount_t(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); @@ -183,7 +461,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait @@ -197,18 +475,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_seqcount_begin(const seqcount_t *s) +#define raw_seqcount_begin(s) \ + raw_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) { /* * If the counter is odd, let read_seqcount_retry() fail * by decrementing the counter. 
*/ - return raw_read_seqcount(s) & ~1; + return raw_read_seqcount_t(s) & ~1; } /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() @@ -221,7 +502,10 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Return: true if a read section retry is required, else false */ -static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) +#define __read_seqcount_retry(s, start) \ + __read_seqcount_t_retry(__to_seqcount_t(s), start) + +static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); @@ -229,7 +513,7 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) /** * read_seqcount_retry() - end a seqcount_t read critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given @@ -238,17 +522,28 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) * * Return: true if a read section retry is required, else false */ -static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) +#define read_seqcount_retry(s, start) \ + read_seqcount_t_retry(__to_seqcount_t(s), start) + +static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_retry(s, start); + return __read_seqcount_t_retry(s, start); } /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants */ -static inline void raw_write_seqcount_begin(seqcount_t *s) +#define raw_write_seqcount_begin(s) \ +do { \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + raw_write_seqcount_t_begin(__to_seqcount_t(s)); \ +} while (0) + +static inline void raw_write_seqcount_t_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -257,49 +552,50 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants */ -static inline void raw_write_seqcount_end(seqcount_t *s) +#define raw_write_seqcount_end(s) \ +do { \ + raw_write_seqcount_t_end(__to_seqcount_t(s)); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_enable(); \ +} while (0) + +static inline void raw_write_seqcount_t_end(seqcount_t *s) { smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } -static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst */ -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - lockdep_assert_preemption_disabled(); - 
__write_seqcount_begin_nested(s, subclass); -} - -/* - * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. - * - * Use for internal seqlock.h code where it's known that preemption is - * already disabled. For example, seqlock_t write side functions. - */ -static inline void __write_seqcount_begin(seqcount_t *s) +#define write_seqcount_begin_nested(s, subclass) \ +do { \ + __assert_write_section_is_protected(s); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass); \ +} while (0) + +static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) { - __write_seqcount_begin_nested(s, 0); + raw_write_seqcount_t_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } /** * write_seqcount_begin() - start a seqcount_t write side critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * write_seqcount_begin opens a write side critical section of the given * seqcount_t. @@ -308,26 +604,44 @@ static inline void __write_seqcount_begin(seqcount_t *s) * non-preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ -static inline void write_seqcount_begin(seqcount_t *s) +#define write_seqcount_begin(s) \ +do { \ + __assert_write_section_is_protected(s); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + write_seqcount_t_begin(__to_seqcount_t(s)); \ +} while (0) + +static inline void write_seqcount_t_begin(seqcount_t *s) { - write_seqcount_begin_nested(s, 0); + write_seqcount_t_begin_nested(s, 0); } /** * write_seqcount_end() - end a seqcount_t write side critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * The write section must've been opened with write_seqcount_begin(). */ -static inline void write_seqcount_end(seqcount_t *s) +#define write_seqcount_end(s) \ +do { \ + write_seqcount_t_end(__to_seqcount_t(s)); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_enable(); \ +} while (0) + +static inline void write_seqcount_t_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); + raw_write_seqcount_t_end(s); } /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. It is one wmb cheaper, because it can collapse @@ -366,7 +680,10 @@ static inline void write_seqcount_end(seqcount_t *s) * WRITE_ONCE(X, false); * } */ -static inline void raw_write_seqcount_barrier(seqcount_t *s) +#define raw_write_seqcount_barrier(s) \ + raw_write_seqcount_t_barrier(__to_seqcount_t(s)) + +static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -378,12 +695,15 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) /** * write_seqcount_invalidate() - invalidate in-progress seqcount_t read * side operations - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * After write_seqcount_invalidate, no seqcount_t read side operations * will complete successfully and see data older than this. 
*/ -static inline void write_seqcount_invalidate(seqcount_t *s) +#define write_seqcount_invalidate(s) \ + write_seqcount_t_invalidate(__to_seqcount_t(s)) + +static inline void write_seqcount_t_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); @@ -393,7 +713,7 @@ static inline void write_seqcount_invalidate(seqcount_t *s) /** * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Use seqcount_t latching to switch between two storage places protected * by a sequence counter. Doing so allows having interruptible, preemptible, @@ -406,7 +726,10 @@ static inline void write_seqcount_invalidate(seqcount_t *s) * picking which data copy to read. The full counter value must then be * checked with read_seqcount_retry(). */ -static inline int raw_read_seqcount_latch(seqcount_t *s) +#define raw_read_seqcount_latch(s) \ + raw_read_seqcount_t_latch(__to_seqcount_t(s)) + +static inline int raw_read_seqcount_t_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ int seq = READ_ONCE(s->sequence); /* ^^^ */ @@ -415,7 +738,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) /** * raw_write_seqcount_latch() - redirect readers to even/odd copy - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -494,7 +817,10 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. 
*/ -static inline void raw_write_seqcount_latch(seqcount_t *s) +#define raw_write_seqcount_latch(s) \ + raw_write_seqcount_t_latch(__to_seqcount_t(s)) + +static inline void raw_write_seqcount_t_latch(seqcount_t *s) { smp_wmb(); /* prior stores before incrementing "sequence" */ s->sequence++; @@ -592,7 +918,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -604,7 +930,7 @@ static inline void write_seqlock(seqlock_t *sl) */ static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock(&sl->lock); } @@ -618,7 +944,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -631,7 +957,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) */ static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } @@ -645,7 +971,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -657,7 +983,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) */ static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_irq(&sl->lock); } @@ -666,7 +992,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); return flags; } @@ -695,13 +1021,13 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqlock_excl() - begin a seqlock_t locking reader section - * @sl: Pointer to seqlock_t + * @sl: Pointer to seqlock_t * * read_seqlock_excl opens a seqlock_t locking reader critical section. A * locking reader exclusively locks out *both* other writers *and* other -- cgit v1.2.3 From ec8702da570ebb59f38471007bf71359c51b027b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:16 +0200 Subject: seqlock: Align multi-line macros newline escapes at 72 columns Parent commit, "seqlock: Extend seqcount API with associated locks", introduced a big number of multi-line macros that are newline-escaped at 72 columns. For overall cohesion, align the earlier-existing macros similarly. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-11-a.darwish@linutronix.de --- include/linux/seqlock.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8c16a494c968..b48729988325 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -80,17 +80,18 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, } #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SEQCOUNT_DEP_MAP_INIT(lockname) \ - .dep_map = { .name = #lockname } \ + +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } /** * seqcount_init() - runtime initializer for seqcount_t * @s: Pointer to the seqcount_t instance */ -# define seqcount_init(s) \ - do { \ - static struct lock_class_key __key; \ - __seqcount_init((s), #s, &__key); \ +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ } while (0) static inline void seqcount_lockdep_reader_access(const seqcount_t *s) @@ -842,20 +843,20 @@ typedef struct { spinlock_t lock; } seqlock_t; -#define __SEQLOCK_UNLOCKED(lockname) \ - { \ - .seqcount = SEQCNT_ZERO(lockname), \ - .lock = __SPIN_LOCK_UNLOCKED(lockname) \ +#define __SEQLOCK_UNLOCKED(lockname) \ + { \ + .seqcount = SEQCNT_ZERO(lockname), \ + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } /** * seqlock_init() - dynamic initializer for seqlock_t * @sl: Pointer to the seqlock_t instance */ -#define seqlock_init(sl) \ - do { \ - seqcount_init(&(sl)->seqcount); \ - spin_lock_init(&(sl)->lock); \ +#define seqlock_init(sl) \ + do { \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) /** -- cgit v1.2.3 From 318ce71f3e3ae4108c1665f3860afa8a2a4c9f02 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:17 +0200 Subject: dma-buf: Remove custom seqcount lockdep class key Commit 3c3b177a9369 ("reservation: add support for read-only access using rcu") introduced a sequence counter to manage updates to reservations. Back then, the reservation object initializer reservation_object_init() was always inlined. Having the sequence counter initialization inlined meant that each of the call sites would have a different lockdep class key, which would've broken lockdep's deadlock detection. The aforementioned commit thus introduced, and exported, a custom seqcount lockdep class key and name. The commit 8735f16803f00 ("dma-buf: cleanup reservation_object_init...") transformed the reservation object initializer to a normal non-inlined C function. seqcount_init(), which automatically defines the seqcount lockdep class key and must be called non-inlined, can now be safely used. Remove the seqcount custom lockdep class key, name, and export. Use seqcount_init() inside the dma reservation object initializer. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Acked-by: Daniel Vetter Link: https://lkml.kernel.org/r/20200720155530.1173732-12-a.darwish@linutronix.de --- drivers/dma-buf/dma-resv.c | 9 +-------- include/linux/dma-resv.h | 2 -- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b45f8514dc82..15efa0c2dacb 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -51,12 +51,6 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); -struct lock_class_key reservation_seqcount_class; -EXPORT_SYMBOL(reservation_seqcount_class); - -const char reservation_seqcount_string[] = "reservation_seqcount"; -EXPORT_SYMBOL(reservation_seqcount_string); - /** * dma_resv_list_alloc - allocate fence list * @shared_max: number of fences we need space for @@ -135,9 +129,8 @@ subsys_initcall(dma_resv_lockdep); void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); + seqcount_init(&obj->seq); - __seqcount_init(&obj->seq, reservation_seqcount_string, - &reservation_seqcount_class); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL); } diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index ee50d10f052b..a6538ae7d93f 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -46,8 +46,6 @@ #include extern struct ww_class reservation_ww_class; -extern struct lock_class_key reservation_seqcount_class; -extern const char reservation_seqcount_string[]; /** * struct dma_resv_list - a list of shared fences -- cgit v1.2.3 From cd29f22019ec4ab998d2e1e8c831c7c42db4aa7d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:18 +0200 Subject: dma-buf: Use sequence counter with associated wound/wait mutex A sequence counter write side critical section must be protected by some form of locking to serialize writers. If the serialization primitive is not disabling preemption implicitly, preemption has to be explicitly disabled before entering the sequence counter write side critical section. The dma-buf reservation subsystem uses plain sequence counters to manage updates to reservations. Writer serialization is accomplished through a wound/wait mutex. Acquiring a wound/wait mutex does not disable preemption, so this needs to be done manually before and after the write side critical section. Use the newly-added seqcount_ww_mutex_t instead: - It associates the ww_mutex with the sequence count, which enables lockdep to validate that the write side critical section is properly serialized. - It removes the need to explicitly add preempt_disable/enable() around the write side critical section because the write_begin/end() functions for this new data type automatically do this. If lockdep is disabled this ww_mutex lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Acked-by: Daniel Vetter Link: https://lkml.kernel.org/r/20200720155530.1173732-13-a.darwish@linutronix.de --- drivers/dma-buf/dma-resv.c | 8 +------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 -- include/linux/dma-resv.h | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 15efa0c2dacb..a7631352a486 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -129,7 +129,7 @@ subsys_initcall(dma_resv_lockdep); void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - seqcount_init(&obj->seq); + seqcount_ww_mutex_init(&obj->seq, &obj->lock); RCU_INIT_POINTER(obj->fence, NULL); RCU_INIT_POINTER(obj->fence_excl, NULL); @@ -260,7 +260,6 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) fobj = dma_resv_get_list(obj); count = fobj->shared_count; - preempt_disable(); write_seqcount_begin(&obj->seq); for (i = 0; i < count; ++i) { @@ -282,7 +281,6 @@ replace: smp_store_mb(fobj->shared_count, count); write_seqcount_end(&obj->seq); - preempt_enable(); dma_fence_put(old); } EXPORT_SYMBOL(dma_resv_add_shared_fence); @@ -309,14 +307,12 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) if (fence) dma_fence_get(fence); - preempt_disable(); write_seqcount_begin(&obj->seq); /* write_seqcount_begin provides the necessary memory barrier */ RCU_INIT_POINTER(obj->fence_excl, fence); if (old) old->shared_count = 0; write_seqcount_end(&obj->seq); - preempt_enable(); /* inplace update, no shared fences */ while (i--) @@ -394,13 +390,11 @@ retry: src_list = dma_resv_get_list(dst); old = dma_resv_get_excl(dst); - preempt_disable(); write_seqcount_begin(&dst->seq); /* write_seqcount_begin provides the necessary memory barrier */ RCU_INIT_POINTER(dst->fence_excl, new); RCU_INIT_POINTER(dst->fence, dst_list); write_seqcount_end(&dst->seq); - preempt_enable(); dma_resv_list_free(src_list); dma_fence_put(old); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b91b5171270f..ff4b583cb96a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -258,11 +258,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_count = k; /* Install the new fence list, seqcount provides the barriers */ - preempt_disable(); write_seqcount_begin(&resv->seq); RCU_INIT_POINTER(resv->fence, new); write_seqcount_end(&resv->seq); - preempt_enable(); /* Drop the references to the removed fences or move them to ef_list */ for (i = j, k = 0; i < old->shared_count; ++i) { diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h index a6538ae7d93f..d44a77e8a7e3 100644 --- a/include/linux/dma-resv.h +++ b/include/linux/dma-resv.h @@ -69,7 +69,7 @@ struct dma_resv_list { */ struct dma_resv { struct ww_mutex lock; - seqcount_t seq; + seqcount_ww_mutex_t seq; struct dma_fence __rcu *fence_excl; struct dma_resv_list __rcu *fence; -- cgit v1.2.3 From b75058614fdd3140074a640b514f6a0b4d485a2d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:19 +0200 Subject: sched: tasks: Use sequence counter with associated spinlock A sequence counter write side critical section must be protected by some form of locking to serialize writers. 
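As an illustration of such write serialization with a preemptible lock (again only a sketch; the bar_state structure and its helpers are invented), a mutex-backed counter no longer needs an explicit preempt_disable()/enable() pair around the write section, because write_seqcount_begin()/end() provide both the lockdep assertion and the preemption protection for seqcount_mutex_t::

	#include <linux/mutex.h>
	#include <linux/seqlock.h>

	/* Hypothetical example state; not part of this patch. */
	struct bar_state {
		struct mutex		lock;
		seqcount_mutex_t	seq;	/* associated with ->lock */
		unsigned long		stamp;
	};

	static void bar_init(struct bar_state *s)
	{
		mutex_init(&s->lock);
		seqcount_mutex_init(&s->seq, &s->lock);
	}

	static void bar_update(struct bar_state *s, unsigned long stamp)
	{
		mutex_lock(&s->lock);
		/*
		 * Asserts that s->lock is held and disables preemption,
		 * because a mutex does not disable it implicitly.
		 */
		write_seqcount_begin(&s->seq);
		s->stamp = stamp;
		write_seqcount_end(&s->seq);	/* re-enables preemption */
		mutex_unlock(&s->lock);
	}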
A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_spinlock_t data type, which allows to associate a spinlock with the sequence counter. This enables lockdep to verify that the spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-14-a.darwish@linutronix.de --- include/linux/sched.h | 2 +- init/init_task.c | 3 ++- kernel/fork.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d1de021b315..9a9d8263962d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1050,7 +1050,7 @@ struct task_struct { /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; /* Seqence number to catch updates: */ - seqcount_t mems_allowed_seq; + seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif diff --git a/init/init_task.c b/init/init_task.c index 15089d15010a..94fe3ba1bb60 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -154,7 +154,8 @@ struct task_struct init_task .trc_holdout_list = LIST_HEAD_INIT(init_task.trc_holdout_list), #endif #ifdef CONFIG_CPUSETS - .mems_allowed_seq = SEQCNT_ZERO(init_task.mems_allowed_seq), + .mems_allowed_seq = SEQCNT_SPINLOCK_ZERO(init_task.mems_allowed_seq, + &init_task.alloc_lock), #endif #ifdef CONFIG_RT_MUTEXES .pi_waiters = RB_ROOT_CACHED, diff --git a/kernel/fork.c b/kernel/fork.c index 70d9d0a4de2a..fc72f09a61b2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2032,7 +2032,7 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; p->cpuset_slab_spread_rotor = NUMA_NO_NODE; - seqcount_init(&p->mems_allowed_seq); + seqcount_spinlock_init(&p->mems_allowed_seq, &p->alloc_lock); #endif #ifdef CONFIG_TRACE_IRQFLAGS p->irq_events = 0; -- cgit v1.2.3 From 8201d923f492703a7d6c980cff3034759a452b86 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:20 +0200 Subject: netfilter: conntrack: Use sequence counter with associated spinlock A sequence counter write side critical section must be protected by some form of locking to serialize writers. A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_spinlock_t data type, which allows to associate a spinlock with the sequence counter. This enables lockdep to verify that the spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-15-a.darwish@linutronix.de --- include/net/netfilter/nf_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 90690e37a56f..ea4e2010b246 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -286,7 +286,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize); extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; -extern seqcount_t nf_conntrack_generation; +extern seqcount_spinlock_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; /* must be called with rcu read lock held */ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f33d72c5b06e..b597b5b16ba1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -seqcount_t nf_conntrack_generation __read_mostly; +seqcount_spinlock_t nf_conntrack_generation __read_mostly; static unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, @@ -2600,7 +2600,8 @@ int nf_conntrack_init_start(void) /* struct nf_ct_ext uses u8 to store offsets/size */ BUILD_BUG_ON(total_extension_size() > 255u); - seqcount_init(&nf_conntrack_generation); + seqcount_spinlock_init(&nf_conntrack_generation, + &nf_conntrack_locks_all_lock); for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_conntrack_locks[i]); -- cgit v1.2.3 From 26475371976c69489d3a8e6c8bbf35afbbc25055 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:24 +0200 Subject: vfs: Use sequence counter with associated spinlock A sequence counter write side critical section must be protected by some form of locking to serialize writers. A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_spinlock_t data type, which allows to associate a spinlock with the sequence counter. This enables lockdep to verify that the spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-19-a.darwish@linutronix.de --- fs/dcache.c | 2 +- fs/fs_struct.c | 4 ++-- include/linux/dcache.h | 2 +- include/linux/fs_struct.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 361ea7ab30ea..ea0485861d93 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1746,7 +1746,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_lockref.count = 1; dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); - seqcount_init(&dentry->d_seq); + seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; dentry->d_sb = sb; diff --git a/fs/fs_struct.c b/fs/fs_struct.c index ca639ed967b7..04b3f5b9c629 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -117,7 +117,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) fs->users = 1; fs->in_exec = 0; spin_lock_init(&fs->lock); - seqcount_init(&fs->seq); + seqcount_spinlock_init(&fs->seq, &fs->lock); fs->umask = old->umask; spin_lock(&old->lock); @@ -163,6 +163,6 @@ EXPORT_SYMBOL(current_umask); struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), - .seq = SEQCNT_ZERO(init_fs.seq), + .seq = SEQCNT_SPINLOCK_ZERO(init_fs.seq, &init_fs.lock), .umask = 0022, }; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index a81f0c3cf352..65d975bf9390 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -89,7 +89,7 @@ extern struct dentry_stat_t dentry_stat; struct dentry { /* RCU lookup touched fields */ unsigned int d_flags; /* protected by d_lock */ - seqcount_t d_seq; /* per dentry seqlock */ + seqcount_spinlock_t d_seq; /* per dentry seqlock */ struct hlist_bl_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index cf1015abfbf2..783b48dedb72 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -9,7 +9,7 @@ struct fs_struct { int users; spinlock_t lock; - seqcount_t seq; + seqcount_spinlock_t seq; int umask; int in_exec; struct path root, pwd; -- cgit v1.2.3 From 5c73b9a2b1b4ecc809a914aa64970157b3d8c936 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:29 +0200 Subject: kvm/eventfd: Use sequence counter with associated spinlock A sequence counter write side critical section must be protected by some form of locking to serialize writers. A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_spinlock_t data type, which allows to associate a spinlock with the sequence counter. This enables lockdep to verify that the spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Acked-by: Paolo Bonzini Link: https://lkml.kernel.org/r/20200720155530.1173732-24-a.darwish@linutronix.de --- include/linux/kvm_irqfd.h | 2 +- virt/kvm/eventfd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index dc1da020305b..dac047abdba7 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -42,7 +42,7 @@ struct kvm_kernel_irqfd { wait_queue_entry_t wait; /* Update side is protected by irqfds.lock */ struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; + seqcount_spinlock_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ef7ed916ad4a..d6408bb497dc 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -303,7 +303,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); - seqcount_init(&irqfd->irq_entry_sc); + seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock); f = fdget(args->fd); if (!f.file) { -- cgit v1.2.3 From af5a06b582ec3d7b0160b4faaa65f73d8dcf989f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:30 +0200 Subject: hrtimer: Use sequence counter with associated raw spinlock A sequence counter write side critical section must be protected by some form of locking to serialize writers. A plain seqcount_t does not contain the information of which lock must be held when entering a write side critical section. Use the new seqcount_raw_spinlock_t data type, which allows to associate a raw spinlock with the sequence counter. This enables lockdep to verify that the raw spinlock used for writer serialization is held when the write side critical section is entered. If lockdep is disabled this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-25-a.darwish@linutronix.de --- include/linux/hrtimer.h | 2 +- kernel/time/hrtimer.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 15c8ac313678..25993b86ac5c 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -159,7 +159,7 @@ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; unsigned int index; clockid_t clockid; - seqcount_t seq; + seqcount_raw_spinlock_t seq; struct hrtimer *running; struct timerqueue_head active; ktime_t (*get_time)(void); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index d89da1c7e005..c4038511d5c9 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -135,7 +135,11 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { * timer->base->cpu_base */ static struct hrtimer_cpu_base migration_cpu_base = { - .clock_base = { { .cpu_base = &migration_cpu_base, }, }, + .clock_base = { { + .cpu_base = &migration_cpu_base, + .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, + &migration_cpu_base.lock), + }, }, }; #define migration_base migration_cpu_base.clock_base[0] @@ -1998,8 +2002,11 @@ int hrtimers_prepare_cpu(unsigned int cpu) int i; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - cpu_base->clock_base[i].cpu_base = cpu_base; - timerqueue_init_head(&cpu_base->clock_base[i].active); + struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; + + clock_b->cpu_base = cpu_base; + seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); + timerqueue_init_head(&clock_b->active); } cpu_base->cpu = cpu; -- cgit v1.2.3 From e55687fe5c1e4849e5559a0a49199c9ca3fff36e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 11:56:22 +0200 Subject: seqlock: s/__SEQ_LOCKDEP/__SEQ_LOCK/g __SEQ_LOCKDEP() is an expression gate for the seqcount_LOCKNAME_t::lock member. Rename it to be about the member, not the gate condition. Later (PREEMPT_RT) patches will make the member available for !LOCKDEP configs. 
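For context, the gate macro in question is a plain conditional expression wrapper; the condensed sketch below (simplified from the header as changed by this series) shows how the same __SEQ_LOCK() gate compiles the lock member, its initialization and its lockdep use in or out together::

	#ifdef CONFIG_LOCKDEP
	#define __SEQ_LOCK(expr)	expr
	#else
	#define __SEQ_LOCK(expr)
	#endif

	/* Member: present only when the gate condition is enabled. */
	typedef struct seqcount_spinlock {
		seqcount_t seqcount;
		__SEQ_LOCK(spinlock_t *lock);
	} seqcount_spinlock_t;

	/* Initializer and assertion: gated by the same macro. */
	#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {		\
		.seqcount = SEQCNT_ZERO(seq_name.seqcount),		\
		__SEQ_LOCK(.lock = (assoc_lock))			\
	}

	static inline void
	__seqcount_spinlock_assert(seqcount_spinlock_t *s)
	{
		__SEQ_LOCK(lockdep_assert_held(s->lock));
	}

Naming the gate after the member it guards keeps these sites accurate once the member is no longer tied to lockdep alone.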
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index b48729988325..c689abab06c8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -133,20 +133,20 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ #ifdef CONFIG_LOCKDEP -#define __SEQ_LOCKDEP(expr) expr +#define __SEQ_LOCK(expr) expr #else -#define __SEQ_LOCKDEP(expr) +#define __SEQ_LOCK(expr) #endif #define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCKDEP(.lock = (assoc_lock)) \ + __SEQ_LOCK(.lock = (assoc_lock)) \ } #define seqcount_locktype_init(s, assoc_lock) \ do { \ seqcount_init(&(s)->seqcount); \ - __SEQ_LOCKDEP((s)->lock = (assoc_lock)); \ + __SEQ_LOCK((s)->lock = (assoc_lock)); \ } while (0) /** @@ -161,7 +161,7 @@ do { \ */ typedef struct seqcount_spinlock { seqcount_t seqcount; - __SEQ_LOCKDEP(spinlock_t *lock); + __SEQ_LOCK(spinlock_t *lock); } seqcount_spinlock_t; /** @@ -192,7 +192,7 @@ typedef struct seqcount_spinlock { */ typedef struct seqcount_raw_spinlock { seqcount_t seqcount; - __SEQ_LOCKDEP(raw_spinlock_t *lock); + __SEQ_LOCK(raw_spinlock_t *lock); } seqcount_raw_spinlock_t; /** @@ -223,7 +223,7 @@ typedef struct seqcount_raw_spinlock { */ typedef struct seqcount_rwlock { seqcount_t seqcount; - __SEQ_LOCKDEP(rwlock_t *lock); + __SEQ_LOCK(rwlock_t *lock); } seqcount_rwlock_t; /** @@ -257,7 +257,7 @@ typedef struct seqcount_rwlock { */ typedef struct seqcount_mutex { seqcount_t seqcount; - __SEQ_LOCKDEP(struct mutex *lock); + __SEQ_LOCK(struct mutex *lock); } seqcount_mutex_t; /** @@ -291,7 +291,7 @@ typedef struct seqcount_mutex { */ typedef struct seqcount_ww_mutex { seqcount_t seqcount; - __SEQ_LOCKDEP(struct ww_mutex *lock); + __SEQ_LOCK(struct ww_mutex *lock); } seqcount_ww_mutex_t; /** @@ -329,7 +329,7 @@ __seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ static inline void \ __seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ { \ - __SEQ_LOCKDEP(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } /* -- cgit v1.2.3 From a8772dccb2ec7b139db1b3ba782ecb12ed92d7c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 11:56:49 +0200 Subject: seqlock: Fold seqcount_LOCKNAME_t definition Manual repetition is boring and error prone. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 142 +++++++++++++----------------------------------- 1 file changed, 39 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index c689abab06c8..4b259bb4d4b9 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -149,21 +149,6 @@ do { \ __SEQ_LOCK((s)->lock = (assoc_lock)); \ } while (0) -/** - * typedef seqcount_spinlock_t - sequence counter with spinlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated spinlock - * - * A plain sequence counter with external writer synchronization by a - * spinlock. The spinlock is associated to the sequence count in the - * static initializer or init function. This enables lockdep to validate - * that the write side critical section is properly serialized. 
- */ -typedef struct seqcount_spinlock { - seqcount_t seqcount; - __SEQ_LOCK(spinlock_t *lock); -} seqcount_spinlock_t; - /** * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t * @name: Name of the seqcount_spinlock_t instance @@ -180,21 +165,6 @@ typedef struct seqcount_spinlock { #define seqcount_spinlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated raw spinlock - * - * A plain sequence counter with external writer synchronization by a - * raw spinlock. The raw spinlock is associated to the sequence count in - * the static initializer or init function. This enables lockdep to - * validate that the write side critical section is properly serialized. - */ -typedef struct seqcount_raw_spinlock { - seqcount_t seqcount; - __SEQ_LOCK(raw_spinlock_t *lock); -} seqcount_raw_spinlock_t; - /** * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t * @name: Name of the seqcount_raw_spinlock_t instance @@ -211,21 +181,6 @@ typedef struct seqcount_raw_spinlock { #define seqcount_raw_spinlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_rwlock_t - sequence count with rwlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated rwlock - * - * A plain sequence counter with external writer synchronization by a - * rwlock. The rwlock is associated to the sequence count in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. - */ -typedef struct seqcount_rwlock { - seqcount_t seqcount; - __SEQ_LOCK(rwlock_t *lock); -} seqcount_rwlock_t; - /** * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t * @name: Name of the seqcount_rwlock_t instance @@ -242,24 +197,6 @@ typedef struct seqcount_rwlock { #define seqcount_rwlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_mutex_t - sequence count with mutex associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated mutex - * - * A plain sequence counter with external writer synchronization by a - * mutex. The mutex is associated to the sequence counter in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. - * - * The write side API functions write_seqcount_begin()/end() automatically - * disable and enable preemption when used with seqcount_mutex_t. - */ -typedef struct seqcount_mutex { - seqcount_t seqcount; - __SEQ_LOCK(struct mutex *lock); -} seqcount_mutex_t; - /** * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t * @name: Name of the seqcount_mutex_t instance @@ -276,24 +213,6 @@ typedef struct seqcount_mutex { #define seqcount_mutex_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated ww_mutex - * - * A plain sequence counter with external writer synchronization by a - * ww_mutex. The ww_mutex is associated to the sequence counter in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. 
- * - * The write side API functions write_seqcount_begin()/end() automatically - * disable and enable preemption when used with seqcount_ww_mutex_t. - */ -typedef struct seqcount_ww_mutex { - seqcount_t seqcount; - __SEQ_LOCK(struct ww_mutex *lock); -} seqcount_ww_mutex_t; - /** * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t * @name: Name of the seqcount_ww_mutex_t instance @@ -310,30 +229,50 @@ typedef struct seqcount_ww_mutex { #define seqcount_ww_mutex_init(s, lock) \ seqcount_locktype_init(s, lock) -/* - * @preempt: Is the associated write serialization lock preemtpible? +/** + * typedef seqcount_LOCKNAME_t - sequence counter with spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated spinlock + * + * A plain sequence counter with external writer synchronization by a + * spinlock. The spinlock is associated to the sequence count in the + * static initializer or init function. This enables lockdep to validate + * that the write side critical section is properly serialized. */ -#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember) \ -static inline seqcount_t * \ -__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s) \ + +/* + * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers + * @locktype: actual typename + * @lockname: name + * @preemptible: preemptibility of above locktype + * @lockmember: argument for lockdep_assert_held() + */ +#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember) \ +typedef struct seqcount_##lockname { \ + seqcount_t seqcount; \ + __SEQ_LOCK(locktype *lock); \ +} seqcount_##lockname##_t; \ + \ +static __always_inline seqcount_t * \ +__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ -static inline bool \ -__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ +static __always_inline bool \ +__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s) \ { \ - return preempt; \ + return preemptible; \ } \ \ -static inline void \ -__seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ +static __always_inline void \ +__seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } /* - * Similar hooks, but for plain seqcount_t + * __seqprop() for seqcount_t */ static inline seqcount_t *__seqcount_ptr(seqcount_t *s) @@ -351,17 +290,14 @@ static inline void __seqcount_assert(seqcount_t *s) lockdep_assert_preemption_disabled(); } -/* - * @s: Pointer to seqcount_locktype_t, generated hooks first parameter. 
- */ -SEQCOUNT_LOCKTYPE(raw_spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(rwlock, false, s->lock) -SEQCOUNT_LOCKTYPE(mutex, true, s->lock) -SEQCOUNT_LOCKTYPE(ww_mutex, true, &s->lock->base) - -#define __seqprop_case(s, locktype, prop) \ - seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s)) +SEQCOUNT_LOCKTYPE(raw_spinlock_t, raw_spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(spinlock_t, spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) +SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) +SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) + +#define __seqprop_case(s, lockname, prop) \ + seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) #define __seqprop(s, prop) _Generic(*(s), \ seqcount_t: __seqcount_##prop((void *)(s)), \ -- cgit v1.2.3 From e4e9ab3f9f91ad3b88d12363f890e8ad9b59b645 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:00:53 +0200 Subject: seqlock: Fold seqcount_LOCKNAME_init() definition Manual repetition is boring and error prone. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 61 ++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4b259bb4d4b9..501ff47d1e8e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -143,12 +143,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) __SEQ_LOCK(.lock = (assoc_lock)) \ } -#define seqcount_locktype_init(s, assoc_lock) \ -do { \ - seqcount_init(&(s)->seqcount); \ - __SEQ_LOCK((s)->lock = (assoc_lock)); \ -} while (0) - /** * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t * @name: Name of the seqcount_spinlock_t instance @@ -157,14 +151,6 @@ do { \ #define SEQCNT_SPINLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t - * @s: Pointer to the seqcount_spinlock_t instance - * @lock: Pointer to the associated spinlock - */ -#define seqcount_spinlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t * @name: Name of the seqcount_raw_spinlock_t instance @@ -173,14 +159,6 @@ do { \ #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t - * @s: Pointer to the seqcount_raw_spinlock_t instance - * @lock: Pointer to the associated raw_spinlock - */ -#define seqcount_raw_spinlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t * @name: Name of the seqcount_rwlock_t instance @@ -189,14 +167,6 @@ do { \ #define SEQCNT_RWLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t - * @s: Pointer to the seqcount_rwlock_t instance - * @lock: Pointer to the associated rwlock - */ -#define seqcount_rwlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t * @name: Name of the seqcount_mutex_t instance @@ -205,14 +175,6 @@ do { \ #define SEQCNT_MUTEX_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_mutex_init - runtime initializer for seqcount_mutex_t - * @s: Pointer to the seqcount_mutex_t 
instance - * @lock: Pointer to the associated mutex - */ -#define seqcount_mutex_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t * @name: Name of the seqcount_ww_mutex_t instance @@ -222,15 +184,7 @@ do { \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) /** - * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t - * @s: Pointer to the seqcount_ww_mutex_t instance - * @lock: Pointer to the associated ww_mutex - */ -#define seqcount_ww_mutex_init(s, lock) \ - seqcount_locktype_init(s, lock) - -/** - * typedef seqcount_LOCKNAME_t - sequence counter with spinlock associated + * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated * @seqcount: The real sequence counter * @lock: Pointer to the associated spinlock * @@ -240,6 +194,12 @@ do { \ * that the write side critical section is properly serialized. */ +/** + * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t + * @s: Pointer to the seqcount_LOCKNAME_t instance + * @lock: Pointer to the associated LOCKTYPE + */ + /* * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers * @locktype: actual typename @@ -253,6 +213,13 @@ typedef struct seqcount_##lockname { \ __SEQ_LOCK(locktype *lock); \ } seqcount_##lockname##_t; \ \ +static __always_inline void \ +seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ +{ \ + seqcount_init(&s->seqcount); \ + __SEQ_LOCK(s->lock = lock); \ +} \ + \ static __always_inline seqcount_t * \ __seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ -- cgit v1.2.3 From 0efc94c5d15c3da0a69543d86ad2180f39256ed6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:03:13 +0200 Subject: seqcount: Compress SEQCNT_LOCKNAME_ZERO() Less is more. 
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 63 ++++++++++++++----------------------------------- 1 file changed, 18 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 501ff47d1e8e..251dcd6f5cd8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -138,51 +138,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define __SEQ_LOCK(expr) #endif -#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ - .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCK(.lock = (assoc_lock)) \ -} - -/** - * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t - * @name: Name of the seqcount_spinlock_t instance - * @lock: Pointer to the associated spinlock - */ -#define SEQCNT_SPINLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t - * @name: Name of the seqcount_raw_spinlock_t instance - * @lock: Pointer to the associated raw_spinlock - */ -#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t - * @name: Name of the seqcount_rwlock_t instance - * @lock: Pointer to the associated rwlock - */ -#define SEQCNT_RWLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t - * @name: Name of the seqcount_mutex_t instance - * @lock: Pointer to the associated mutex - */ -#define SEQCNT_MUTEX_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t - * @name: Name of the seqcount_ww_mutex_t instance - * @lock: Pointer to the associated ww_mutex - */ -#define SEQCNT_WW_MUTEX_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - /** * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated * @seqcount: The real sequence counter @@ -263,6 +218,24 @@ SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) +/** + * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t + * @name: Name of the seqcount_LOCKNAME_t instance + * @lock: Pointer to the associated LOCKTYPE + */ + +#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ + __SEQ_LOCK(.lock = (assoc_lock)) \ +} + +#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) + + #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) -- cgit v1.2.3 From b5e6a027bd327daa679ca55182a920659e2cbb90 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:11:49 +0200 Subject: seqcount: More consistent seqprop names Attempt uniformity and brevity. 
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 251dcd6f5cd8..a076f783aa36 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -247,9 +247,9 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) __seqprop_case((s), mutex, prop), \ __seqprop_case((s), ww_mutex, prop)) -#define __to_seqcount_t(s) __seqprop(s, ptr) -#define __associated_lock_exists_and_is_preemptible(s) __seqprop(s, preemptible) -#define __assert_write_section_is_protected(s) __seqprop(s, assert) +#define __seqcount_ptr(s) __seqprop(s, ptr) +#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) +#define __seqcount_assert_lock_held(s) __seqprop(s, assert) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -266,7 +266,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ - __read_seqcount_t_begin(__to_seqcount_t(s)) + __read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) { @@ -289,7 +289,7 @@ repeat: * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) \ - raw_read_seqcount_t_begin(__to_seqcount_t(s)) + raw_read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) { @@ -305,7 +305,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ - read_seqcount_t_begin(__to_seqcount_t(s)) + read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned read_seqcount_t_begin(const seqcount_t *s) { @@ -325,7 +325,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ - raw_read_seqcount_t(__to_seqcount_t(s)) + raw_read_seqcount_t(__seqcount_ptr(s)) static inline unsigned raw_read_seqcount_t(const seqcount_t *s) { @@ -353,7 +353,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ - raw_seqcount_t_begin(__to_seqcount_t(s)) + raw_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) { @@ -380,7 +380,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(__to_seqcount_t(s), start) + __read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -400,7 +400,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(__to_seqcount_t(s), start) + read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -414,10 +414,10 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) */ #define raw_write_seqcount_begin(s) \ do { \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if 
(__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(__to_seqcount_t(s)); \ + raw_write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void raw_write_seqcount_t_begin(seqcount_t *s) @@ -433,9 +433,9 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(__to_seqcount_t(s)); \ + raw_write_seqcount_t_end(__seqcount_ptr(s)); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -456,12 +456,12 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) */ #define write_seqcount_begin_nested(s, subclass) \ do { \ - __assert_write_section_is_protected(s); \ + __seqcount_assert_lock_held(s); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass); \ + write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \ } while (0) static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) @@ -483,12 +483,12 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) */ #define write_seqcount_begin(s) \ do { \ - __assert_write_section_is_protected(s); \ + __seqcount_assert_lock_held(s); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(__to_seqcount_t(s)); \ + write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void write_seqcount_t_begin(seqcount_t *s) @@ -504,9 +504,9 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(__to_seqcount_t(s)); \ + write_seqcount_t_end(__seqcount_ptr(s)); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -558,7 +558,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(__to_seqcount_t(s)) + raw_write_seqcount_t_barrier(__seqcount_ptr(s)) static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { @@ -578,7 +578,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(__to_seqcount_t(s)) + write_seqcount_t_invalidate(__seqcount_ptr(s)) static inline void write_seqcount_t_invalidate(seqcount_t *s) { @@ -604,7 +604,7 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s) * checked with read_seqcount_retry(). */ #define raw_read_seqcount_latch(s) \ - raw_read_seqcount_t_latch(__to_seqcount_t(s)) + raw_read_seqcount_t_latch(__seqcount_ptr(s)) static inline int raw_read_seqcount_t_latch(seqcount_t *s) { @@ -695,7 +695,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * patterns to manage the lifetimes of the objects within. */ #define raw_write_seqcount_latch(s) \ - raw_write_seqcount_t_latch(__to_seqcount_t(s)) + raw_write_seqcount_t_latch(__seqcount_ptr(s)) static inline void raw_write_seqcount_t_latch(seqcount_t *s) { -- cgit v1.2.3
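
Editor's note: the net effect of the series above is one generator macro, SEQCOUNT_LOCKTYPE(), that emits the per-lock-type struct, its init helper and the ptr/preemptible/assert properties, plus a _Generic()-based __seqprop() dispatcher that lets a single write_seqcount_begin()/read_seqcount_begin() call site work for seqcount_t and every seqcount_LOCKNAME_t. The following is a minimal, stand-alone user-space sketch of that pattern, not the seqlock.h code itself: all names here (SEQDEMO_LOCKTYPE, demo_seqcount_t, demo_prop, ...) are invented for the example, and pthread locks stand in for the kernel lock types.

/*
 * Illustrative sketch -- NOT kernel code. Mimics the shape of
 * SEQCOUNT_LOCKTYPE() and __seqprop() using invented demo_* names
 * and user-space pthread locks.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the plain seqcount_t. */
typedef struct { unsigned sequence; } demo_seqcount_t;

static inline void demo_seqcount_init(demo_seqcount_t *s)
{
	s->sequence = 0;
}

/*
 * Generator macro: one invocation emits the composite type, its init
 * helper and the per-type property helpers, in the same spirit as
 * SEQCOUNT_LOCKTYPE() emitting seqcount_##lockname##_t and friends.
 */
#define SEQDEMO_LOCKTYPE(locktype, lockname, preemptible)		\
typedef struct demo_seqcount_##lockname {				\
	demo_seqcount_t seqcount;					\
	locktype *lock;							\
} demo_seqcount_##lockname##_t;						\
									\
static inline void							\
demo_seqcount_##lockname##_init(demo_seqcount_##lockname##_t *s,	\
				locktype *lock)				\
{									\
	demo_seqcount_init(&s->seqcount);				\
	s->lock = lock;							\
}									\
									\
static inline demo_seqcount_t *						\
demo_##lockname##_ptr(demo_seqcount_##lockname##_t *s)			\
{									\
	return &s->seqcount;						\
}									\
									\
static inline bool							\
demo_##lockname##_preemptible(demo_seqcount_##lockname##_t *s)		\
{									\
	(void)s;							\
	return preemptible;						\
}

/* Two instantiations, standing in for raw_spinlock_t and struct mutex. */
SEQDEMO_LOCKTYPE(pthread_rwlock_t, rwlock, false)
SEQDEMO_LOCKTYPE(pthread_mutex_t, mutex, true)

/* Fallbacks for the plain counter, like __seqcount_ptr()/_preemptible(). */
static inline demo_seqcount_t *demo_ptr(demo_seqcount_t *s) { return s; }
static inline bool demo_preemptible(demo_seqcount_t *s) { (void)s; return false; }

/*
 * _Generic() dispatcher, the analogue of __seqprop()/__seqprop_case().
 * The (void *) cast lets every (unevaluated) branch type-check even
 * though each helper takes a different pointer type.
 */
#define demo_prop(s, prop)						\
	_Generic(*(s),							\
		demo_seqcount_t:	demo_##prop((void *)(s)),	\
		demo_seqcount_rwlock_t:	demo_rwlock_##prop((void *)(s)), \
		demo_seqcount_mutex_t:	demo_mutex_##prop((void *)(s)))

#define demo_seqcount_ptr(s)		demo_prop(s, ptr)
#define demo_seqcount_preemptible(s)	demo_prop(s, preemptible)

int main(void)
{
	pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	demo_seqcount_mutex_t sm;
	demo_seqcount_t plain;

	demo_seqcount_mutex_init(&sm, &m);
	demo_seqcount_init(&plain);

	/* One call site; _Generic() picks the per-type helper. */
	printf("mutex-backed: seq=%u preemptible=%d\n",
	       demo_seqcount_ptr(&sm)->sequence, demo_seqcount_preemptible(&sm));
	printf("plain:        seq=%u preemptible=%d\n",
	       demo_seqcount_ptr(&plain)->sequence, demo_seqcount_preemptible(&plain));
	return 0;
}

In the kernel proper, the payoff visible in the last patch is that a seqcount_spinlock_t, seqcount_mutex_t, etc. declared via SEQCOUNT_LOCKTYPE() goes through __seqcount_ptr(), __seqcount_lock_preemptible() and __seqcount_assert_lock_held(), so write_seqcount_begin()/end() automatically add preempt_disable()/preempt_enable() for the preemptible lock types and lockdep_assert_held() on the associated lock.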