From 9b84fadc444de5456ab5f5487e2108311c724c3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 15 Oct 2021 13:42:40 -0400 Subject: tracing: Reuse logic from perf's get_recursion_context() Instead of having branches that adds noise to the branch prediction, use the addition logic to set the bit for the level of interrupt context that the state is currently in. This copies the logic from perf's get_recursion_context() function. Link: https://lore.kernel.org/all/20211015161702.GF174703@worktop.programming.kicks-ass.net/ Suggested-by: Peter Zijlstra Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux/trace_recursion.h') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index a9f9c5714e65..f6da7a03bff0 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -137,12 +137,13 @@ enum { static __always_inline int trace_get_context_bit(void) { unsigned long pc = preempt_count(); + unsigned char bit = 0; - if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) - return TRACE_CTX_NORMAL; - else - return pc & NMI_MASK ? TRACE_CTX_NMI : - pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; + bit += !!(pc & (NMI_MASK)); + bit += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + + return TRACE_CTX_NORMAL - bit; } #ifdef CONFIG_FTRACE_RECORD_RECURSION -- cgit v1.2.3 From 91ebe8bcbff9d2ff21303e73bf7434f39a98b255 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 15 Oct 2021 15:01:19 -0400 Subject: tracing/perf: Add interrupt_context_level() helper Now that there are three different instances of doing the addition trick to the preempt_count() and NMI_MASK, HARDIRQ_MASK and SOFTIRQ_OFFSET macros, it deserves a helper function defined in the preempt.h header. Add the interrupt_context_level() helper and replace the three instances that do that logic with it. Link: https://lore.kernel.org/all/20211015142541.4badd8a9@gandalf.local.home/ Signed-off-by: Steven Rostedt (VMware) --- include/linux/preempt.h | 21 +++++++++++++++++++++ include/linux/trace_recursion.h | 7 +------ kernel/events/internal.h | 7 +------ kernel/trace/ring_buffer.c | 7 +------ 4 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux/trace_recursion.h') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 4d244e295e85..b32e3dabe28b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,6 +77,27 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include +/** + * interrupt_context_level - return interrupt context level + * + * Returns the current interrupt context level. + * 0 - normal context + * 1 - softirq context + * 2 - hardirq context + * 3 - NMI context + */ +static __always_inline unsigned char interrupt_context_level(void) +{ + unsigned long pc = preempt_count(); + unsigned char level = 0; + + level += !!(pc & (NMI_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + + return level; +} + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index f6da7a03bff0..1d8cce02c3fb 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -136,12 +136,7 @@ enum { static __always_inline int trace_get_context_bit(void) { - unsigned long pc = preempt_count(); - unsigned char bit = 0; - - bit += !!(pc & (NMI_MASK)); - bit += !!(pc & (NMI_MASK | HARDIRQ_MASK)); - bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + unsigned char bit = interrupt_context_level(); return TRACE_CTX_NORMAL - bit; } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 228801e20788..082832738c8f 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,12 +205,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - unsigned int pc = preempt_count(); - unsigned char rctx = 0; - - rctx += !!(pc & (NMI_MASK)); - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + unsigned char rctx = interrupt_context_level(); if (recursion[rctx]) return -1; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 15d4380006e3..f6520d0a4c8c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3167,12 +3167,7 @@ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { unsigned int val = cpu_buffer->current_context; - unsigned long pc = preempt_count(); - int bit = 0; - - bit += !!(pc & (NMI_MASK)); - bit += !!(pc & (NMI_MASK | HARDIRQ_MASK)); - bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + int bit = interrupt_context_level(); bit = RB_CTX_NORMAL - bit; -- cgit v1.2.3 From bce5c81cb31f7f124ce231ec79df9e85a8bac132 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 19 Oct 2021 09:25:20 -0400 Subject: tracing: Explain the trace recursion transition bit better The current text of the explanation of the transition bit in the trace recursion protection is not very clear. Improve the text, so that when all the archs no longer have the issue of tracing between a start of a new (interrupt) context and updating the preempt_count to reflect the new context, that it may be removed. Link: https://lore.kernel.org/all/20211018220203.064a42ed@gandalf.local.home/ Suggested-by: Petr Mladek Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_recursion.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux/trace_recursion.h') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 1d8cce02c3fb..24f284eb55a7 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -168,8 +168,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* - * It could be that preempt_count has not been updated during - * a switch between contexts. Allow for a single recursion. + * If an interrupt occurs during a trace, and another trace + * happens in that interrupt but before the preempt_count is + * updated to reflect the new interrupt context, then this + * will think a recursion occurred, and the event will be dropped. + * Let a single instance happen via the TRANSITION_BIT to + * not drop those events. */ bit = TRACE_TRANSITION_BIT; if (val & (1 << bit)) { -- cgit v1.2.3 From ce5e48036c9e76a2a5bd4d9079eac273087a533a Mon Sep 17 00:00:00 2001 From: 王贇 Date: Wed, 27 Oct 2021 11:14:44 +0800 Subject: ftrace: disable preemption when recursion locked As the documentation explained, ftrace_test_recursion_trylock() and ftrace_test_recursion_unlock() were supposed to disable and enable preemption properly, however currently this work is done outside of the function, which could be missing by mistake. And since the internal using of trace_test_and_set_recursion() and trace_clear_recursion() also require preemption disabled, we can just merge the logical. This patch will make sure the preemption has been disabled when trace_test_and_set_recursion() return bit >= 0, and trace_clear_recursion() will enable the preemption if previously enabled. Link: https://lkml.kernel.org/r/13bde807-779c-aa4c-0672-20515ae365ea@linux.alibaba.com CC: Petr Mladek Cc: Guo Ren Cc: Ingo Molnar Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Thomas Gleixner Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Joe Lawrence Cc: Masami Hiramatsu Cc: Nicholas Piggin Cc: Jisheng Zhang CC: Steven Rostedt CC: Miroslav Benes Reported-by: Abaci Suggested-by: Peter Zijlstra Signed-off-by: Michael Wang [ Removed extra line in comment - SDR ] Signed-off-by: Steven Rostedt (VMware) --- arch/csky/kernel/probes/ftrace.c | 2 -- arch/parisc/kernel/ftrace.c | 2 -- arch/powerpc/kernel/kprobes-ftrace.c | 2 -- arch/riscv/kernel/probes/ftrace.c | 2 -- arch/x86/kernel/kprobes/ftrace.c | 2 -- include/linux/trace_recursion.h | 11 ++++++++++- kernel/livepatch/patch.c | 12 ++++++------ kernel/trace/ftrace.c | 15 +++++---------- kernel/trace/trace_functions.c | 5 ----- 9 files changed, 21 insertions(+), 32 deletions(-) (limited to 'include/linux/trace_recursion.h') diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index b388228abbf2..834cffcfbce3 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 01581f715737..b14011d3c2f1 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -211,7 +211,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -240,7 +239,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58338cc..072ebe7f290b 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a82f419..7142ec42e889 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f6d3a5..dd2ec14adb77 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 24f284eb55a7..a13f23b04d73 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -155,6 +155,9 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +/* + * Preemption is promised to be disabled when return bit >= 0. + */ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start, int max) { @@ -189,14 +192,20 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign current->trace_recursion = val; barrier(); + preempt_disable_notrace(); + return bit + 1; } +/* + * Preemption will be enabled (if it was previously enabled). + */ static __always_inline void trace_clear_recursion(int bit) { if (!bit) return; + preempt_enable_notrace(); barrier(); bit--; trace_recursion_clear(bit); @@ -209,7 +218,7 @@ static __always_inline void trace_clear_recursion(int bit) * tracing recursed in the same context (normal vs interrupt), * * Returns: -1 if a recursion happened. - * >= 0 if no recursion + * >= 0 if no recursion. */ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index e8029aea67f1..fe316c021d73 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,14 +49,15 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); + /* + * The ftrace_test_recursion_trylock() will disable preemption, + * which is required for the variant of synchronize_rcu() that is + * used to allow patching functions where RCU is not watching. + * See klp_synchronize_transition() for more details. + */ bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; - /* - * A variant of synchronize_rcu() is used to allow patching functions - * where RCU is not watching, see klp_synchronize_transition(). - */ - preempt_disable_notrace(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); @@ -120,7 +121,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2057ad363772..b4ed1a301232 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7198,16 +7198,15 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; + /* + * The ftrace_test_and_set_recursion() will disable preemption, + * which is required since some of the ops may be dynamically + * allocated, they must be freed after a synchronize_rcu(). + */ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_rcu(). - */ - preempt_disable_notrace(); - do_for_each_ftrace_op(op, ftrace_ops_list) { /* Stub functions don't need to be called nor tested */ if (op->flags & FTRACE_OPS_FL_STUB) @@ -7231,7 +7230,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -7279,12 +7277,9 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) op->func(ip, parent_ip, op, fregs); - preempt_enable_notrace(); trace_clear_recursion(bit); } NOKPROBE_SYMBOL(ftrace_ops_assist_func); diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 1f0e63f5d1f9..9f1bfbe105e8 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -186,7 +186,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, return; trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -194,7 +193,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, trace_function(tr, ip, parent_ip, trace_ctx); ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } #ifdef CONFIG_UNWINDER_ORC @@ -298,8 +296,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); if (atomic_read(&data->disabled)) @@ -324,7 +320,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, out: ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } static void -- cgit v1.2.3