From c536aa1c5b17fac1ee395932031ff7d82827f2c5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:40 -0500 Subject: kprobes/ftrace: Add recursion protection to the ftrace callback If a ftrace callback does not supply its own recursion protection and does not set the RECURSION_SAFE flag in its ftrace_ops, then ftrace will make a helper trampoline to do so before calling the callback instead of just calling the callback directly. The default for ftrace_ops is going to change. It will expect that handlers provide their own recursion protection, unless its ftrace_ops states otherwise. Link: https://lkml.kernel.org/r/20201028115613.140212174@goodmis.org Link: https://lkml.kernel.org/r/20201106023546.944907560@goodmis.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Andrew Morton Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: "Naveen N. Rao" Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/s390/kernel/ftrace.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index b388e87a08bf..8f31c726537a 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -201,14 +201,21 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe_ctlblk *kcb; - struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + struct kprobe *p; + int bit; - if (unlikely(!p) || kprobe_disabled(p)) + bit = ftrace_test_recursion_trylock(); + if (bit < 0) return; + preempt_disable_notrace(); + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto out; + if (kprobe_running()) { kprobes_inc_nmissed_count(p); - return; + goto out; } __this_cpu_write(current_kprobe, p); @@ -228,6 +235,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } } __this_cpu_write(current_kprobe, NULL); +out: + preempt_enable_notrace(); + ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); -- cgit v1.2.3 From 773c16705058e9be7b0f4ce124e89cd231c120a2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 5 Nov 2020 21:32:46 -0500 Subject: ftrace: Add recording of functions that caused recursion This adds CONFIG_FTRACE_RECORD_RECURSION that will record to a file "recursed_functions" all the functions that caused recursion while a callback to the function tracer was running. Link: https://lkml.kernel.org/r/20201106023548.102375687@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Guo Ren Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Thomas Gleixner Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. 
Peter Anvin" Cc: Kees Cook Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Miroslav Benes Cc: Petr Mladek Cc: Joe Lawrence Cc: Kamalesh Babulal Cc: Mauro Carvalho Chehab Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-csky@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-s390@vger.kernel.org Cc: live-patching@vger.kernel.org Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/ftrace-uses.rst | 6 +- arch/csky/kernel/probes/ftrace.c | 2 +- arch/parisc/kernel/ftrace.c | 2 +- arch/powerpc/kernel/kprobes-ftrace.c | 2 +- arch/s390/kernel/ftrace.c | 2 +- arch/x86/kernel/kprobes/ftrace.c | 2 +- fs/pstore/ftrace.c | 2 +- include/linux/trace_recursion.h | 29 ++++- kernel/livepatch/patch.c | 2 +- kernel/trace/Kconfig | 25 ++++ kernel/trace/Makefile | 1 + kernel/trace/ftrace.c | 4 +- kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_output.c | 6 +- kernel/trace/trace_output.h | 1 + kernel/trace/trace_recursion_record.c | 236 ++++++++++++++++++++++++++++++++++ 17 files changed, 306 insertions(+), 20 deletions(-) create mode 100644 kernel/trace/trace_recursion_record.c (limited to 'arch/s390') diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 86cd14b8e126..5981d5691745 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -118,7 +118,7 @@ can help in this regard. If you start your code with: int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; @@ -130,7 +130,9 @@ The code in between will be safe to use, even if it ends up calling a function that the callback is tracing. Note, on success, ftrace_test_recursion_trylock() will disable preemption, and the ftrace_test_recursion_unlock() will enable it again (if it was previously -enabled). +enabled). The instruction pointer (ip) and its parent (parent_ip) is passed to +ftrace_test_recursion_trylock() to record where the recursion happened +(if CONFIG_FTRACE_RECORD_RECURSION is set). 
Alternatively, if the FTRACE_OPS_FL_RECURSION flag is set on the ftrace_ops (as explained below), then a helper trampoline will be used to test diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index 5eb2604fdf71..f30b179924ef 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -18,7 +18,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; struct kprobe_ctlblk *kcb; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 13d85042810a..1c5d3732bda2 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -210,7 +210,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 5df8d50c65ae..fdfee39938ea 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct kprobe_ctlblk *kcb; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 8f31c726537a..657c1ab45408 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -204,7 +204,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe *p; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index a40a6cdfcca3..954d930a7127 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -20,7 +20,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; int bit; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index 816210fc5d3a..adb0935eb062 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -41,7 +41,7 @@ static void notrace pstore_ftrace_call(unsigned long ip, if (unlikely(oops_in_progress)) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index ac3d73484cb2..228cc56ed66e 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -91,6 +91,9 @@ enum { * not be correct. Allow for a single recursion to cover this case. */ TRACE_TRANSITION_BIT, + + /* Used to prevent recursion recording from recursing. */ + TRACE_RECORD_RECURSION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -142,7 +145,22 @@ static __always_inline int trace_get_context_bit(void) pc & HARDIRQ_MASK ? 
TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +#ifdef CONFIG_FTRACE_RECORD_RECURSION +extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); +# define do_ftrace_record_recursion(ip, pip) \ + do { \ + if (!trace_recursion_test(TRACE_RECORD_RECURSION_BIT)) { \ + trace_recursion_set(TRACE_RECORD_RECURSION_BIT); \ + ftrace_record_recursion(ip, pip); \ + trace_recursion_clear(TRACE_RECORD_RECURSION_BIT); \ + } \ + } while (0) +#else +# define do_ftrace_record_recursion(ip, pip) do { } while (0) +#endif + +static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, + int start, int max) { unsigned int val = current->trace_recursion; int bit; @@ -158,8 +176,10 @@ static __always_inline int trace_test_and_set_recursion(int start, int max) * a switch between contexts. Allow for a single recursion. */ bit = TRACE_TRANSITION_BIT; - if (trace_recursion_test(bit)) + if (trace_recursion_test(bit)) { + do_ftrace_record_recursion(ip, pip); return -1; + } trace_recursion_set(bit); barrier(); return bit + 1; @@ -199,9 +219,10 @@ static __always_inline void trace_clear_recursion(int bit) * Returns: -1 if a recursion happened. * >= 0 if no recursion */ -static __always_inline int ftrace_test_recursion_trylock(void) +static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, + unsigned long parent_ip) { - return trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX); } /** diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 15480bf3ce88..875c5dbbdd33 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,7 +49,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a4020c0b4508..9b11c096d139 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -727,6 +727,31 @@ config TRACE_EVAL_MAP_FILE If unsure, say N. +config FTRACE_RECORD_RECURSION + bool "Record functions that recurse in function tracing" + depends on FUNCTION_TRACER + help + All callbacks that attach to the function tracing have some sort + of protection against recursion. Even though the protection exists, + it adds overhead. This option will create a file in the tracefs + file system called "recursed_functions" that will list the functions + that triggered a recursion. + + This will add more overhead to cases that have recursion. + + If unsure, say N + +config FTRACE_RECORD_RECURSION_SIZE + int "Max number of recursed functions to record" + default 128 + depends on FTRACE_RECORD_RECURSION + help + This defines the limit of number of functions that can be + listed in the "recursed_functions" file, that lists all + the functions that caused a recursion to happen. + This file can be reset, but the limit can not change in + size at runtime. 
+ config GCOV_PROFILE_FTRACE bool "Enable GCOV profiling on ftrace subsystem" depends on GCOV_KERNEL diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e153be351548..7e44cea89fdc 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_DYNAMIC_EVENTS) += trace_dynevent.o obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o +obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 39f2bba89b76..03aad2b5cd5e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6918,7 +6918,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; @@ -6993,7 +6993,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index a2b9fddb8148..1b202e28dfaa 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -447,7 +447,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, if ((unsigned long)ops->private != smp_processor_id()) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 89c414ce1388..646eda6c44a5 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -141,7 +141,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, if (unlikely(!tr->function_enabled)) return; - bit = ftrace_test_recursion_trylock(); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 000e9dc224c6..92b1575ae0ca 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -353,8 +353,8 @@ static inline const char *kretprobed(const char *name) } #endif /* CONFIG_KRETPROBES */ -static void -seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) +void +trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) { #ifdef CONFIG_KALLSYMS char str[KSYM_SYMBOL_LEN]; @@ -420,7 +420,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) goto out; } - seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET); + trace_seq_print_sym(s, ip, sym_flags & TRACE_ITER_SYM_OFFSET); if (sym_flags & TRACE_ITER_SYM_ADDR) trace_seq_printf(s, " <" IP_FMT ">", ip); diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 2f742b74e7e6..4c954636caf0 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -16,6 +16,7 @@ extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); +extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset); extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator 
*iter); diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c new file mode 100644 index 000000000000..b2edac1fe156 --- /dev/null +++ b/kernel/trace/trace_recursion_record.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include + +#include "trace_output.h" + +struct recursed_functions { + unsigned long ip; + unsigned long parent_ip; +}; + +static struct recursed_functions recursed_functions[CONFIG_FTRACE_RECORD_RECURSION_SIZE]; +static atomic_t nr_records; + +/* + * Cache the last found function. Yes, updates to this is racey, but + * so is memory cache ;-) + */ +static unsigned long cached_function; + +void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip) +{ + int index = 0; + int i; + unsigned long old; + + again: + /* First check the last one recorded */ + if (ip == cached_function) + return; + + i = atomic_read(&nr_records); + /* nr_records is -1 when clearing records */ + smp_mb__after_atomic(); + if (i < 0) + return; + + /* + * If there's two writers and this writer comes in second, + * the cmpxchg() below to update the ip will fail. Then this + * writer will try again. It is possible that index will now + * be greater than nr_records. This is because the writer + * that succeeded has not updated the nr_records yet. + * This writer could keep trying again until the other writer + * updates nr_records. But if the other writer takes an + * interrupt, and that interrupt locks up that CPU, we do + * not want this CPU to lock up due to the recursion protection, + * and have a bug report showing this CPU as the cause of + * locking up the computer. To not lose this record, this + * writer will simply use the next position to update the + * recursed_functions, and it will update the nr_records + * accordingly. + */ + if (index < i) + index = i; + if (index >= CONFIG_FTRACE_RECORD_RECURSION_SIZE) + return; + + for (i = index - 1; i >= 0; i--) { + if (recursed_functions[i].ip == ip) { + cached_function = ip; + return; + } + } + + cached_function = ip; + + /* + * We only want to add a function if it hasn't been added before. + * Add to the current location before incrementing the count. + * If it fails to add, then increment the index (save in i) + * and try again. + */ + old = cmpxchg(&recursed_functions[index].ip, 0, ip); + if (old != 0) { + /* Did something else already added this for us? */ + if (old == ip) + return; + /* Try the next location (use i for the next index) */ + index++; + goto again; + } + + recursed_functions[index].parent_ip = parent_ip; + + /* + * It's still possible that we could race with the clearing + * CPU0 CPU1 + * ---- ---- + * ip = func + * nr_records = -1; + * recursed_functions[0] = 0; + * i = -1 + * if (i < 0) + * nr_records = 0; + * (new recursion detected) + * recursed_functions[0] = func + * cmpxchg(recursed_functions[0], + * func, 0) + * + * But the worse that could happen is that we get a zero in + * the recursed_functions array, and it's likely that "func" will + * be recorded again. 
+ */ + i = atomic_read(&nr_records); + smp_mb__after_atomic(); + if (i < 0) + cmpxchg(&recursed_functions[index].ip, ip, 0); + else if (i <= index) + atomic_cmpxchg(&nr_records, i, index + 1); +} +EXPORT_SYMBOL_GPL(ftrace_record_recursion); + +static DEFINE_MUTEX(recursed_function_lock); +static struct trace_seq *tseq; + +static void *recursed_function_seq_start(struct seq_file *m, loff_t *pos) +{ + void *ret = NULL; + int index; + + mutex_lock(&recursed_function_lock); + index = atomic_read(&nr_records); + if (*pos < index) { + ret = &recursed_functions[*pos]; + } + + tseq = kzalloc(sizeof(*tseq), GFP_KERNEL); + if (!tseq) + return ERR_PTR(-ENOMEM); + + trace_seq_init(tseq); + + return ret; +} + +static void *recursed_function_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + int index; + int p; + + index = atomic_read(&nr_records); + p = ++(*pos); + + return p < index ? &recursed_functions[p] : NULL; +} + +static void recursed_function_seq_stop(struct seq_file *m, void *v) +{ + kfree(tseq); + mutex_unlock(&recursed_function_lock); +} + +static int recursed_function_seq_show(struct seq_file *m, void *v) +{ + struct recursed_functions *record = v; + int ret = 0; + + if (record) { + trace_seq_print_sym(tseq, record->parent_ip, true); + trace_seq_puts(tseq, ":\t"); + trace_seq_print_sym(tseq, record->ip, true); + trace_seq_putc(tseq, '\n'); + ret = trace_print_seq(m, tseq); + } + + return ret; +} + +static const struct seq_operations recursed_function_seq_ops = { + .start = recursed_function_seq_start, + .next = recursed_function_seq_next, + .stop = recursed_function_seq_stop, + .show = recursed_function_seq_show +}; + +static int recursed_function_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + mutex_lock(&recursed_function_lock); + /* If this file was opened for write, then erase contents */ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { + /* disable updating records */ + atomic_set(&nr_records, -1); + smp_mb__after_atomic(); + memset(recursed_functions, 0, sizeof(recursed_functions)); + smp_wmb(); + /* enable them again */ + atomic_set(&nr_records, 0); + } + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &recursed_function_seq_ops); + mutex_unlock(&recursed_function_lock); + + return ret; +} + +static ssize_t recursed_function_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + return count; +} + +static int recursed_function_release(struct inode *inode, struct file *file) +{ + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + return 0; +} + +static const struct file_operations recursed_functions_fops = { + .open = recursed_function_open, + .write = recursed_function_write, + .read = seq_read, + .llseek = seq_lseek, + .release = recursed_function_release, +}; + +__init static int create_recursed_functions(void) +{ + struct dentry *dentry; + + dentry = trace_create_file("recursed_functions", 0644, NULL, NULL, + &recursed_functions_fops); + if (!dentry) + pr_warn("WARNING: Failed to create recursed_functions\n"); + return 0; +} + +fs_initcall(create_recursed_functions); -- cgit v1.2.3 From d19ad0775dcd64b49eecf4fa79c17959ebfbd26b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:42:17 -0400 Subject: ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs In preparation to have arguments of a function passed to callbacks attached to functions as default, change the default callback prototype to receive a struct ftrace_regs as the forth 
parameter instead of a pt_regs. Callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags will now need to get the pt_regs via a ftrace_get_regs() helper call. If that helper is called by a callback whose ftrace_ops did not have the FL_SAVE_REGS flag set, it will return NULL. This will allow the ftrace_regs to hold just enough to get the parameters and stack pointer, without the worry that callbacks may have a pt_regs that is not completely filled. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/csky/kernel/probes/ftrace.c | 4 +++- arch/nds32/kernel/ftrace.c | 4 ++-- arch/parisc/kernel/ftrace.c | 8 +++++--- arch/powerpc/kernel/kprobes-ftrace.c | 4 +++- arch/s390/kernel/ftrace.c | 4 +++- arch/x86/kernel/kprobes/ftrace.c | 3 ++- fs/pstore/ftrace.c | 2 +- include/linux/ftrace.h | 16 ++++++++++++++-- include/linux/kprobes.h | 2 +- kernel/livepatch/patch.c | 3 ++- kernel/trace/ftrace.c | 27 +++++++++++++++------------ kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_functions.c | 9 ++++----- kernel/trace/trace_irqsoff.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 20 +++++++++++--------- kernel/trace/trace_stack.c | 2 +- 18 files changed, 71 insertions(+), 45 deletions(-) (limited to 'arch/s390') diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index f30b179924ef..ae2b1c7b3b5c 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -11,17 +11,19 @@ int arch_check_ftrace_location(struct kprobe *p) /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { int bit; bool lr_saver = false; struct kprobe *p; struct kprobe_ctlblk *kcb; + struct pt_regs *regs; bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index 3763b3f8c3db..414f8a780cc3 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -10,7 +10,7 @@ extern void (*ftrace_trace_function)(unsigned long, unsigned long, extern void ftrace_graph_caller(void); noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { __asm__ (""); /* avoid to optimize as pure function */ } @@ -38,7 +38,7 @@ EXPORT_SYMBOL(_mcount); #else /* CONFIG_DYNAMIC_FTRACE */ noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { __asm__ (""); /* avoid to optimize as pure function */ } diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 1c5d3732bda2..0a1e75af5382 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -51,7 +51,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3, - struct pt_regs *regs) + struct ftrace_regs *fregs) { #ifndef CONFIG_DYNAMIC_FTRACE extern ftrace_func_t ftrace_trace_function; @@ -61,7
+61,7 @@ void notrace __hot ftrace_function_trampoline(unsigned long parent, if (function_trace_op->flags & FTRACE_OPS_FL_ENABLED && ftrace_trace_function != ftrace_stub) ftrace_trace_function(self_addr, parent, - function_trace_op, regs); + function_trace_op, fregs); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (dereference_function_descriptor(ftrace_graph_return) != @@ -204,9 +204,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, #ifdef CONFIG_KPROBES_ON_FTRACE void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe_ctlblk *kcb; + struct pt_regs *regs; struct kprobe *p; int bit; @@ -214,6 +215,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index fdfee39938ea..660138f6c4b2 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -14,16 +14,18 @@ /* Ftrace callback handler for kprobes */ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe *p; struct kprobe_ctlblk *kcb; + struct pt_regs *regs; int bit; bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 657c1ab45408..67b80f4412f9 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -198,9 +198,10 @@ int ftrace_disable_ftrace_graph_caller(void) #ifdef CONFIG_KPROBES_ON_FTRACE void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe_ctlblk *kcb; + struct pt_regs *regs; struct kprobe *p; int bit; @@ -208,6 +209,7 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; + regs = ftrace_get_regs(fregs); preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 954d930a7127..373e5fa3ce1f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -14,8 +14,9 @@ /* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct kprobe *p; struct kprobe_ctlblk *kcb; int bit; diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c index adb0935eb062..5939595f0115 100644 --- a/fs/pstore/ftrace.c +++ b/fs/pstore/ftrace.c @@ -26,7 +26,7 @@ static u64 pstore_ftrace_stamp; static void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, - struct pt_regs *regs) + struct ftrace_regs *fregs) { int bit; unsigned long flags; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8dde9c17aaa5..24e1fa52337d 100644 --- 
a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,8 +90,20 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, struct ftrace_ops; +struct ftrace_regs { + struct pt_regs regs; +}; + +static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) +{ + if (!fregs) + return NULL; + + return &fregs->regs; +} + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); @@ -259,7 +271,7 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); extern void ftrace_stub(unsigned long a0, unsigned long a1, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* !CONFIG_FUNCTION_TRACER */ /* diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 629abaf25681..be73350955e4 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -345,7 +345,7 @@ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs); + struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); #endif diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 875c5dbbdd33..f89f9e7e9b07 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -40,8 +40,9 @@ struct klp_ops *klp_find_ops(void *old_func) static void notrace klp_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *fops, - struct pt_regs *regs) + struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct klp_ops *ops; struct klp_func *func; int patch_state; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3db64fb0cce8..67888311784e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -121,7 +121,7 @@ struct ftrace_ops global_ops; #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); #else /* See comment below, where ftrace_ops_list_func is defined */ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); @@ -140,7 +140,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; int pid; @@ -154,7 +154,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, return; } - op->saved_func(ip, parent_ip, op, regs); + op->saved_func(ip, parent_ip, op, fregs); } static void ftrace_sync_ipi(void *data) @@ -754,7 +754,7 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) static void function_profile_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -2143,6 +2143,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) else rec->flags &= ~FTRACE_FL_TRAMP_EN; } + if (flag & FTRACE_FL_DIRECT) { /* 
* If there's only one user (direct_ops helper) @@ -2368,8 +2369,9 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) } static void call_direct_funcs(unsigned long ip, unsigned long pip, - struct ftrace_ops *ops, struct pt_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); unsigned long addr; addr = ftrace_find_rec_direct(ip); @@ -4292,7 +4294,7 @@ static int __init ftrace_mod_cmd_init(void) core_initcall(ftrace_mod_cmd_init); static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct ftrace_probe_ops *probe_ops; struct ftrace_func_probe *probe; @@ -6911,8 +6913,9 @@ void ftrace_reset_array_ops(struct trace_array *tr) static nokprobe_inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ignored, struct pt_regs *regs) + struct ftrace_ops *ignored, struct ftrace_regs *fregs) { + struct pt_regs *regs = ftrace_get_regs(fregs); struct ftrace_ops *op; int bit; @@ -6945,7 +6948,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, pr_warn("op=%p %pS\n", op, op); goto out; } - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); } } while_for_each_ftrace_op(op); out: @@ -6968,9 +6971,9 @@ out: */ #if ARCH_SUPPORTS_FTRACE_OPS static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { - __ftrace_ops_list_func(ip, parent_ip, NULL, regs); + __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } NOKPROBE_SYMBOL(ftrace_ops_list_func); #else @@ -6987,7 +6990,7 @@ NOKPROBE_SYMBOL(ftrace_ops_no_ops); * this function will be called by the mcount trampoline. 
*/ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { int bit; @@ -6998,7 +7001,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) - op->func(ip, parent_ip, op, regs); + op->func(ip, parent_ip, op, fregs); preempt_enable_notrace(); trace_clear_recursion(bit); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 1b202e28dfaa..a71181655958 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -432,7 +432,7 @@ NOKPROBE_SYMBOL(perf_trace_buf_update); #ifdef CONFIG_FUNCTION_TRACER static void perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *pt_regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct ftrace_entry *entry; struct perf_event *event; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f4b459bb6d33..98d194d8460e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3673,7 +3673,7 @@ static struct trace_event_file event_trace_file __initdata; static void __init function_test_events_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *regs) { struct trace_buffer *buffer; struct ring_buffer_event *event; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 646eda6c44a5..c5095dd28e20 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -23,10 +23,10 @@ static void tracing_start_function_trace(struct trace_array *tr); static void tracing_stop_function_trace(struct trace_array *tr); static void function_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs); + struct ftrace_ops *op, struct ftrace_regs *fregs); static struct tracer_flags func_flags; /* Our option */ @@ -89,7 +89,6 @@ void ftrace_destroy_function_files(struct trace_array *tr) static int function_trace_init(struct trace_array *tr) { ftrace_func_t func; - /* * Instance trace_arrays get their ops allocated * at instance creation. 
Unless it failed @@ -129,7 +128,7 @@ static void function_trace_start(struct trace_array *tr) static void function_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; struct trace_array_cpu *data; @@ -178,7 +177,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, static void function_stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = op->private; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 10bbb0f381d5..d06aab4dcbb8 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -138,7 +138,7 @@ static int func_prolog_dec(struct trace_array *tr, */ static void irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 97b10bb31a1f..c0181066dbe9 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -212,7 +212,7 @@ static void wakeup_print_header(struct seq_file *s) */ static void wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 8ee3c0bb5d8a..5ed081c6471c 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -107,7 +107,7 @@ static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe1_cnt++; } @@ -116,7 +116,7 @@ static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe2_cnt++; } @@ -125,7 +125,7 @@ static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_probe3_cnt++; } @@ -134,7 +134,7 @@ static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_global_cnt++; } @@ -143,7 +143,7 @@ static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { trace_selftest_test_dyn_cnt++; } @@ -414,7 +414,7 @@ static int trace_selftest_recursion_cnt; static void trace_selftest_test_recursion_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { /* * This function is registered without the recursion safe flag. 
@@ -429,7 +429,7 @@ static void trace_selftest_test_recursion_func(unsigned long ip, static void trace_selftest_test_recursion_safe_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { /* * We said we would provide our own recursion. By calling @@ -548,9 +548,11 @@ static enum { static void trace_selftest_test_regs_func(unsigned long ip, unsigned long pip, struct ftrace_ops *op, - struct pt_regs *pt_regs) + struct ftrace_regs *fregs) { - if (pt_regs) + struct pt_regs *regs = ftrace_get_regs(fregs); + + if (regs) trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND; else trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND; diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 969db526a563..63c285042051 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -290,7 +290,7 @@ static void check_stack(unsigned long ip, unsigned long *stack) static void stack_trace_call(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *pt_regs) + struct ftrace_ops *op, struct ftrace_regs *fregs) { unsigned long stack; -- cgit v1.2.3 From 2860cd8a235375df3c8ec8039d9fe5eb2f658b86 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 28 Oct 2020 17:15:27 -0400 Subject: livepatch: Use the default ftrace_ops instead of REGS when ARGS is available When CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, the ftrace call will be able to set the ip of the calling function. This will improve the performance of live kernel patching where it does not need all the regs to be stored just to change the instruction pointer. If all archs that support live kernel patching also support HAVE_DYNAMIC_FTRACE_WITH_ARGS, then the architecture specific function klp_arch_set_pc() could be made generic. It is possible that an arch can support HAVE_DYNAMIC_FTRACE_WITH_ARGS but not HAVE_DYNAMIC_FTRACE_WITH_REGS and then have access to live patching. 
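A rough sketch of what that generic helper could look like, assuming every such arch provides ftrace_instruction_pointer_set() (this is an illustration only, not part of the patch):

	#include <linux/ftrace.h>

	/* Hypothetical generic form of the per-arch klp_arch_set_pc() */
	static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip)
	{
		/* Only the return ip is rewritten; no full pt_regs is required */
		ftrace_instruction_pointer_set(fregs, ip);
	}
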
Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: live-patching@vger.kernel.org Acked-by: Peter Zijlstra (Intel) Acked-by: Miroslav Benes Signed-off-by: Steven Rostedt (VMware) --- arch/powerpc/include/asm/livepatch.h | 4 +++- arch/s390/include/asm/livepatch.h | 5 ++++- arch/x86/include/asm/ftrace.h | 3 +++ arch/x86/include/asm/livepatch.h | 4 ++-- arch/x86/kernel/ftrace_64.S | 4 ++++ include/linux/ftrace.h | 7 +++++++ kernel/livepatch/Kconfig | 2 +- kernel/livepatch/patch.c | 9 +++++---- 8 files changed, 29 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4a3d5d25fed5..ae25e6e72997 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -12,8 +12,10 @@ #include #ifdef CONFIG_LIVEPATCH -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->nip = ip; } diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 818612b784cd..d578a8c76676 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -11,10 +11,13 @@ #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H +#include #include -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { + struct pt_regs *regs = ftrace_get_regs(fregs); + regs->psw.addr = ip; } diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index e00fe88146e0..9f3130f40807 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -54,6 +54,9 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return NULL; return &fregs->regs; } + +#define ftrace_instruction_pointer_set(fregs, _ip) \ + do { (fregs)->regs.ip = (_ip); } while (0) #endif #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/include/asm/livepatch.h b/arch/x86/include/asm/livepatch.h index 1fde1ab6559e..7c5cc6660e4b 100644 --- a/arch/x86/include/asm/livepatch.h +++ b/arch/x86/include/asm/livepatch.h @@ -12,9 +12,9 @@ #include #include -static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip) +static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) { - regs->ip = ip; + ftrace_instruction_pointer_set(fregs, ip); } #endif /* _ASM_X86_LIVEPATCH_H */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 60e3b64f5ea6..0d54099c2a3a 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -157,6 +157,10 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) call ftrace_stub + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, MCOUNT_REG_SIZE(%rsp) + restore_mcount_regs /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 588ea7023a7a..9a8ce28e4485 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -97,6 +97,13 @@ struct ftrace_regs { }; #define arch_ftrace_get_regs(fregs) (&(fregs)->regs) +/* + * ftrace_instruction_pointer_set() is to be defined by the architecture + * if to allow setting of the instruction pointer from the ftrace_regs + * when HAVE_DYNAMIC_FTRACE_WITH_ARGS is set and it supports + * live kernel patching. 
+ */ +#define ftrace_instruction_pointer_set(fregs, ip) do { } while (0) #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs) diff --git a/kernel/livepatch/Kconfig b/kernel/livepatch/Kconfig index 54102deb50ba..53d51ed619a3 100644 --- a/kernel/livepatch/Kconfig +++ b/kernel/livepatch/Kconfig @@ -6,7 +6,7 @@ config HAVE_LIVEPATCH config LIVEPATCH bool "Kernel Live Patching" - depends on DYNAMIC_FTRACE_WITH_REGS + depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS depends on MODULES depends on SYSFS depends on KALLSYMS_ALL diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index f89f9e7e9b07..e8029aea67f1 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -42,7 +42,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, struct ftrace_ops *fops, struct ftrace_regs *fregs) { - struct pt_regs *regs = ftrace_get_regs(fregs); struct klp_ops *ops; struct klp_func *func; int patch_state; @@ -118,7 +117,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, if (func->nop) goto unlock; - klp_arch_set_pc(regs, (unsigned long)func->new_func); + klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: preempt_enable_notrace(); @@ -200,8 +199,10 @@ static int klp_patch_func(struct klp_func *func) return -ENOMEM; ops->fops.func = klp_ftrace_handler; - ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | - FTRACE_OPS_FL_DYNAMIC | + ops->fops.flags = FTRACE_OPS_FL_DYNAMIC | +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS + FTRACE_OPS_FL_SAVE_REGS | +#endif FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_PERMANENT; -- cgit v1.2.3
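
Taken together, a callback written against the final prototype of this series might look like the following sketch (my_regs_callback, my_ops and the counter are hypothetical names; ftrace_get_regs() only returns a non-NULL pt_regs when the ftrace_ops was registered with FTRACE_OPS_FL_SAVE_REGS):

	#include <linux/ftrace.h>

	static unsigned long my_regs_hits;	/* hypothetical counter standing in for real work */

	static void notrace my_regs_callback(unsigned long ip, unsigned long parent_ip,
					     struct ftrace_ops *op, struct ftrace_regs *fregs)
	{
		/* NULL unless the ops below asked for full registers */
		struct pt_regs *regs = ftrace_get_regs(fregs);
		int bit;

		bit = ftrace_test_recursion_trylock(ip, parent_ip);
		if (bit < 0)
			return;

		if (regs)
			my_regs_hits++;

		ftrace_test_recursion_unlock(bit);
	}

	static struct ftrace_ops my_ops = {
		.func  = my_regs_callback,
		.flags = FTRACE_OPS_FL_SAVE_REGS,
	};

	/* Registration would be done elsewhere, e.g. register_ftrace_function(&my_ops); */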