From a2d7629048322ae62bff57f34f5f995e25ed234c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Oct 2015 11:38:08 -0400 Subject: tracing: Have stack tracer force RCU to be watching The stack tracer was triggering the WARN_ON() in module.c: static void module_assert_mutex_or_preempt(void) { #ifdef CONFIG_LOCKDEP if (unlikely(!debug_locks)) return; WARN_ON(!rcu_read_lock_sched_held() && !lockdep_is_held(&module_mutex)); #endif } The reason is that the stack tracer traces all function calls, and some of those calls happen while exiting or entering user space and idle. Some of these functions are called after RCU had already stopped watching, as RCU does not watch userspace or idle CPUs. If a max stack is hit, then the save_stack_trace() is called, which will check module addresses and call module_assert_mutex_or_preempt(), and then trigger the warning. Sad part is, the warning itself will also do a stack trace and tigger the same warning. That probably should be fixed. The warning was added by 0be964be0d45 "module: Sanitize RCU usage and locking" but this bug has probably been around longer. But it's unlikely to cause much harm, but the new warning causes the system to lock up. Cc: stable@vger.kernel.org # 4.2+ Cc: Peter Zijlstra Cc:"Paul E. McKenney" Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/trace/trace_stack.c') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b746399ab59c..5f29402bff0f 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -88,6 +88,12 @@ check_stack(unsigned long ip, unsigned long *stack) local_irq_save(flags); arch_spin_lock(&max_stack_lock); + /* + * RCU may not be watching, make it see us. + * The stack trace code uses rcu_sched. + */ + rcu_irq_enter(); + /* In case another CPU set the tracer_frame on us */ if (unlikely(!frame_size)) this_size -= tracer_frame; @@ -169,6 +175,7 @@ check_stack(unsigned long ip, unsigned long *stack) } out: + rcu_irq_exit(); arch_spin_unlock(&max_stack_lock); local_irq_restore(flags); } -- cgit v1.2.3 From 1904be1b6bb92058c8e00063dd59df2df294e258 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Oct 2015 21:48:02 -0400 Subject: tracing: Do not allow stack_tracer to record stack in NMI The code in stack tracer should not be executed within an NMI as it grabs spinlocks and stack tracing an NMI gives the possibility of causing a deadlock. Although this is safe on x86_64, because it does not perform stack traces when the task struct stack is not in use (interrupts and NMIs), it may be an issue for NMIs on i386 and other archs that use the same stack as the NMI. Signed-off-by: Steven Rostedt --- kernel/trace/trace_stack.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/trace/trace_stack.c') diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 5f29402bff0f..8abf1ba18085 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -85,6 +85,10 @@ check_stack(unsigned long ip, unsigned long *stack) if (!object_is_on_stack(stack)) return; + /* Can't do this from NMI context (can cause deadlocks) */ + if (in_nmi()) + return; + local_irq_save(flags); arch_spin_lock(&max_stack_lock); -- cgit v1.2.3 From bb99d8ccec7f83a2730a29d1ae7eee5ffa446a9e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 30 Oct 2015 14:25:39 +0900 Subject: tracing: Allow arch-specific stack tracer A stack frame may be used in a different way depending on cpu architecture. Thus it is not always appropriate to slurp the stack contents, as current check_stack() does, in order to calcurate a stack index (height) at a given function call. At least not on arm64. In addition, there is a possibility that we will mistakenly detect a stale stack frame which has not been overwritten. This patch makes check_stack() a weak function so as to later implement arch-specific version. Link: http://lkml.kernel.org/r/1446182741-31019-5-git-send-email-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 11 +++++++ kernel/trace/trace_stack.c | 80 +++++++++++++++++++++++++--------------------- 2 files changed, 54 insertions(+), 37 deletions(-) (limited to 'kernel/trace/trace_stack.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6cd8c0ee4b6f..b4c92ab9e08b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -263,7 +263,18 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER + +#define STACK_TRACE_ENTRIES 500 + +struct stack_trace; + +extern unsigned stack_trace_index[]; +extern struct stack_trace stack_trace_max; +extern unsigned long stack_trace_max_size; +extern arch_spinlock_t max_stack_lock; + extern int stack_tracer_enabled; +void stack_trace_print(void); int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index b746399ab59c..50945a7939f4 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -16,24 +16,22 @@ #include "trace.h" -#define STACK_TRACE_ENTRIES 500 - static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] = { [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX }; -static unsigned stack_dump_index[STACK_TRACE_ENTRIES]; +unsigned stack_trace_index[STACK_TRACE_ENTRIES]; /* * Reserve one entry for the passed in ip. This will allow * us to remove most or all of the stack size overhead * added by the stack tracer itself. */ -static struct stack_trace max_stack_trace = { +struct stack_trace stack_trace_max = { .max_entries = STACK_TRACE_ENTRIES - 1, .entries = &stack_dump_trace[0], }; -static unsigned long max_stack_size; -static arch_spinlock_t max_stack_lock = +unsigned long stack_trace_max_size; +arch_spinlock_t max_stack_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static DEFINE_PER_CPU(int, trace_active); @@ -42,30 +40,38 @@ static DEFINE_MUTEX(stack_sysctl_mutex); int stack_tracer_enabled; static int last_stack_tracer_enabled; -static inline void print_max_stack(void) +void stack_trace_print(void) { long i; int size; pr_emerg(" Depth Size Location (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries); + stack_trace_max.nr_entries); - for (i = 0; i < max_stack_trace.nr_entries; i++) { + for (i = 0; i < stack_trace_max.nr_entries; i++) { if (stack_dump_trace[i] == ULONG_MAX) break; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i], + pr_emerg("%3ld) %8d %5d %pS\n", i, stack_trace_index[i], size, (void *)stack_dump_trace[i]); } } -static inline void +/* + * When arch-specific code overides this function, the following + * data should be filled up, assuming max_stack_lock is held to + * prevent concurrent updates. + * stack_trace_index[] + * stack_trace_max + * stack_trace_max_size + */ +void __weak check_stack(unsigned long ip, unsigned long *stack) { unsigned long this_size, flags; unsigned long *p, *top, *start; @@ -78,7 +84,7 @@ check_stack(unsigned long ip, unsigned long *stack) /* Remove the frame of the tracer */ this_size -= frame_size; - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) return; /* we do not handle interrupt stacks yet */ @@ -93,18 +99,18 @@ check_stack(unsigned long ip, unsigned long *stack) this_size -= tracer_frame; /* a race could have already updated it */ - if (this_size <= max_stack_size) + if (this_size <= stack_trace_max_size) goto out; - max_stack_size = this_size; + stack_trace_max_size = this_size; - max_stack_trace.nr_entries = 0; - max_stack_trace.skip = 3; + stack_trace_max.nr_entries = 0; + stack_trace_max.skip = 3; - save_stack_trace(&max_stack_trace); + save_stack_trace(&stack_trace_max); /* Skip over the overhead of the stack tracer itself */ - for (i = 0; i < max_stack_trace.nr_entries; i++) { + for (i = 0; i < stack_trace_max.nr_entries; i++) { if (stack_dump_trace[i] == ip) break; } @@ -124,18 +130,18 @@ check_stack(unsigned long ip, unsigned long *stack) * loop will only happen once. This code only takes place * on a new max, so it is far from a fast path. */ - while (i < max_stack_trace.nr_entries) { + while (i < stack_trace_max.nr_entries) { int found = 0; - stack_dump_index[x] = this_size; + stack_trace_index[x] = this_size; p = start; - for (; p < top && i < max_stack_trace.nr_entries; p++) { + for (; p < top && i < stack_trace_max.nr_entries; p++) { if (stack_dump_trace[i] == ULONG_MAX) break; if (*p == stack_dump_trace[i]) { stack_dump_trace[x] = stack_dump_trace[i++]; - this_size = stack_dump_index[x++] = + this_size = stack_trace_index[x++] = (top - p) * sizeof(unsigned long); found = 1; /* Start the search from here */ @@ -150,7 +156,7 @@ check_stack(unsigned long ip, unsigned long *stack) if (unlikely(!tracer_frame)) { tracer_frame = (p - stack) * sizeof(unsigned long); - max_stack_size -= tracer_frame; + stack_trace_max_size -= tracer_frame; } } } @@ -159,12 +165,12 @@ check_stack(unsigned long ip, unsigned long *stack) i++; } - max_stack_trace.nr_entries = x; + stack_trace_max.nr_entries = x; for (; x < i; x++) stack_dump_trace[x] = ULONG_MAX; if (task_stack_end_corrupted(current)) { - print_max_stack(); + stack_trace_print(); BUG(); } @@ -262,7 +268,7 @@ __next(struct seq_file *m, loff_t *pos) { long n = *pos - 1; - if (n > max_stack_trace.nr_entries || stack_dump_trace[n] == ULONG_MAX) + if (n > stack_trace_max.nr_entries || stack_dump_trace[n] == ULONG_MAX) return NULL; m->private = (void *)n; @@ -332,9 +338,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, " Depth Size Location" " (%d entries)\n" " ----- ---- --------\n", - max_stack_trace.nr_entries); + stack_trace_max.nr_entries); - if (!stack_tracer_enabled && !max_stack_size) + if (!stack_tracer_enabled && !stack_trace_max_size) print_disabled(m); return 0; @@ -342,17 +348,17 @@ static int t_show(struct seq_file *m, void *v) i = *(long *)v; - if (i >= max_stack_trace.nr_entries || + if (i >= stack_trace_max.nr_entries || stack_dump_trace[i] == ULONG_MAX) return 0; - if (i+1 == max_stack_trace.nr_entries || + if (i+1 == stack_trace_max.nr_entries || stack_dump_trace[i+1] == ULONG_MAX) - size = stack_dump_index[i]; + size = stack_trace_index[i]; else - size = stack_dump_index[i] - stack_dump_index[i+1]; + size = stack_trace_index[i] - stack_trace_index[i+1]; - seq_printf(m, "%3ld) %8d %5d ", i, stack_dump_index[i], size); + seq_printf(m, "%3ld) %8d %5d ", i, stack_trace_index[i], size); trace_lookup_stack(m, i); @@ -442,7 +448,7 @@ static __init int stack_trace_init(void) return 0; trace_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); + &stack_trace_max_size, &stack_max_size_fops); trace_create_file("stack_trace", 0444, d_tracer, NULL, &stack_trace_fops); -- cgit v1.2.3 From d332736df0c277905de06311ae084e2c76580a3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 3 Nov 2015 14:50:15 -0500 Subject: tracing: Rename max_stack_lock to stack_trace_max_lock Now that max_stack_lock is a global variable, it requires a naming convention that is unlikely to collide. Rename it to the same naming convention that the other stack_trace variables have. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/trace_stack.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/trace/trace_stack.c') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4c92ab9e08b..eae6548efbf0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -271,7 +271,7 @@ struct stack_trace; extern unsigned stack_trace_index[]; extern struct stack_trace stack_trace_max; extern unsigned long stack_trace_max_size; -extern arch_spinlock_t max_stack_lock; +extern arch_spinlock_t stack_trace_max_lock; extern int stack_tracer_enabled; void stack_trace_print(void); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 50945a7939f4..0bd212af406c 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -31,7 +31,7 @@ struct stack_trace stack_trace_max = { }; unsigned long stack_trace_max_size; -arch_spinlock_t max_stack_lock = +arch_spinlock_t stack_trace_max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; static DEFINE_PER_CPU(int, trace_active); @@ -65,7 +65,7 @@ void stack_trace_print(void) /* * When arch-specific code overides this function, the following - * data should be filled up, assuming max_stack_lock is held to + * data should be filled up, assuming stack_trace_max_lock is held to * prevent concurrent updates. * stack_trace_index[] * stack_trace_max @@ -92,7 +92,7 @@ check_stack(unsigned long ip, unsigned long *stack) return; local_irq_save(flags); - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); /* In case another CPU set the tracer_frame on us */ if (unlikely(!frame_size)) @@ -175,7 +175,7 @@ check_stack(unsigned long ip, unsigned long *stack) } out: - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); local_irq_restore(flags); } @@ -246,9 +246,9 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); *ptr = val; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); per_cpu(trace_active, cpu)--; local_irq_restore(flags); @@ -291,7 +291,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) cpu = smp_processor_id(); per_cpu(trace_active, cpu)++; - arch_spin_lock(&max_stack_lock); + arch_spin_lock(&stack_trace_max_lock); if (*pos == 0) return SEQ_START_TOKEN; @@ -303,7 +303,7 @@ static void t_stop(struct seq_file *m, void *p) { int cpu; - arch_spin_unlock(&max_stack_lock); + arch_spin_unlock(&stack_trace_max_lock); cpu = smp_processor_id(); per_cpu(trace_active, cpu)--; -- cgit v1.2.3