From c29f122cd7fc178b72b1335b1fce0dff2e5c0f5d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Nov 2012 17:17:59 -0400 Subject: ftrace: Add context level recursion bit checking Currently for recursion checking in the function tracer, ftrace tests a task_struct bit to determine if the function tracer had recursed or not. If it has, then it will will return without going further. But this leads to races. If an interrupt came in after the bit was set, the functions being traced would see that bit set and think that the function tracer recursed on itself, and would return. Instead add a bit for each context (normal, softirq, irq and nmi). A check of which context the task is in is made before testing the associated bit. Now if an interrupt preempts the function tracer after the previous context has been set, the interrupt functions can still be traced. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c75d7988902c..fe6ccff9cc70 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -299,8 +299,14 @@ struct tracer { /* for function tracing recursion */ #define TRACE_INTERNAL_BIT (1<<11) -#define TRACE_GLOBAL_BIT (1<<12) -#define TRACE_CONTROL_BIT (1<<13) +#define TRACE_INTERNAL_NMI_BIT (1<<12) +#define TRACE_INTERNAL_IRQ_BIT (1<<13) +#define TRACE_INTERNAL_SIRQ_BIT (1<<14) +#define TRACE_GLOBAL_BIT (1<<15) +#define TRACE_GLOBAL_NMI_BIT (1<<16) +#define TRACE_GLOBAL_IRQ_BIT (1<<17) +#define TRACE_GLOBAL_SIRQ_BIT (1<<18) +#define TRACE_CONTROL_BIT (1<<19) /* * Abuse of the trace_recursion. @@ -309,7 +315,7 @@ struct tracer { * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ -#define TRACE_IRQ_BIT (1<<13) +#define TRACE_IRQ_BIT (1<<20) #define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) -- cgit v1.2.3 From e46cbf75c621725964fe1f6e7013e8bcd86a0e3d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Nov 2012 17:32:25 -0400 Subject: tracing: Make the trace recursion bits into enums Convert the bits into enums which makes the code a little easier to maintain. Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fe6ccff9cc70..5a095d6f088d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -298,15 +298,18 @@ struct tracer { #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) /* for function tracing recursion */ -#define TRACE_INTERNAL_BIT (1<<11) -#define TRACE_INTERNAL_NMI_BIT (1<<12) -#define TRACE_INTERNAL_IRQ_BIT (1<<13) -#define TRACE_INTERNAL_SIRQ_BIT (1<<14) -#define TRACE_GLOBAL_BIT (1<<15) -#define TRACE_GLOBAL_NMI_BIT (1<<16) -#define TRACE_GLOBAL_IRQ_BIT (1<<17) -#define TRACE_GLOBAL_SIRQ_BIT (1<<18) -#define TRACE_CONTROL_BIT (1<<19) +enum { + TRACE_INTERNAL_BIT = 11, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + + TRACE_GLOBAL_BIT, + TRACE_GLOBAL_NMI_BIT, + TRACE_GLOBAL_IRQ_BIT, + TRACE_GLOBAL_SIRQ_BIT, + + TRACE_CONTROL_BIT, /* * Abuse of the trace_recursion. @@ -315,11 +318,12 @@ struct tracer { * was called in irq context but we have irq tracing off. Since this * can only be modified by current, we can reuse trace_recursion. */ -#define TRACE_IRQ_BIT (1<<20) + TRACE_IRQ_BIT, +}; -#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) -#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) -#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) #define TRACE_PIPE_ALL_CPU -1 -- cgit v1.2.3 From edc15cafcbfa3d73f819cae99885a2e35e4cbce5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Nov 2012 17:47:21 -0400 Subject: tracing: Avoid unnecessary multiple recursion checks When function tracing occurs, the following steps are made: If arch does not support a ftrace feature: call internal function (uses INTERNAL bits) which calls... If callback is registered to the "global" list, the list function is called and recursion checks the GLOBAL bits. then this function calls... The function callback, which can use the FTRACE bits to check for recursion. Now if the arch does not suppport a feature, and it calls the global list function which calls the ftrace callback all three of these steps will do a recursion protection. There's no reason to do one if the previous caller already did. The recursion that we are protecting against will go through the same steps again. To prevent the multiple recursion checks, if a recursion bit is set that is higher than the MAX bit of the current check, then we know that the check was made by the previous caller, and we can skip the current check. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 40 +++++-------------- kernel/trace/trace.h | 106 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 110 insertions(+), 36 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 639b6ab1f04c..ce8c3d68292f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -158,25 +158,15 @@ ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, { int bit; - if (in_interrupt()) { - if (in_nmi()) - bit = TRACE_GLOBAL_NMI_BIT; - - else if (in_irq()) - bit = TRACE_GLOBAL_IRQ_BIT; - else - bit = TRACE_GLOBAL_SIRQ_BIT; - } else - bit = TRACE_GLOBAL_BIT; - - if (unlikely(trace_recursion_test(bit))) + bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX); + if (bit < 0) return; - trace_recursion_set(bit); do_for_each_ftrace_op(op, ftrace_global_list) { op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); - trace_recursion_clear(bit); + + trace_clear_recursion(bit); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, @@ -4145,26 +4135,14 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) { struct ftrace_ops *op; - unsigned int bit; + int bit; if (function_trace_stop) return; - if (in_interrupt()) { - if (in_nmi()) - bit = TRACE_INTERNAL_NMI_BIT; - - else if (in_irq()) - bit = TRACE_INTERNAL_IRQ_BIT; - else - bit = TRACE_INTERNAL_SIRQ_BIT; - } else - bit = TRACE_INTERNAL_BIT; - - if (unlikely(trace_recursion_test(bit))) - return; - - trace_recursion_set(bit); + bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + if (bit < 0) + return; /* * Some of the ops may be dynamically allocated, @@ -4176,7 +4154,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, op->func(ip, parent_ip, op, regs); } while_for_each_ftrace_op(op); preempt_enable_notrace(); - trace_recursion_clear(bit); + trace_clear_recursion(bit); } /* diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 5a095d6f088d..c203a51dd412 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -297,18 +297,49 @@ struct tracer { /* Ring buffer has the 10 LSB bits to count */ #define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) -/* for function tracing recursion */ +/* + * For function tracing recursion: + * The order of these bits are important. + * + * When function tracing occurs, the following steps are made: + * If arch does not support a ftrace feature: + * call internal function (uses INTERNAL bits) which calls... + * If callback is registered to the "global" list, the list + * function is called and recursion checks the GLOBAL bits. + * then this function calls... + * The function callback, which can use the FTRACE bits to + * check for recursion. + * + * Now if the arch does not suppport a feature, and it calls + * the global list function which calls the ftrace callback + * all three of these steps will do a recursion protection. + * There's no reason to do one if the previous caller already + * did. The recursion that we are protecting against will + * go through the same steps again. + * + * To prevent the multiple recursion checks, if a recursion + * bit is set that is higher than the MAX bit of the current + * check, then we know that the check was made by the previous + * caller, and we can skip the current check. + */ enum { - TRACE_INTERNAL_BIT = 11, - TRACE_INTERNAL_NMI_BIT, - TRACE_INTERNAL_IRQ_BIT, - TRACE_INTERNAL_SIRQ_BIT, + TRACE_FTRACE_BIT = 11, + TRACE_FTRACE_NMI_BIT, + TRACE_FTRACE_IRQ_BIT, + TRACE_FTRACE_SIRQ_BIT, + /* GLOBAL_BITs must be greater than FTRACE_BITs */ TRACE_GLOBAL_BIT, TRACE_GLOBAL_NMI_BIT, TRACE_GLOBAL_IRQ_BIT, TRACE_GLOBAL_SIRQ_BIT, + /* INTERNAL_BITs must be greater than GLOBAL_BITs */ + TRACE_INTERNAL_BIT, + TRACE_INTERNAL_NMI_BIT, + TRACE_INTERNAL_IRQ_BIT, + TRACE_INTERNAL_SIRQ_BIT, + TRACE_CONTROL_BIT, /* @@ -325,6 +356,71 @@ enum { #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) +#define TRACE_CONTEXT_BITS 4 + +#define TRACE_FTRACE_START TRACE_FTRACE_BIT +#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT +#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_LIST_START TRACE_INTERNAL_BIT +#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +#define TRACE_CONTEXT_MASK TRACE_LIST_MAX + +static __always_inline int trace_get_context_bit(void) +{ + int bit; + + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; + + return bit; +} + +static __always_inline int trace_test_and_set_recursion(int start, int max) +{ + unsigned int val = current->trace_recursion; + int bit; + + /* A previous recursion check was made */ + if ((val & TRACE_CONTEXT_MASK) > max) + return 0; + + bit = trace_get_context_bit() + start; + if (unlikely(val & (1 << bit))) + return -1; + + val |= 1 << bit; + current->trace_recursion = val; + barrier(); + + return bit; +} + +static __always_inline void trace_clear_recursion(int bit) +{ + unsigned int val = current->trace_recursion; + + if (!bit) + return; + + bit = 1 << bit; + val &= ~bit; + + barrier(); + current->trace_recursion = val; +} + #define TRACE_PIPE_ALL_CPU -1 static inline struct ring_buffer_iter * -- cgit v1.2.3 From 567cd4da54ff45513d2ca1f0e3cb9ba45b66d6cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 2 Nov 2012 18:33:05 -0400 Subject: ring-buffer: User context bit recursion checking Using context bit recursion checking, we can help increase the performance of the ring buffer. Before this patch: # echo function > /debug/tracing/current_tracer # for i in `seq 10`; do ./hackbench 50; done Time: 10.285 Time: 10.407 Time: 10.243 Time: 10.372 Time: 10.380 Time: 10.198 Time: 10.272 Time: 10.354 Time: 10.248 Time: 10.253 (average: 10.3012) Now we have: # echo function > /debug/tracing/current_tracer # for i in `seq 10`; do ./hackbench 50; done Time: 9.712 Time: 9.824 Time: 9.861 Time: 9.827 Time: 9.962 Time: 9.905 Time: 9.886 Time: 10.088 Time: 9.861 Time: 9.834 (average: 9.876) a 4% savings! Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 85 ++++++++++++++++++++++++++++++++-------------- kernel/trace/trace.h | 13 +++---- 2 files changed, 67 insertions(+), 31 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6ff9cc4658ed..481e26269281 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2432,41 +2432,76 @@ rb_reserve_next_event(struct ring_buffer *buffer, #ifdef CONFIG_TRACING -#define TRACE_RECURSIVE_DEPTH 16 +/* + * The lock and unlock are done within a preempt disable section. + * The current_context per_cpu variable can only be modified + * by the current task between lock and unlock. But it can + * be modified more than once via an interrupt. To pass this + * information from the lock to the unlock without having to + * access the 'in_interrupt()' functions again (which do show + * a bit of overhead in something as critical as function tracing, + * we use a bitmask trick. + * + * bit 0 = NMI context + * bit 1 = IRQ context + * bit 2 = SoftIRQ context + * bit 3 = normal context. + * + * This works because this is the order of contexts that can + * preempt other contexts. A SoftIRQ never preempts an IRQ + * context. + * + * When the context is determined, the corresponding bit is + * checked and set (if it was set, then a recursion of that context + * happened). + * + * On unlock, we need to clear this bit. To do so, just subtract + * 1 from the current_context and AND it to itself. + * + * (binary) + * 101 - 1 = 100 + * 101 & 100 = 100 (clearing bit zero) + * + * 1010 - 1 = 1001 + * 1010 & 1001 = 1000 (clearing bit 1) + * + * The least significant bit can be cleared this way, and it + * just so happens that it is the same bit corresponding to + * the current context. + */ +static DEFINE_PER_CPU(unsigned int, current_context); -/* Keep this code out of the fast path cache */ -static noinline void trace_recursive_fail(void) +static __always_inline int trace_recursive_lock(void) { - /* Disable all tracing before we do anything else */ - tracing_off_permanent(); - - printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" - "HC[%lu]:SC[%lu]:NMI[%lu]\n", - trace_recursion_buffer(), - hardirq_count() >> HARDIRQ_SHIFT, - softirq_count() >> SOFTIRQ_SHIFT, - in_nmi()); + unsigned int val = this_cpu_read(current_context); + int bit; - WARN_ON_ONCE(1); -} - -static inline int trace_recursive_lock(void) -{ - trace_recursion_inc(); + if (in_interrupt()) { + if (in_nmi()) + bit = 0; + else if (in_irq()) + bit = 1; + else + bit = 2; + } else + bit = 3; - if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) - return 0; + if (unlikely(val & (1 << bit))) + return 1; - trace_recursive_fail(); + val |= (1 << bit); + this_cpu_write(current_context, val); - return -1; + return 0; } -static inline void trace_recursive_unlock(void) +static __always_inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!trace_recursion_buffer()); + unsigned int val = this_cpu_read(current_context); - trace_recursion_dec(); + val--; + val &= this_cpu_read(current_context); + this_cpu_write(current_context, val); } #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c203a51dd412..04a2c7ab1735 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -291,11 +291,6 @@ struct tracer { /* Only current can touch trace_recursion */ -#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) -#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) - -/* Ring buffer has the 10 LSB bits to count */ -#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) /* * For function tracing recursion: @@ -323,7 +318,13 @@ struct tracer { * caller, and we can skip the current check. */ enum { - TRACE_FTRACE_BIT = 11, + TRACE_BUFFER_BIT, + TRACE_BUFFER_NMI_BIT, + TRACE_BUFFER_IRQ_BIT, + TRACE_BUFFER_SIRQ_BIT, + + /* Start of function recursion bits */ + TRACE_FTRACE_BIT, TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, -- cgit v1.2.3 From debdd57f5145f3c6a4b3f8d0126abd1a2def7fc6 Mon Sep 17 00:00:00 2001 From: Hiraku Toyooka Date: Wed, 26 Dec 2012 11:53:00 +0900 Subject: tracing: Make a snapshot feature available from userspace Ftrace has a snapshot feature available from kernel space and latency tracers (e.g. irqsoff) are using it. This patch enables user applictions to take a snapshot via debugfs. Add "snapshot" debugfs file in "tracing" directory. snapshot: This is used to take a snapshot and to read the output of the snapshot. # echo 1 > snapshot This will allocate the spare buffer for snapshot (if it is not allocated), and take a snapshot. # cat snapshot This will show contents of the snapshot. # echo 0 > snapshot This will free the snapshot if it is allocated. Any other positive values will clear the snapshot contents if the snapshot is allocated, or return EINVAL if it is not allocated. Link: http://lkml.kernel.org/r/20121226025300.3252.86850.stgit@liselsia Cc: Jiri Olsa Cc: David Sharp Signed-off-by: Hiraku Toyooka [ Fixed irqsoff selftest and also a conflict with a change that fixes the update_max_tr. ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 3 + kernel/trace/Kconfig | 10 +++ kernel/trace/trace.c | 166 ++++++++++++++++++++++++++++++++++++------- kernel/trace/trace.h | 1 + 4 files changed, 154 insertions(+), 26 deletions(-) (limited to 'kernel/trace/trace.h') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 6f8d0b77006b..13a54d0bdfa8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,9 @@ struct trace_iterator { long idx; cpumask_var_t started; + + /* it's true when current open file is snapshot */ + bool snapshot; }; enum trace_iter_flags { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cdc9d284d24e..36567564e221 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -253,6 +253,16 @@ config FTRACE_SYSCALLS help Basic tracer to catch the syscall entry and exit events. +config TRACER_SNAPSHOT + bool "Create a snapshot trace buffer" + select TRACER_MAX_TRACE + help + Allow tracing users to take snapshot of the current buffer using the + ftrace interface, e.g.: + + echo 1 > /sys/kernel/debug/tracing/snapshot + cat snapshot + config TRACE_BRANCH_PROFILING bool select GENERIC_TRACER diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c724662a3e8..70dce64b9ecf 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -710,12 +710,11 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) WARN_ON_ONCE(!irqs_disabled()); - /* If we disabled the tracer, stop now */ - if (current_trace == &nop_trace) - return; - - if (WARN_ON_ONCE(!current_trace->use_max_tr)) + if (!current_trace->allocated_snapshot) { + /* Only the nop tracer should hit this when disabling */ + WARN_ON_ONCE(current_trace != &nop_trace); return; + } arch_spin_lock(&ftrace_max_lock); @@ -743,10 +742,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) return; WARN_ON_ONCE(!irqs_disabled()); - if (!current_trace->use_max_tr) { - WARN_ON_ONCE(1); + if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) return; - } arch_spin_lock(&ftrace_max_lock); @@ -866,10 +863,13 @@ int register_tracer(struct tracer *type) current_trace = type; - /* If we expanded the buffers, make sure the max is expanded too */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, trace_buf_size, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + /* If we expanded the buffers, make sure the max is expanded too */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, trace_buf_size, + RING_BUFFER_ALL_CPUS); + type->allocated_snapshot = true; + } /* the test is responsible for initializing and enabling */ pr_info("Testing tracer %s: ", type->name); @@ -885,10 +885,14 @@ int register_tracer(struct tracer *type) /* Only reset on passing, to avoid touching corrupted buffers */ tracing_reset_online_cpus(tr); - /* Shrink the max buffer again */ - if (ring_buffer_expanded && type->use_max_tr) - ring_buffer_resize(max_tr.buffer, 1, - RING_BUFFER_ALL_CPUS); + if (type->use_max_tr) { + type->allocated_snapshot = false; + + /* Shrink the max buffer again */ + if (ring_buffer_expanded) + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + } printk(KERN_CONT "PASSED\n"); } @@ -1964,7 +1968,11 @@ static void *s_start(struct seq_file *m, loff_t *pos) *iter->trace = *current_trace; mutex_unlock(&trace_types_lock); - atomic_inc(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return ERR_PTR(-EBUSY); + + if (!iter->snapshot) + atomic_inc(&trace_record_cmdline_disabled); if (*pos != iter->pos) { iter->ent = NULL; @@ -2003,7 +2011,11 @@ static void s_stop(struct seq_file *m, void *p) { struct trace_iterator *iter = m->private; - atomic_dec(&trace_record_cmdline_disabled); + if (iter->snapshot && iter->trace->use_max_tr) + return; + + if (!iter->snapshot) + atomic_dec(&trace_record_cmdline_disabled); trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2438,7 +2450,7 @@ static const struct seq_operations tracer_seq_ops = { }; static struct trace_iterator * -__tracing_open(struct inode *inode, struct file *file) +__tracing_open(struct inode *inode, struct file *file, bool snapshot) { long cpu_file = (long) inode->i_private; struct trace_iterator *iter; @@ -2471,10 +2483,11 @@ __tracing_open(struct inode *inode, struct file *file) if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL)) goto fail; - if (current_trace && current_trace->print_max) + if ((current_trace && current_trace->print_max) || snapshot) iter->tr = &max_tr; else iter->tr = &global_trace; + iter->snapshot = snapshot; iter->pos = -1; mutex_init(&iter->mutex); iter->cpu_file = cpu_file; @@ -2491,8 +2504,9 @@ __tracing_open(struct inode *inode, struct file *file) if (trace_clocks[trace_clock_id].in_ns) iter->iter_flags |= TRACE_FILE_TIME_IN_NS; - /* stop the trace while dumping */ - tracing_stop(); + /* stop the trace while dumping if we are not opening "snapshot" */ + if (!iter->snapshot) + tracing_stop(); if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { for_each_tracing_cpu(cpu) { @@ -2555,8 +2569,9 @@ static int tracing_release(struct inode *inode, struct file *file) if (iter->trace && iter->trace->close) iter->trace->close(iter); - /* reenable tracing if it was previously enabled */ - tracing_start(); + if (!iter->snapshot) + /* reenable tracing if it was previously enabled */ + tracing_start(); mutex_unlock(&trace_types_lock); mutex_destroy(&iter->mutex); @@ -2584,7 +2599,7 @@ static int tracing_open(struct inode *inode, struct file *file) } if (file->f_mode & FMODE_READ) { - iter = __tracing_open(inode, file); + iter = __tracing_open(inode, file, false); if (IS_ERR(iter)) ret = PTR_ERR(iter); else if (trace_flags & TRACE_ITER_LATENCY_FMT) @@ -3219,7 +3234,7 @@ static int tracing_set_tracer(const char *buf) if (current_trace && current_trace->reset) current_trace->reset(tr); - had_max_tr = current_trace && current_trace->use_max_tr; + had_max_tr = current_trace && current_trace->allocated_snapshot; current_trace = &nop_trace; if (had_max_tr && !t->use_max_tr) { @@ -3238,6 +3253,8 @@ static int tracing_set_tracer(const char *buf) */ ring_buffer_resize(max_tr.buffer, 1, RING_BUFFER_ALL_CPUS); set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; } destroy_trace_option_files(topts); @@ -3248,6 +3265,7 @@ static int tracing_set_tracer(const char *buf) RING_BUFFER_ALL_CPUS); if (ret < 0) goto out; + t->allocated_snapshot = true; } if (t->init) { @@ -4066,6 +4084,87 @@ static int tracing_clock_open(struct inode *inode, struct file *file) return single_open(file, tracing_clock_show, NULL); } +#ifdef CONFIG_TRACER_SNAPSHOT +static int tracing_snapshot_open(struct inode *inode, struct file *file) +{ + struct trace_iterator *iter; + int ret = 0; + + if (file->f_mode & FMODE_READ) { + iter = __tracing_open(inode, file, true); + if (IS_ERR(iter)) + ret = PTR_ERR(iter); + } + return ret; +} + +static ssize_t +tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + unsigned long val; + int ret; + + ret = tracing_update_buffers(); + if (ret < 0) + return ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + mutex_lock(&trace_types_lock); + + if (current_trace && current_trace->use_max_tr) { + ret = -EBUSY; + goto out; + } + + switch (val) { + case 0: + if (current_trace->allocated_snapshot) { + /* free spare buffer */ + ring_buffer_resize(max_tr.buffer, 1, + RING_BUFFER_ALL_CPUS); + set_buffer_entries(&max_tr, 1); + tracing_reset_online_cpus(&max_tr); + current_trace->allocated_snapshot = false; + } + break; + case 1: + if (!current_trace->allocated_snapshot) { + /* allocate spare buffer */ + ret = resize_buffer_duplicate_size(&max_tr, + &global_trace, RING_BUFFER_ALL_CPUS); + if (ret < 0) + break; + current_trace->allocated_snapshot = true; + } + + local_irq_disable(); + /* Now, we're going to swap */ + update_max_tr(&global_trace, current, smp_processor_id()); + local_irq_enable(); + break; + default: + if (current_trace->allocated_snapshot) + tracing_reset_online_cpus(&max_tr); + else + ret = -EINVAL; + break; + } + + if (ret >= 0) { + *ppos += cnt; + ret = cnt; + } +out: + mutex_unlock(&trace_types_lock); + return ret; +} +#endif /* CONFIG_TRACER_SNAPSHOT */ + + static const struct file_operations tracing_max_lat_fops = { .open = tracing_open_generic, .read = tracing_max_lat_read, @@ -4122,6 +4221,16 @@ static const struct file_operations trace_clock_fops = { .write = tracing_clock_write, }; +#ifdef CONFIG_TRACER_SNAPSHOT +static const struct file_operations snapshot_fops = { + .open = tracing_snapshot_open, + .read = seq_read, + .write = tracing_snapshot_write, + .llseek = tracing_seek, + .release = tracing_release, +}; +#endif /* CONFIG_TRACER_SNAPSHOT */ + struct ftrace_buffer_info { struct trace_array *tr; void *spare; @@ -4921,6 +5030,11 @@ static __init int tracer_init_debugfs(void) &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif +#ifdef CONFIG_TRACER_SNAPSHOT + trace_create_file("snapshot", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &snapshot_fops); +#endif + create_trace_options_dir(); for_each_tracing_cpu(cpu) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 04a2c7ab1735..57d7e5397d56 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -287,6 +287,7 @@ struct tracer { struct tracer_flags *flags; bool print_max; bool use_max_tr; + bool allocated_snapshot; }; -- cgit v1.2.3