diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-08-22 10:42:23 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-08-22 10:42:23 +1000 |
commit | be21d065e6d1443c0720d76502513c5efe418403 (patch) | |
tree | 623a67a7f7beb744535032ceba6ac2d11dd46c53 /kernel | |
parent | 44fbdd467bea6c7fadab795176a15da167b86859 (diff) | |
parent | cdca408f6f55290eec706abad51826e113f2a091 (diff) |
Merge commit 'ftrace/auto-ftrace-next'
Conflicts:
kernel/module.c
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/exit.c | 10 | ||||
-rw-r--r-- | kernel/fork.c | 3 | ||||
-rw-r--r-- | kernel/kthread.c | 5 | ||||
-rw-r--r-- | kernel/module.c | 66 | ||||
-rw-r--r-- | kernel/notifier.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 17 | ||||
-rw-r--r-- | kernel/sched_clock.c | 8 | ||||
-rw-r--r-- | kernel/signal.c | 3 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 1 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_sched_switch.c | 120 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 135 | ||||
-rw-r--r-- | kernel/tracepoint.c | 476 |
14 files changed, 643 insertions, 211 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 4e1d7df7c3e2..8f9ce7ec21b6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o +obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FTRACE) += trace/ diff --git a/kernel/exit.c b/kernel/exit.c index 38ec40630149..b91dbb210953 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,6 +47,7 @@ #include <linux/blkdev.h> #include <linux/task_io_accounting_ops.h> #include <linux/tracehook.h> +#include <trace/sched.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -149,7 +150,10 @@ static void __exit_signal(struct task_struct *tsk) static void delayed_put_task_struct(struct rcu_head *rhp) { - put_task_struct(container_of(rhp, struct task_struct, rcu)); + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + + trace_sched_process_free(tsk); + put_task_struct(tsk); } @@ -1076,6 +1080,8 @@ NORET_TYPE void do_exit(long code) if (group_dead) acct_process(); + trace_sched_process_exit(tsk); + exit_sem(tsk); exit_files(tsk); exit_fs(tsk); @@ -1677,6 +1683,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options, struct task_struct *tsk; int retval; + trace_sched_process_wait(pid); + add_wait_queue(¤t->signal->wait_chldexit,&wait); repeat: /* diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe84796..5013b591f67b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -58,6 +58,7 @@ #include <linux/tty.h> #include <linux/proc_fs.h> #include <linux/blkdev.h> +#include <trace/sched.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -1361,6 +1362,8 @@ long do_fork(unsigned long clone_flags, if (!IS_ERR(p)) { struct completion vfork; + trace_sched_process_fork(current, p); + nr = task_pid_vnr(p); if (clone_flags & CLONE_PARENT_SETTID) diff --git a/kernel/kthread.c b/kernel/kthread.c index 96cff2f8710b..50598e29439a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/module.h> #include <linux/mutex.h> +#include <trace/sched.h> #define KTHREAD_NICE_LEVEL (-5) @@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k) /* It could exit after stop_info.k set, but before wake_up_process. */ get_task_struct(k); + trace_sched_kthread_stop(k); + /* Must init completion *before* thread sees kthread_stop_info.k */ init_completion(&kthread_stop_info.done); smp_wmb(); @@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k) ret = kthread_stop_info.err; mutex_unlock(&kthread_stop_lock); + trace_sched_kthread_stop_ret(ret); + return ret; } EXPORT_SYMBOL(kthread_stop); diff --git a/kernel/module.c b/kernel/module.c index b610466448ed..b227ca472ad3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -47,6 +47,7 @@ #include <asm/sections.h> #include <linux/license.h> #include <asm/sections.h> +#include <linux/tracepoint.h> #if 0 #define DEBUGP printk @@ -1867,6 +1868,8 @@ static struct module *load_module(void __user *umod, unsigned int markersindex; unsigned int markersstringsindex; unsigned int verboseindex; + unsigned int tracepointsindex; + unsigned int tracepointsstringsindex; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ @@ -2154,6 +2157,9 @@ static struct module *load_module(void __user *umod, markersstringsindex = find_sec(hdr, sechdrs, secstrings, "__markers_strings"); verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose"); + tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints"); + tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings, + "__tracepoints_strings"); /* Now do relocations. */ for (i = 1; i < hdr->e_shnum; i++) { @@ -2181,6 +2187,12 @@ static struct module *load_module(void __user *umod, mod->num_markers = sechdrs[markersindex].sh_size / sizeof(*mod->markers); #endif +#ifdef CONFIG_TRACEPOINTS + mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr; + mod->num_tracepoints = + sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints); +#endif + /* Find duplicate symbols */ err = verify_export_symbols(mod); @@ -2199,11 +2211,16 @@ static struct module *load_module(void __user *umod, add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + if (!mod->taints) { #ifdef CONFIG_MARKERS - if (!mod->taints) marker_update_probe_range(mod->markers, mod->markers + mod->num_markers); #endif +#ifdef CONFIG_TRACEPOINTS + tracepoint_update_probe_range(mod->tracepoints, + mod->tracepoints + mod->num_tracepoints); +#endif + } dynamic_printk_setup(mod, sechdrs, verboseindex, secstrings); err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2755,3 +2772,50 @@ void module_update_markers(void) mutex_unlock(&module_mutex); } #endif + +#ifdef CONFIG_TRACEPOINTS +void module_update_tracepoints(void) +{ + struct module *mod; + + mutex_lock(&module_mutex); + list_for_each_entry(mod, &modules, list) + if (!mod->taints) + tracepoint_update_probe_range(mod->tracepoints, + mod->tracepoints + mod->num_tracepoints); + mutex_unlock(&module_mutex); +} + +/* + * Returns 0 if current not found. + * Returns 1 if current found. + */ +int module_get_iter_tracepoints(struct tracepoint_iter *iter) +{ + struct module *iter_mod; + int found = 0; + + mutex_lock(&module_mutex); + list_for_each_entry(iter_mod, &modules, list) { + if (!iter_mod->taints) { + /* + * Sorted module list + */ + if (iter_mod < iter->module) + continue; + else if (iter_mod > iter->module) + iter->tracepoint = NULL; + found = tracepoint_get_iter_range(&iter->tracepoint, + iter_mod->tracepoints, + iter_mod->tracepoints + + iter_mod->num_tracepoints); + if (found) { + iter->module = iter_mod; + break; + } + } + } + mutex_unlock(&module_mutex); + return found; +} +#endif diff --git a/kernel/notifier.c b/kernel/notifier.c index 823be11584ef..4282c0a40a57 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); static ATOMIC_NOTIFIER_HEAD(die_chain); -int notify_die(enum die_val val, const char *str, +int notrace notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { diff --git a/kernel/sched.c b/kernel/sched.c index 9a1ddb84e26d..7d995207feef 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> +#include <trace/sched.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -1918,6 +1919,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * just go back and repeat. */ rq = task_rq_lock(p, &flags); + trace_sched_wait_task(rq, p); running = task_running(rq, p); on_rq = p->se.on_rq; ncsw = 0; @@ -2282,9 +2284,7 @@ out_activate: success = 1; out_running: - trace_mark(kernel_sched_wakeup, - "pid %d state %ld ## rq %p task %p rq->curr %p", - p->pid, p->state, rq, p, rq->curr); + trace_sched_wakeup(rq, p); check_preempt_curr(rq, p); p->state = TASK_RUNNING; @@ -2417,9 +2417,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - trace_mark(kernel_sched_wakeup_new, - "pid %d state %ld ## rq %p task %p rq->curr %p", - p->pid, p->state, rq, p, rq->curr); + trace_sched_wakeup_new(rq, p); check_preempt_curr(rq, p); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) @@ -2592,11 +2590,7 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm, *oldmm; prepare_task_switch(rq, prev, next); - trace_mark(kernel_sched_schedule, - "prev_pid %d next_pid %d prev_state %ld " - "## rq %p prev %p next %p", - prev->pid, next->pid, prev->state, - rq, prev, next); + trace_sched_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* @@ -2836,6 +2830,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) || unlikely(!cpu_active(dest_cpu))) goto out; + trace_sched_migrate_task(rq, p, dest_cpu); /* force the process onto the specified CPU */ if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread (might exit: take ref). */ diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 204991a0bfa7..24b0092fac11 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -31,6 +31,7 @@ #include <linux/spinlock.h> #include <linux/ktime.h> #include <linux/module.h> +#include <linux/hardirq.h> /* * Scheduler clock - returns current time in nanosec units. @@ -151,6 +152,13 @@ u64 sched_clock_cpu(int cpu) struct sched_clock_data *scd = cpu_sdc(cpu); u64 now, clock, this_clock, remote_clock; + /* + * Normally this is not called in NMI context - but if it is, + * trying to do any locking here is totally lethal. + */ + if (unlikely(in_nmi())) + return scd->clock; + if (unlikely(!sched_clock_running)) return 0ull; diff --git a/kernel/signal.c b/kernel/signal.c index e661b01d340f..bf40ecc87b26 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,6 +27,7 @@ #include <linux/freezer.h> #include <linux/pid_namespace.h> #include <linux/nsproxy.h> +#include <trace/sched.h> #include <asm/param.h> #include <asm/uaccess.h> @@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigpending *pending; struct sigqueue *q; + trace_sched_signal_send(sig, t); + assert_spin_locked(&t->sighand->siglock); if (!prepare_signal(sig, t)) return 0; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 263e9e6bbd60..cae2637d5e68 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -14,6 +14,7 @@ config TRACING bool select DEBUG_FS select STACKTRACE + select TRACEPOINTS config FTRACE bool "Kernel Function Tracer" diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f6e3af31b403..5552f2535bf6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -340,6 +340,13 @@ ftrace_record_ip(unsigned long ip) int resched; int atomic; int cpu; + static int once; + + if (!once && in_nmi()) { + once++; + ftrace_disabled = 1; + WARN_ON(1); + } if (!ftrace_enabled || ftrace_disabled) return; diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index cb817a209aa0..789e927abc9c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -9,8 +9,8 @@ #include <linux/debugfs.h> #include <linux/kallsyms.h> #include <linux/uaccess.h> -#include <linux/marker.h> #include <linux/ftrace.h> +#include <trace/sched.h> #include "trace.h" @@ -19,16 +19,17 @@ static int __read_mostly tracer_enabled; static atomic_t sched_ref; static void -sched_switch_func(void *private, void *__rq, struct task_struct *prev, +probe_sched_switch(struct rq *__rq, struct task_struct *prev, struct task_struct *next) { - struct trace_array **ptr = private; - struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; int cpu; + if (!atomic_read(&sched_ref)) + return; + tracing_record_cmdline(prev); tracing_record_cmdline(next); @@ -37,95 +38,42 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev, local_irq_save(flags); cpu = raw_smp_processor_id(); - data = tr->data[cpu]; + data = ctx_trace->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - tracing_sched_switch_trace(tr, data, prev, next, flags); + tracing_sched_switch_trace(ctx_trace, data, prev, next, flags); atomic_dec(&data->disabled); local_irq_restore(flags); } -static notrace void -sched_switch_callback(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - struct task_struct *prev; - struct task_struct *next; - struct rq *__rq; - - if (!atomic_read(&sched_ref)) - return; - - /* skip prev_pid %d next_pid %d prev_state %ld */ - (void)va_arg(*args, int); - (void)va_arg(*args, int); - (void)va_arg(*args, long); - __rq = va_arg(*args, typeof(__rq)); - prev = va_arg(*args, typeof(prev)); - next = va_arg(*args, typeof(next)); - - /* - * If tracer_switch_func only points to the local - * switch func, it still needs the ptr passed to it. - */ - sched_switch_func(probe_data, __rq, prev, next); -} - static void -wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct - task_struct *curr) +probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee) { - struct trace_array **ptr = private; - struct trace_array *tr = *ptr; struct trace_array_cpu *data; unsigned long flags; long disabled; int cpu; - if (!tracer_enabled) + if (!likely(tracer_enabled)) return; - tracing_record_cmdline(curr); + tracing_record_cmdline(current); local_irq_save(flags); cpu = raw_smp_processor_id(); - data = tr->data[cpu]; + data = ctx_trace->data[cpu]; disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - tracing_sched_wakeup_trace(tr, data, wakee, curr, flags); + tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, + flags); atomic_dec(&data->disabled); local_irq_restore(flags); } -static notrace void -wake_up_callback(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - struct task_struct *curr; - struct task_struct *task; - struct rq *__rq; - - if (likely(!tracer_enabled)) - return; - - /* Skip pid %d state %ld */ - (void)va_arg(*args, int); - (void)va_arg(*args, long); - /* now get the meat: "rq %p task %p rq->curr %p" */ - __rq = va_arg(*args, typeof(__rq)); - task = va_arg(*args, typeof(task)); - curr = va_arg(*args, typeof(curr)); - - tracing_record_cmdline(task); - tracing_record_cmdline(curr); - - wakeup_func(probe_data, __rq, task, curr); -} - static void sched_switch_reset(struct trace_array *tr) { int cpu; @@ -140,60 +88,40 @@ static int tracing_sched_register(void) { int ret; - ret = marker_probe_register("kernel_sched_wakeup", - "pid %d state %ld ## rq %p task %p rq->curr %p", - wake_up_callback, - &ctx_trace); + ret = register_trace_sched_wakeup(probe_sched_wakeup); if (ret) { - pr_info("wakeup trace: Couldn't add marker" + pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup\n"); return ret; } - ret = marker_probe_register("kernel_sched_wakeup_new", - "pid %d state %ld ## rq %p task %p rq->curr %p", - wake_up_callback, - &ctx_trace); + ret = register_trace_sched_wakeup_new(probe_sched_wakeup); if (ret) { - pr_info("wakeup trace: Couldn't add marker" + pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup_new\n"); goto fail_deprobe; } - ret = marker_probe_register("kernel_sched_schedule", - "prev_pid %d next_pid %d prev_state %ld " - "## rq %p prev %p next %p", - sched_switch_callback, - &ctx_trace); + ret = register_trace_sched_switch(probe_sched_switch); if (ret) { - pr_info("sched trace: Couldn't add marker" + pr_info("sched trace: Couldn't activate tracepoint" " probe to kernel_sched_schedule\n"); goto fail_deprobe_wake_new; } return ret; fail_deprobe_wake_new: - marker_probe_unregister("kernel_sched_wakeup_new", - wake_up_callback, - &ctx_trace); + unregister_trace_sched_wakeup_new(probe_sched_wakeup); fail_deprobe: - marker_probe_unregister("kernel_sched_wakeup", - wake_up_callback, - &ctx_trace); + unregister_trace_sched_wakeup(probe_sched_wakeup); return ret; } static void tracing_sched_unregister(void) { - marker_probe_unregister("kernel_sched_schedule", - sched_switch_callback, - &ctx_trace); - marker_probe_unregister("kernel_sched_wakeup_new", - wake_up_callback, - &ctx_trace); - marker_probe_unregister("kernel_sched_wakeup", - wake_up_callback, - &ctx_trace); + unregister_trace_sched_switch(probe_sched_switch); + unregister_trace_sched_wakeup_new(probe_sched_wakeup); + unregister_trace_sched_wakeup(probe_sched_wakeup); } static void tracing_start_sched_switch(void) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index e303ccb62cdf..08206b4e29c4 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include <linux/kallsyms.h> #include <linux/uaccess.h> #include <linux/ftrace.h> -#include <linux/marker.h> +#include <trace/sched.h> #include "trace.h" @@ -112,18 +112,18 @@ static int report_latency(cycle_t delta) } static void notrace -wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, +probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { unsigned long latency = 0, t0 = 0, t1 = 0; - struct trace_array **ptr = private; - struct trace_array *tr = *ptr; struct trace_array_cpu *data; cycle_t T0, T1, delta; unsigned long flags; long disabled; int cpu; + tracing_record_cmdline(prev); + if (unlikely(!tracer_enabled)) return; @@ -140,11 +140,11 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, return; /* The task we are waiting for is waking up */ - data = tr->data[wakeup_cpu]; + data = wakeup_trace->data[wakeup_cpu]; /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); - disabled = atomic_inc_return(&tr->data[cpu]->disabled); + disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); if (likely(disabled != 1)) goto out; @@ -155,7 +155,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags); + trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags); /* * usecs conversion is slow so we try to delay the conversion @@ -174,39 +174,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev, t0 = nsecs_to_usecs(T0); t1 = nsecs_to_usecs(T1); - update_max_tr(tr, wakeup_task, wakeup_cpu); + update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu); out_unlock: - __wakeup_reset(tr); + __wakeup_reset(wakeup_trace); __raw_spin_unlock(&wakeup_lock); local_irq_restore(flags); out: - atomic_dec(&tr->data[cpu]->disabled); -} - -static notrace void -sched_switch_callback(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - struct task_struct *prev; - struct task_struct *next; - struct rq *__rq; - - /* skip prev_pid %d next_pid %d prev_state %ld */ - (void)va_arg(*args, int); - (void)va_arg(*args, int); - (void)va_arg(*args, long); - __rq = va_arg(*args, typeof(__rq)); - prev = va_arg(*args, typeof(prev)); - next = va_arg(*args, typeof(next)); - - tracing_record_cmdline(prev); - - /* - * If tracer_switch_func only points to the local - * switch func, it still needs the ptr passed to it. - */ - wakeup_sched_switch(probe_data, __rq, prev, next); + atomic_dec(&wakeup_trace->data[cpu]->disabled); } static void __wakeup_reset(struct trace_array *tr) @@ -240,19 +215,24 @@ static void wakeup_reset(struct trace_array *tr) } static void -wakeup_check_start(struct trace_array *tr, struct task_struct *p, - struct task_struct *curr) +probe_wakeup(struct rq *rq, struct task_struct *p) { int cpu = smp_processor_id(); unsigned long flags; long disabled; + if (likely(!tracer_enabled)) + return; + + tracing_record_cmdline(p); + tracing_record_cmdline(current); + if (likely(!rt_task(p)) || p->prio >= wakeup_prio || - p->prio >= curr->prio) + p->prio >= current->prio) return; - disabled = atomic_inc_return(&tr->data[cpu]->disabled); + disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); if (unlikely(disabled != 1)) goto out; @@ -264,7 +244,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, goto out_locked; /* reset the trace */ - __wakeup_reset(tr); + __wakeup_reset(wakeup_trace); wakeup_cpu = task_cpu(p); wakeup_prio = p->prio; @@ -274,74 +254,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p, local_save_flags(flags); - tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); - trace_function(tr, tr->data[wakeup_cpu], + wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); + trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags); out_locked: __raw_spin_unlock(&wakeup_lock); out: - atomic_dec(&tr->data[cpu]->disabled); -} - -static notrace void -wake_up_callback(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - struct trace_array **ptr = probe_data; - struct trace_array *tr = *ptr; - struct task_struct *curr; - struct task_struct *task; - struct rq *__rq; - - if (likely(!tracer_enabled)) - return; - - /* Skip pid %d state %ld */ - (void)va_arg(*args, int); - (void)va_arg(*args, long); - /* now get the meat: "rq %p task %p rq->curr %p" */ - __rq = va_arg(*args, typeof(__rq)); - task = va_arg(*args, typeof(task)); - curr = va_arg(*args, typeof(curr)); - - tracing_record_cmdline(task); - tracing_record_cmdline(curr); - - wakeup_check_start(tr, task, curr); + atomic_dec(&wakeup_trace->data[cpu]->disabled); } static void start_wakeup_tracer(struct trace_array *tr) { int ret; - ret = marker_probe_register("kernel_sched_wakeup", - "pid %d state %ld ## rq %p task %p rq->curr %p", - wake_up_callback, - &wakeup_trace); + ret = register_trace_sched_wakeup(probe_wakeup); if (ret) { - pr_info("wakeup trace: Couldn't add marker" + pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup\n"); return; } - ret = marker_probe_register("kernel_sched_wakeup_new", - "pid %d state %ld ## rq %p task %p rq->curr %p", - wake_up_callback, - &wakeup_trace); + ret = register_trace_sched_wakeup_new(probe_wakeup); if (ret) { - pr_info("wakeup trace: Couldn't add marker" + pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_wakeup_new\n"); goto fail_deprobe; } - ret = marker_probe_register("kernel_sched_schedule", - "prev_pid %d next_pid %d prev_state %ld " - "## rq %p prev %p next %p", - sched_switch_callback, - &wakeup_trace); + ret = register_trace_sched_switch(probe_wakeup_sched_switch); if (ret) { - pr_info("sched trace: Couldn't add marker" + pr_info("sched trace: Couldn't activate tracepoint" " probe to kernel_sched_schedule\n"); goto fail_deprobe_wake_new; } @@ -363,28 +306,18 @@ static void start_wakeup_tracer(struct trace_array *tr) return; fail_deprobe_wake_new: - marker_probe_unregister("kernel_sched_wakeup_new", - wake_up_callback, - &wakeup_trace); + unregister_trace_sched_wakeup_new(probe_wakeup); fail_deprobe: - marker_probe_unregister("kernel_sched_wakeup", - wake_up_callback, - &wakeup_trace); + unregister_trace_sched_wakeup(probe_wakeup); } static void stop_wakeup_tracer(struct trace_array *tr) { tracer_enabled = 0; unregister_ftrace_function(&trace_ops); - marker_probe_unregister("kernel_sched_schedule", - sched_switch_callback, - &wakeup_trace); - marker_probe_unregister("kernel_sched_wakeup_new", - wake_up_callback, - &wakeup_trace); - marker_probe_unregister("kernel_sched_wakeup", - wake_up_callback, - &wakeup_trace); + unregister_trace_sched_switch(probe_wakeup_sched_switch); + unregister_trace_sched_wakeup_new(probe_wakeup); + unregister_trace_sched_wakeup(probe_wakeup); } static void wakeup_tracer_init(struct trace_array *tr) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c new file mode 100644 index 000000000000..c7c62a4a75f5 --- /dev/null +++ b/kernel/tracepoint.c @@ -0,0 +1,476 @@ +/* + * Copyright (C) 2008 Mathieu Desnoyers + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/jhash.h> +#include <linux/list.h> +#include <linux/rcupdate.h> +#include <linux/tracepoint.h> +#include <linux/err.h> +#include <linux/slab.h> + +extern struct tracepoint __start___tracepoints[]; +extern struct tracepoint __stop___tracepoints[]; + +/* Set to 1 to enable tracepoint debug output */ +static const int tracepoint_debug; + +/* + * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the + * builtin and module tracepoints and the hash table. + */ +static DEFINE_MUTEX(tracepoints_mutex); + +/* + * Tracepoint hash table, containing the active tracepoints. + * Protected by tracepoints_mutex. + */ +#define TRACEPOINT_HASH_BITS 6 +#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS) + +/* + * Note about RCU : + * It is used to to delay the free of multiple probes array until a quiescent + * state is reached. + * Tracepoint entries modifications are protected by the tracepoints_mutex. + */ +struct tracepoint_entry { + struct hlist_node hlist; + void **funcs; + int refcount; /* Number of times armed. 0 if disarmed. */ + struct rcu_head rcu; + void *oldptr; + unsigned char rcu_pending:1; + char name[0]; +}; + +static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE]; + +static void free_old_closure(struct rcu_head *head) +{ + struct tracepoint_entry *entry = container_of(head, + struct tracepoint_entry, rcu); + kfree(entry->oldptr); + /* Make sure we free the data before setting the pending flag to 0 */ + smp_wmb(); + entry->rcu_pending = 0; +} + +static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) +{ + if (!old) + return; + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); +#ifdef CONFIG_PREEMPT_RCU + synchronize_sched(); /* Until we have the call_rcu_sched() */ +#endif + call_rcu(&entry->rcu, free_old_closure); +} + +static void debug_print_probes(struct tracepoint_entry *entry) +{ + int i; + + if (!tracepoint_debug) + return; + + for (i = 0; entry->funcs[i]; i++) + printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); +} + +static void * +tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) +{ + int nr_probes = 0; + void **old, **new; + + WARN_ON(!probe); + + debug_print_probes(entry); + old = entry->funcs; + if (old) { + /* (N -> N+1), (N != 0, 1) probes */ + for (nr_probes = 0; old[nr_probes]; nr_probes++) + if (old[nr_probes] == probe) + return ERR_PTR(-EEXIST); + } + /* + 2 : one for new probe, one for NULL func */ + new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + if (old) + memcpy(new, old, nr_probes * sizeof(void *)); + new[nr_probes] = probe; + entry->refcount = nr_probes + 1; + entry->funcs = new; + debug_print_probes(entry); + return old; +} + +static void * +tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) +{ + int nr_probes = 0, nr_del = 0, i; + void **old, **new; + + old = entry->funcs; + + debug_print_probes(entry); + /* (N -> M), (N > 1, M >= 0) probes */ + for (nr_probes = 0; old[nr_probes]; nr_probes++) { + if ((!probe || old[nr_probes] == probe)) + nr_del++; + } + + if (nr_probes - nr_del == 0) { + /* N -> 0, (N > 1) */ + entry->funcs = NULL; + entry->refcount = 0; + debug_print_probes(entry); + return old; + } else { + int j = 0; + /* N -> M, (N > 1, M > 0) */ + /* + 1 for NULL */ + new = kzalloc((nr_probes - nr_del + 1) + * sizeof(void *), GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + for (i = 0; old[i]; i++) + if ((probe && old[i] != probe)) + new[j++] = old[i]; + entry->refcount = nr_probes - nr_del; + entry->funcs = new; + } + debug_print_probes(entry); + return old; +} + +/* + * Get tracepoint if the tracepoint is present in the tracepoint hash table. + * Must be called with tracepoints_mutex held. + * Returns NULL if not present. + */ +static struct tracepoint_entry *get_tracepoint(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct tracepoint_entry *e; + u32 hash = jhash(name, strlen(name), 0); + + head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) + return e; + } + return NULL; +} + +/* + * Add the tracepoint to the tracepoint hash table. Must be called with + * tracepoints_mutex held. + */ +static struct tracepoint_entry *add_tracepoint(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct tracepoint_entry *e; + size_t name_len = strlen(name) + 1; + u32 hash = jhash(name, name_len-1, 0); + + head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + printk(KERN_NOTICE + "tracepoint %s busy\n", name); + return ERR_PTR(-EEXIST); /* Already there */ + } + } + /* + * Using kmalloc here to allocate a variable length element. Could + * cause some memory fragmentation if overused. + */ + e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + memcpy(&e->name[0], name, name_len); + e->funcs = NULL; + e->refcount = 0; + e->rcu_pending = 0; + hlist_add_head(&e->hlist, head); + return e; +} + +/* + * Remove the tracepoint from the tracepoint hash table. Must be called with + * mutex_lock held. + */ +static int remove_tracepoint(const char *name) +{ + struct hlist_head *head; + struct hlist_node *node; + struct tracepoint_entry *e; + int found = 0; + size_t len = strlen(name) + 1; + u32 hash = jhash(name, len-1, 0); + + head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (!strcmp(name, e->name)) { + found = 1; + break; + } + } + if (!found) + return -ENOENT; + if (e->refcount) + return -EBUSY; + hlist_del(&e->hlist); + /* Make sure the call_rcu has been executed */ + if (e->rcu_pending) + rcu_barrier(); + kfree(e); + return 0; +} + +/* + * Sets the probe callback corresponding to one tracepoint. + */ +static void set_tracepoint(struct tracepoint_entry **entry, + struct tracepoint *elem, int active) +{ + WARN_ON(strcmp((*entry)->name, elem->name) != 0); + + /* + * rcu_assign_pointer has a smp_wmb() which makes sure that the new + * probe callbacks array is consistent before setting a pointer to it. + * This array is referenced by __DO_TRACE from + * include/linux/tracepoints.h. A matching smp_read_barrier_depends() + * is used. + */ + rcu_assign_pointer(elem->funcs, (*entry)->funcs); + elem->state = active; +} + +/* + * Disable a tracepoint and its probe callback. + * Note: only waiting an RCU period after setting elem->call to the empty + * function insures that the original callback is not used anymore. This insured + * by preempt_disable around the call site. + */ +static void disable_tracepoint(struct tracepoint *elem) +{ + elem->state = 0; +} + +/** + * tracepoint_update_probe_range - Update a probe range + * @begin: beginning of the range + * @end: end of the range + * + * Updates the probe callback corresponding to a range of tracepoints. + */ +void tracepoint_update_probe_range(struct tracepoint *begin, + struct tracepoint *end) +{ + struct tracepoint *iter; + struct tracepoint_entry *mark_entry; + + mutex_lock(&tracepoints_mutex); + for (iter = begin; iter < end; iter++) { + mark_entry = get_tracepoint(iter->name); + if (mark_entry) { + set_tracepoint(&mark_entry, iter, + !!mark_entry->refcount); + } else { + disable_tracepoint(iter); + } + } + mutex_unlock(&tracepoints_mutex); +} + +/* + * Update probes, removing the faulty probes. + */ +static void tracepoint_update_probes(void) +{ + /* Core kernel tracepoints */ + tracepoint_update_probe_range(__start___tracepoints, + __stop___tracepoints); + /* tracepoints in modules. */ + module_update_tracepoints(); +} + +/** + * tracepoint_probe_register - Connect a probe to a tracepoint + * @name: tracepoint name + * @probe: probe handler + * + * Returns 0 if ok, error value on error. + * The probe address must at least be aligned on the architecture pointer size. + */ +int tracepoint_probe_register(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + int ret = 0; + void *old; + + mutex_lock(&tracepoints_mutex); + entry = get_tracepoint(name); + if (!entry) { + entry = add_tracepoint(name); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto end; + } + } + /* + * If we detect that a call_rcu is pending for this tracepoint, + * make sure it's executed now. + */ + if (entry->rcu_pending) + rcu_barrier(); + old = tracepoint_entry_add_probe(entry, probe); + if (IS_ERR(old)) { + ret = PTR_ERR(old); + goto end; + } + mutex_unlock(&tracepoints_mutex); + tracepoint_update_probes(); /* may update entry */ + mutex_lock(&tracepoints_mutex); + entry = get_tracepoint(name); + WARN_ON(!entry); + tracepoint_entry_free_old(entry, old); +end: + mutex_unlock(&tracepoints_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_register); + +/** + * tracepoint_probe_unregister - Disconnect a probe from a tracepoint + * @name: tracepoint name + * @probe: probe function pointer + * + * We do not need to call a synchronize_sched to make sure the probes have + * finished running before doing a module unload, because the module unload + * itself uses stop_machine(), which insures that every preempt disabled section + * have finished. + */ +int tracepoint_probe_unregister(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + int ret = -ENOENT; + + mutex_lock(&tracepoints_mutex); + entry = get_tracepoint(name); + if (!entry) + goto end; + if (entry->rcu_pending) + rcu_barrier(); + old = tracepoint_entry_remove_probe(entry, probe); + mutex_unlock(&tracepoints_mutex); + tracepoint_update_probes(); /* may update entry */ + mutex_lock(&tracepoints_mutex); + entry = get_tracepoint(name); + if (!entry) + goto end; + tracepoint_entry_free_old(entry, old); + remove_tracepoint(name); /* Ignore busy error message */ + ret = 0; +end: + mutex_unlock(&tracepoints_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); + +/** + * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. + * @tracepoint: current tracepoints (in), next tracepoint (out) + * @begin: beginning of the range + * @end: end of the range + * + * Returns whether a next tracepoint has been found (1) or not (0). + * Will return the first tracepoint in the range if the input tracepoint is + * NULL. + */ +int tracepoint_get_iter_range(struct tracepoint **tracepoint, + struct tracepoint *begin, struct tracepoint *end) +{ + if (!*tracepoint && begin != end) { + *tracepoint = begin; + return 1; + } + if (*tracepoint >= begin && *tracepoint < end) + return 1; + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); + +static void tracepoint_get_iter(struct tracepoint_iter *iter) +{ + int found = 0; + + /* Core kernel tracepoints */ + if (!iter->module) { + found = tracepoint_get_iter_range(&iter->tracepoint, + __start___tracepoints, __stop___tracepoints); + if (found) + goto end; + } + /* tracepoints in modules. */ + found = module_get_iter_tracepoints(iter); +end: + if (!found) + tracepoint_iter_reset(iter); +} + +void tracepoint_iter_start(struct tracepoint_iter *iter) +{ + tracepoint_get_iter(iter); +} +EXPORT_SYMBOL_GPL(tracepoint_iter_start); + +void tracepoint_iter_next(struct tracepoint_iter *iter) +{ + iter->tracepoint++; + /* + * iter->tracepoint may be invalid because we blindly incremented it. + * Make sure it is valid by marshalling on the tracepoints, getting the + * tracepoints from following modules if necessary. + */ + tracepoint_get_iter(iter); +} +EXPORT_SYMBOL_GPL(tracepoint_iter_next); + +void tracepoint_iter_stop(struct tracepoint_iter *iter) +{ +} +EXPORT_SYMBOL_GPL(tracepoint_iter_stop); + +void tracepoint_iter_reset(struct tracepoint_iter *iter) +{ + iter->module = NULL; + iter->tracepoint = NULL; +} +EXPORT_SYMBOL_GPL(tracepoint_iter_reset); |