summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2008-08-22 10:42:23 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2008-08-22 10:42:23 +1000
commitbe21d065e6d1443c0720d76502513c5efe418403 (patch)
tree623a67a7f7beb744535032ceba6ac2d11dd46c53 /kernel
parent44fbdd467bea6c7fadab795176a15da167b86859 (diff)
parentcdca408f6f55290eec706abad51826e113f2a091 (diff)
Merge commit 'ftrace/auto-ftrace-next'
Conflicts: kernel/module.c
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/exit.c10
-rw-r--r--kernel/fork.c3
-rw-r--r--kernel/kthread.c5
-rw-r--r--kernel/module.c66
-rw-r--r--kernel/notifier.c2
-rw-r--r--kernel/sched.c17
-rw-r--r--kernel/sched_clock.c8
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/trace/Kconfig1
-rw-r--r--kernel/trace/ftrace.c7
-rw-r--r--kernel/trace/trace_sched_switch.c120
-rw-r--r--kernel/trace/trace_sched_wakeup.c135
-rw-r--r--kernel/tracepoint.c476
14 files changed, 643 insertions, 211 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 4e1d7df7c3e2..8f9ce7ec21b6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_MARKERS) += marker.o
+obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FTRACE) += trace/
diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec40630149..b91dbb210953 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -47,6 +47,7 @@
#include <linux/blkdev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/tracehook.h>
+#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -149,7 +150,10 @@ static void __exit_signal(struct task_struct *tsk)
static void delayed_put_task_struct(struct rcu_head *rhp)
{
- put_task_struct(container_of(rhp, struct task_struct, rcu));
+ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+ trace_sched_process_free(tsk);
+ put_task_struct(tsk);
}
@@ -1076,6 +1080,8 @@ NORET_TYPE void do_exit(long code)
if (group_dead)
acct_process();
+ trace_sched_process_exit(tsk);
+
exit_sem(tsk);
exit_files(tsk);
exit_fs(tsk);
@@ -1677,6 +1683,8 @@ static long do_wait(enum pid_type type, struct pid *pid, int options,
struct task_struct *tsk;
int retval;
+ trace_sched_process_wait(pid);
+
add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 7ce2ebe84796..5013b591f67b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -58,6 +58,7 @@
#include <linux/tty.h>
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
+#include <trace/sched.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -1361,6 +1362,8 @@ long do_fork(unsigned long clone_flags,
if (!IS_ERR(p)) {
struct completion vfork;
+ trace_sched_process_fork(current, p);
+
nr = task_pid_vnr(p);
if (clone_flags & CLONE_PARENT_SETTID)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 96cff2f8710b..50598e29439a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -13,6 +13,7 @@
#include <linux/file.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <trace/sched.h>
#define KTHREAD_NICE_LEVEL (-5)
@@ -206,6 +207,8 @@ int kthread_stop(struct task_struct *k)
/* It could exit after stop_info.k set, but before wake_up_process. */
get_task_struct(k);
+ trace_sched_kthread_stop(k);
+
/* Must init completion *before* thread sees kthread_stop_info.k */
init_completion(&kthread_stop_info.done);
smp_wmb();
@@ -221,6 +224,8 @@ int kthread_stop(struct task_struct *k)
ret = kthread_stop_info.err;
mutex_unlock(&kthread_stop_lock);
+ trace_sched_kthread_stop_ret(ret);
+
return ret;
}
EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/module.c b/kernel/module.c
index b610466448ed..b227ca472ad3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -47,6 +47,7 @@
#include <asm/sections.h>
#include <linux/license.h>
#include <asm/sections.h>
+#include <linux/tracepoint.h>
#if 0
#define DEBUGP printk
@@ -1867,6 +1868,8 @@ static struct module *load_module(void __user *umod,
unsigned int markersindex;
unsigned int markersstringsindex;
unsigned int verboseindex;
+ unsigned int tracepointsindex;
+ unsigned int tracepointsstringsindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -2154,6 +2157,9 @@ static struct module *load_module(void __user *umod,
markersstringsindex = find_sec(hdr, sechdrs, secstrings,
"__markers_strings");
verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
+ tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
+ tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
+ "__tracepoints_strings");
/* Now do relocations. */
for (i = 1; i < hdr->e_shnum; i++) {
@@ -2181,6 +2187,12 @@ static struct module *load_module(void __user *umod,
mod->num_markers =
sechdrs[markersindex].sh_size / sizeof(*mod->markers);
#endif
+#ifdef CONFIG_TRACEPOINTS
+ mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
+ mod->num_tracepoints =
+ sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
+#endif
+
/* Find duplicate symbols */
err = verify_export_symbols(mod);
@@ -2199,11 +2211,16 @@ static struct module *load_module(void __user *umod,
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ if (!mod->taints) {
#ifdef CONFIG_MARKERS
- if (!mod->taints)
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
#endif
+#ifdef CONFIG_TRACEPOINTS
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+#endif
+ }
dynamic_printk_setup(mod, sechdrs, verboseindex, secstrings);
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
@@ -2755,3 +2772,50 @@ void module_update_markers(void)
mutex_unlock(&module_mutex);
}
#endif
+
+#ifdef CONFIG_TRACEPOINTS
+void module_update_tracepoints(void)
+{
+ struct module *mod;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list)
+ if (!mod->taints)
+ tracepoint_update_probe_range(mod->tracepoints,
+ mod->tracepoints + mod->num_tracepoints);
+ mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 0 if current not found.
+ * Returns 1 if current found.
+ */
+int module_get_iter_tracepoints(struct tracepoint_iter *iter)
+{
+ struct module *iter_mod;
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(iter_mod, &modules, list) {
+ if (!iter_mod->taints) {
+ /*
+ * Sorted module list
+ */
+ if (iter_mod < iter->module)
+ continue;
+ else if (iter_mod > iter->module)
+ iter->tracepoint = NULL;
+ found = tracepoint_get_iter_range(&iter->tracepoint,
+ iter_mod->tracepoints,
+ iter_mod->tracepoints
+ + iter_mod->num_tracepoints);
+ if (found) {
+ iter->module = iter_mod;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&module_mutex);
+ return found;
+}
+#endif
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 823be11584ef..4282c0a40a57 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier);
static ATOMIC_NOTIFIER_HEAD(die_chain);
-int notify_die(enum die_val val, const char *str,
+int notrace notify_die(enum die_val val, const char *str,
struct pt_regs *regs, long err, int trap, int sig)
{
struct die_args args = {
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a1ddb84e26d..7d995207feef 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
#include <linux/debugfs.h>
#include <linux/ctype.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
@@ -1918,6 +1919,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
* just go back and repeat.
*/
rq = task_rq_lock(p, &flags);
+ trace_sched_wait_task(rq, p);
running = task_running(rq, p);
on_rq = p->se.on_rq;
ncsw = 0;
@@ -2282,9 +2284,7 @@ out_activate:
success = 1;
out_running:
- trace_mark(kernel_sched_wakeup,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup(rq, p);
check_preempt_curr(rq, p);
p->state = TASK_RUNNING;
@@ -2417,9 +2417,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
}
- trace_mark(kernel_sched_wakeup_new,
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- p->pid, p->state, rq, p, rq->curr);
+ trace_sched_wakeup_new(rq, p);
check_preempt_curr(rq, p);
#ifdef CONFIG_SMP
if (p->sched_class->task_wake_up)
@@ -2592,11 +2590,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_mark(kernel_sched_schedule,
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- prev->pid, next->pid, prev->state,
- rq, prev, next);
+ trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -2836,6 +2830,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
|| unlikely(!cpu_active(dest_cpu)))
goto out;
+ trace_sched_migrate_task(rq, p, dest_cpu);
/* force the process onto the specified CPU */
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 204991a0bfa7..24b0092fac11 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -31,6 +31,7 @@
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -151,6 +152,13 @@ u64 sched_clock_cpu(int cpu)
struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
+ /*
+ * Normally this is not called in NMI context - but if it is,
+ * trying to do any locking here is totally lethal.
+ */
+ if (unlikely(in_nmi()))
+ return scd->clock;
+
if (unlikely(!sched_clock_running))
return 0ull;
diff --git a/kernel/signal.c b/kernel/signal.c
index e661b01d340f..bf40ecc87b26 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -27,6 +27,7 @@
#include <linux/freezer.h>
#include <linux/pid_namespace.h>
#include <linux/nsproxy.h>
+#include <trace/sched.h>
#include <asm/param.h>
#include <asm/uaccess.h>
@@ -803,6 +804,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
struct sigpending *pending;
struct sigqueue *q;
+ trace_sched_signal_send(sig, t);
+
assert_spin_locked(&t->sighand->siglock);
if (!prepare_signal(sig, t))
return 0;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 263e9e6bbd60..cae2637d5e68 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -14,6 +14,7 @@ config TRACING
bool
select DEBUG_FS
select STACKTRACE
+ select TRACEPOINTS
config FTRACE
bool "Kernel Function Tracer"
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f6e3af31b403..5552f2535bf6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -340,6 +340,13 @@ ftrace_record_ip(unsigned long ip)
int resched;
int atomic;
int cpu;
+ static int once;
+
+ if (!once && in_nmi()) {
+ once++;
+ ftrace_disabled = 1;
+ WARN_ON(1);
+ }
if (!ftrace_enabled || ftrace_disabled)
return;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index cb817a209aa0..789e927abc9c 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -9,8 +9,8 @@
#include <linux/debugfs.h>
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
-#include <linux/marker.h>
#include <linux/ftrace.h>
+#include <trace/sched.h>
#include "trace.h"
@@ -19,16 +19,17 @@ static int __read_mostly tracer_enabled;
static atomic_t sched_ref;
static void
-sched_switch_func(void *private, void *__rq, struct task_struct *prev,
+probe_sched_switch(struct rq *__rq, struct task_struct *prev,
struct task_struct *next)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
+ if (!atomic_read(&sched_ref))
+ return;
+
tracing_record_cmdline(prev);
tracing_record_cmdline(next);
@@ -37,95 +38,42 @@ sched_switch_func(void *private, void *__rq, struct task_struct *prev,
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- tracing_sched_switch_trace(tr, data, prev, next, flags);
+ tracing_sched_switch_trace(ctx_trace, data, prev, next, flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- if (!atomic_read(&sched_ref))
- return;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- sched_switch_func(probe_data, __rq, prev, next);
-}
-
static void
-wakeup_func(void *private, void *__rq, struct task_struct *wakee, struct
- task_struct *curr)
+probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee)
{
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
int cpu;
- if (!tracer_enabled)
+ if (!likely(tracer_enabled))
return;
- tracing_record_cmdline(curr);
+ tracing_record_cmdline(current);
local_irq_save(flags);
cpu = raw_smp_processor_id();
- data = tr->data[cpu];
+ data = ctx_trace->data[cpu];
disabled = atomic_inc_return(&data->disabled);
if (likely(disabled == 1))
- tracing_sched_wakeup_trace(tr, data, wakee, curr, flags);
+ tracing_sched_wakeup_trace(ctx_trace, data, wakee, current,
+ flags);
atomic_dec(&data->disabled);
local_irq_restore(flags);
}
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_func(probe_data, __rq, task, curr);
-}
-
static void sched_switch_reset(struct trace_array *tr)
{
int cpu;
@@ -140,60 +88,40 @@ static int tracing_sched_register(void)
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return ret;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &ctx_trace);
+ ret = register_trace_sched_wakeup_new(probe_sched_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &ctx_trace);
+ ret = register_trace_sched_switch(probe_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
return ret;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
return ret;
}
static void tracing_sched_unregister(void)
{
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &ctx_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &ctx_trace);
+ unregister_trace_sched_switch(probe_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_sched_wakeup);
+ unregister_trace_sched_wakeup(probe_sched_wakeup);
}
static void tracing_start_sched_switch(void)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e303ccb62cdf..08206b4e29c4 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -15,7 +15,7 @@
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
-#include <linux/marker.h>
+#include <trace/sched.h>
#include "trace.h"
@@ -112,18 +112,18 @@ static int report_latency(cycle_t delta)
}
static void notrace
-wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
+probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
unsigned long latency = 0, t0 = 0, t1 = 0;
- struct trace_array **ptr = private;
- struct trace_array *tr = *ptr;
struct trace_array_cpu *data;
cycle_t T0, T1, delta;
unsigned long flags;
long disabled;
int cpu;
+ tracing_record_cmdline(prev);
+
if (unlikely(!tracer_enabled))
return;
@@ -140,11 +140,11 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
return;
/* The task we are waiting for is waking up */
- data = tr->data[wakeup_cpu];
+ data = wakeup_trace->data[wakeup_cpu];
/* disable local data, not wakeup_cpu data */
cpu = raw_smp_processor_id();
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (likely(disabled != 1))
goto out;
@@ -155,7 +155,7 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
if (unlikely(!tracer_enabled || next != wakeup_task))
goto out_unlock;
- trace_function(tr, data, CALLER_ADDR1, CALLER_ADDR2, flags);
+ trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags);
/*
* usecs conversion is slow so we try to delay the conversion
@@ -174,39 +174,14 @@ wakeup_sched_switch(void *private, void *rq, struct task_struct *prev,
t0 = nsecs_to_usecs(T0);
t1 = nsecs_to_usecs(T1);
- update_max_tr(tr, wakeup_task, wakeup_cpu);
+ update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
out_unlock:
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
__raw_spin_unlock(&wakeup_lock);
local_irq_restore(flags);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-sched_switch_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct task_struct *prev;
- struct task_struct *next;
- struct rq *__rq;
-
- /* skip prev_pid %d next_pid %d prev_state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- __rq = va_arg(*args, typeof(__rq));
- prev = va_arg(*args, typeof(prev));
- next = va_arg(*args, typeof(next));
-
- tracing_record_cmdline(prev);
-
- /*
- * If tracer_switch_func only points to the local
- * switch func, it still needs the ptr passed to it.
- */
- wakeup_sched_switch(probe_data, __rq, prev, next);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
static void __wakeup_reset(struct trace_array *tr)
@@ -240,19 +215,24 @@ static void wakeup_reset(struct trace_array *tr)
}
static void
-wakeup_check_start(struct trace_array *tr, struct task_struct *p,
- struct task_struct *curr)
+probe_wakeup(struct rq *rq, struct task_struct *p)
{
int cpu = smp_processor_id();
unsigned long flags;
long disabled;
+ if (likely(!tracer_enabled))
+ return;
+
+ tracing_record_cmdline(p);
+ tracing_record_cmdline(current);
+
if (likely(!rt_task(p)) ||
p->prio >= wakeup_prio ||
- p->prio >= curr->prio)
+ p->prio >= current->prio)
return;
- disabled = atomic_inc_return(&tr->data[cpu]->disabled);
+ disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
if (unlikely(disabled != 1))
goto out;
@@ -264,7 +244,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
goto out_locked;
/* reset the trace */
- __wakeup_reset(tr);
+ __wakeup_reset(wakeup_trace);
wakeup_cpu = task_cpu(p);
wakeup_prio = p->prio;
@@ -274,74 +254,37 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
local_save_flags(flags);
- tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
- trace_function(tr, tr->data[wakeup_cpu],
+ wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
+ trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu],
CALLER_ADDR1, CALLER_ADDR2, flags);
out_locked:
__raw_spin_unlock(&wakeup_lock);
out:
- atomic_dec(&tr->data[cpu]->disabled);
-}
-
-static notrace void
-wake_up_callback(void *probe_data, void *call_data,
- const char *format, va_list *args)
-{
- struct trace_array **ptr = probe_data;
- struct trace_array *tr = *ptr;
- struct task_struct *curr;
- struct task_struct *task;
- struct rq *__rq;
-
- if (likely(!tracer_enabled))
- return;
-
- /* Skip pid %d state %ld */
- (void)va_arg(*args, int);
- (void)va_arg(*args, long);
- /* now get the meat: "rq %p task %p rq->curr %p" */
- __rq = va_arg(*args, typeof(__rq));
- task = va_arg(*args, typeof(task));
- curr = va_arg(*args, typeof(curr));
-
- tracing_record_cmdline(task);
- tracing_record_cmdline(curr);
-
- wakeup_check_start(tr, task, curr);
+ atomic_dec(&wakeup_trace->data[cpu]->disabled);
}
static void start_wakeup_tracer(struct trace_array *tr)
{
int ret;
- ret = marker_probe_register("kernel_sched_wakeup",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup\n");
return;
}
- ret = marker_probe_register("kernel_sched_wakeup_new",
- "pid %d state %ld ## rq %p task %p rq->curr %p",
- wake_up_callback,
- &wakeup_trace);
+ ret = register_trace_sched_wakeup_new(probe_wakeup);
if (ret) {
- pr_info("wakeup trace: Couldn't add marker"
+ pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_wakeup_new\n");
goto fail_deprobe;
}
- ret = marker_probe_register("kernel_sched_schedule",
- "prev_pid %d next_pid %d prev_state %ld "
- "## rq %p prev %p next %p",
- sched_switch_callback,
- &wakeup_trace);
+ ret = register_trace_sched_switch(probe_wakeup_sched_switch);
if (ret) {
- pr_info("sched trace: Couldn't add marker"
+ pr_info("sched trace: Couldn't activate tracepoint"
" probe to kernel_sched_schedule\n");
goto fail_deprobe_wake_new;
}
@@ -363,28 +306,18 @@ static void start_wakeup_tracer(struct trace_array *tr)
return;
fail_deprobe_wake_new:
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
fail_deprobe:
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
static void stop_wakeup_tracer(struct trace_array *tr)
{
tracer_enabled = 0;
unregister_ftrace_function(&trace_ops);
- marker_probe_unregister("kernel_sched_schedule",
- sched_switch_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup_new",
- wake_up_callback,
- &wakeup_trace);
- marker_probe_unregister("kernel_sched_wakeup",
- wake_up_callback,
- &wakeup_trace);
+ unregister_trace_sched_switch(probe_wakeup_sched_switch);
+ unregister_trace_sched_wakeup_new(probe_wakeup);
+ unregister_trace_sched_wakeup(probe_wakeup);
}
static void wakeup_tracer_init(struct trace_array *tr)
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
new file mode 100644
index 000000000000..c7c62a4a75f5
--- /dev/null
+++ b/kernel/tracepoint.c
@@ -0,0 +1,476 @@
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/tracepoint.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+extern struct tracepoint __start___tracepoints[];
+extern struct tracepoint __stop___tracepoints[];
+
+/* Set to 1 to enable tracepoint debug output */
+static const int tracepoint_debug;
+
+/*
+ * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
+ * builtin and module tracepoints and the hash table.
+ */
+static DEFINE_MUTEX(tracepoints_mutex);
+
+/*
+ * Tracepoint hash table, containing the active tracepoints.
+ * Protected by tracepoints_mutex.
+ */
+#define TRACEPOINT_HASH_BITS 6
+#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
+
+/*
+ * Note about RCU :
+ * It is used to to delay the free of multiple probes array until a quiescent
+ * state is reached.
+ * Tracepoint entries modifications are protected by the tracepoints_mutex.
+ */
+struct tracepoint_entry {
+ struct hlist_node hlist;
+ void **funcs;
+ int refcount; /* Number of times armed. 0 if disarmed. */
+ struct rcu_head rcu;
+ void *oldptr;
+ unsigned char rcu_pending:1;
+ char name[0];
+};
+
+static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
+
+static void free_old_closure(struct rcu_head *head)
+{
+ struct tracepoint_entry *entry = container_of(head,
+ struct tracepoint_entry, rcu);
+ kfree(entry->oldptr);
+ /* Make sure we free the data before setting the pending flag to 0 */
+ smp_wmb();
+ entry->rcu_pending = 0;
+}
+
+static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old)
+{
+ if (!old)
+ return;
+ entry->oldptr = old;
+ entry->rcu_pending = 1;
+ /* write rcu_pending before calling the RCU callback */
+ smp_wmb();
+#ifdef CONFIG_PREEMPT_RCU
+ synchronize_sched(); /* Until we have the call_rcu_sched() */
+#endif
+ call_rcu(&entry->rcu, free_old_closure);
+}
+
+static void debug_print_probes(struct tracepoint_entry *entry)
+{
+ int i;
+
+ if (!tracepoint_debug)
+ return;
+
+ for (i = 0; entry->funcs[i]; i++)
+ printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
+}
+
+static void *
+tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
+{
+ int nr_probes = 0;
+ void **old, **new;
+
+ WARN_ON(!probe);
+
+ debug_print_probes(entry);
+ old = entry->funcs;
+ if (old) {
+ /* (N -> N+1), (N != 0, 1) probes */
+ for (nr_probes = 0; old[nr_probes]; nr_probes++)
+ if (old[nr_probes] == probe)
+ return ERR_PTR(-EEXIST);
+ }
+ /* + 2 : one for new probe, one for NULL func */
+ new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (old)
+ memcpy(new, old, nr_probes * sizeof(void *));
+ new[nr_probes] = probe;
+ entry->refcount = nr_probes + 1;
+ entry->funcs = new;
+ debug_print_probes(entry);
+ return old;
+}
+
+static void *
+tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
+{
+ int nr_probes = 0, nr_del = 0, i;
+ void **old, **new;
+
+ old = entry->funcs;
+
+ debug_print_probes(entry);
+ /* (N -> M), (N > 1, M >= 0) probes */
+ for (nr_probes = 0; old[nr_probes]; nr_probes++) {
+ if ((!probe || old[nr_probes] == probe))
+ nr_del++;
+ }
+
+ if (nr_probes - nr_del == 0) {
+ /* N -> 0, (N > 1) */
+ entry->funcs = NULL;
+ entry->refcount = 0;
+ debug_print_probes(entry);
+ return old;
+ } else {
+ int j = 0;
+ /* N -> M, (N > 1, M > 0) */
+ /* + 1 for NULL */
+ new = kzalloc((nr_probes - nr_del + 1)
+ * sizeof(void *), GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; old[i]; i++)
+ if ((probe && old[i] != probe))
+ new[j++] = old[i];
+ entry->refcount = nr_probes - nr_del;
+ entry->funcs = new;
+ }
+ debug_print_probes(entry);
+ return old;
+}
+
+/*
+ * Get tracepoint if the tracepoint is present in the tracepoint hash table.
+ * Must be called with tracepoints_mutex held.
+ * Returns NULL if not present.
+ */
+static struct tracepoint_entry *get_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ u32 hash = jhash(name, strlen(name), 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name))
+ return e;
+ }
+ return NULL;
+}
+
+/*
+ * Add the tracepoint to the tracepoint hash table. Must be called with
+ * tracepoints_mutex held.
+ */
+static struct tracepoint_entry *add_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ size_t name_len = strlen(name) + 1;
+ u32 hash = jhash(name, name_len-1, 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name)) {
+ printk(KERN_NOTICE
+ "tracepoint %s busy\n", name);
+ return ERR_PTR(-EEXIST); /* Already there */
+ }
+ }
+ /*
+ * Using kmalloc here to allocate a variable length element. Could
+ * cause some memory fragmentation if overused.
+ */
+ e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+ memcpy(&e->name[0], name, name_len);
+ e->funcs = NULL;
+ e->refcount = 0;
+ e->rcu_pending = 0;
+ hlist_add_head(&e->hlist, head);
+ return e;
+}
+
+/*
+ * Remove the tracepoint from the tracepoint hash table. Must be called with
+ * mutex_lock held.
+ */
+static int remove_tracepoint(const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct tracepoint_entry *e;
+ int found = 0;
+ size_t len = strlen(name) + 1;
+ u32 hash = jhash(name, len-1, 0);
+
+ head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(name, e->name)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return -ENOENT;
+ if (e->refcount)
+ return -EBUSY;
+ hlist_del(&e->hlist);
+ /* Make sure the call_rcu has been executed */
+ if (e->rcu_pending)
+ rcu_barrier();
+ kfree(e);
+ return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one tracepoint.
+ */
+static void set_tracepoint(struct tracepoint_entry **entry,
+ struct tracepoint *elem, int active)
+{
+ WARN_ON(strcmp((*entry)->name, elem->name) != 0);
+
+ /*
+ * rcu_assign_pointer has a smp_wmb() which makes sure that the new
+ * probe callbacks array is consistent before setting a pointer to it.
+ * This array is referenced by __DO_TRACE from
+ * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
+ * is used.
+ */
+ rcu_assign_pointer(elem->funcs, (*entry)->funcs);
+ elem->state = active;
+}
+
+/*
+ * Disable a tracepoint and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function insures that the original callback is not used anymore. This insured
+ * by preempt_disable around the call site.
+ */
+static void disable_tracepoint(struct tracepoint *elem)
+{
+ elem->state = 0;
+}
+
+/**
+ * tracepoint_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of tracepoints.
+ */
+void tracepoint_update_probe_range(struct tracepoint *begin,
+ struct tracepoint *end)
+{
+ struct tracepoint *iter;
+ struct tracepoint_entry *mark_entry;
+
+ mutex_lock(&tracepoints_mutex);
+ for (iter = begin; iter < end; iter++) {
+ mark_entry = get_tracepoint(iter->name);
+ if (mark_entry) {
+ set_tracepoint(&mark_entry, iter,
+ !!mark_entry->refcount);
+ } else {
+ disable_tracepoint(iter);
+ }
+ }
+ mutex_unlock(&tracepoints_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ */
+static void tracepoint_update_probes(void)
+{
+ /* Core kernel tracepoints */
+ tracepoint_update_probe_range(__start___tracepoints,
+ __stop___tracepoints);
+ /* tracepoints in modules. */
+ module_update_tracepoints();
+}
+
+/**
+ * tracepoint_probe_register - Connect a probe to a tracepoint
+ * @name: tracepoint name
+ * @probe: probe handler
+ *
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int tracepoint_probe_register(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ int ret = 0;
+ void *old;
+
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry) {
+ entry = add_tracepoint(name);
+ if (IS_ERR(entry)) {
+ ret = PTR_ERR(entry);
+ goto end;
+ }
+ }
+ /*
+ * If we detect that a call_rcu is pending for this tracepoint,
+ * make sure it's executed now.
+ */
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = tracepoint_entry_add_probe(entry, probe);
+ if (IS_ERR(old)) {
+ ret = PTR_ERR(old);
+ goto end;
+ }
+ mutex_unlock(&tracepoints_mutex);
+ tracepoint_update_probes(); /* may update entry */
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ WARN_ON(!entry);
+ tracepoint_entry_free_old(entry, old);
+end:
+ mutex_unlock(&tracepoints_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_register);
+
+/**
+ * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
+ * @name: tracepoint name
+ * @probe: probe function pointer
+ *
+ * We do not need to call a synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which insures that every preempt disabled section
+ * have finished.
+ */
+int tracepoint_probe_unregister(const char *name, void *probe)
+{
+ struct tracepoint_entry *entry;
+ void *old;
+ int ret = -ENOENT;
+
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry)
+ goto end;
+ if (entry->rcu_pending)
+ rcu_barrier();
+ old = tracepoint_entry_remove_probe(entry, probe);
+ mutex_unlock(&tracepoints_mutex);
+ tracepoint_update_probes(); /* may update entry */
+ mutex_lock(&tracepoints_mutex);
+ entry = get_tracepoint(name);
+ if (!entry)
+ goto end;
+ tracepoint_entry_free_old(entry, old);
+ remove_tracepoint(name); /* Ignore busy error message */
+ ret = 0;
+end:
+ mutex_unlock(&tracepoints_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+
+/**
+ * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
+ * @tracepoint: current tracepoints (in), next tracepoint (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next tracepoint has been found (1) or not (0).
+ * Will return the first tracepoint in the range if the input tracepoint is
+ * NULL.
+ */
+int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+ struct tracepoint *begin, struct tracepoint *end)
+{
+ if (!*tracepoint && begin != end) {
+ *tracepoint = begin;
+ return 1;
+ }
+ if (*tracepoint >= begin && *tracepoint < end)
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
+
+static void tracepoint_get_iter(struct tracepoint_iter *iter)
+{
+ int found = 0;
+
+ /* Core kernel tracepoints */
+ if (!iter->module) {
+ found = tracepoint_get_iter_range(&iter->tracepoint,
+ __start___tracepoints, __stop___tracepoints);
+ if (found)
+ goto end;
+ }
+ /* tracepoints in modules. */
+ found = module_get_iter_tracepoints(iter);
+end:
+ if (!found)
+ tracepoint_iter_reset(iter);
+}
+
+void tracepoint_iter_start(struct tracepoint_iter *iter)
+{
+ tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_start);
+
+void tracepoint_iter_next(struct tracepoint_iter *iter)
+{
+ iter->tracepoint++;
+ /*
+ * iter->tracepoint may be invalid because we blindly incremented it.
+ * Make sure it is valid by marshalling on the tracepoints, getting the
+ * tracepoints from following modules if necessary.
+ */
+ tracepoint_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_next);
+
+void tracepoint_iter_stop(struct tracepoint_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
+
+void tracepoint_iter_reset(struct tracepoint_iter *iter)
+{
+ iter->module = NULL;
+ iter->tracepoint = NULL;
+}
+EXPORT_SYMBOL_GPL(tracepoint_iter_reset);