From 0f67f04ffcb592d065a20862a82d4539e0f8e909 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 3 Feb 2015 11:56:20 -0500
Subject: tracing: Only create tracer options files if directory exists

Do not bother creating tracer options if no tracing directory
exists. If a tracer is enabled via the command line, and is
started before the tracing directory is created, then it won't have
its tracer specific options created.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 38c613ede10d..d4627f15407a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4172,8 +4172,11 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
 		free_snapshot(tr);
 	}
 #endif
-	/* Currently, only the top instance has options */
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+	/*
+	 * Only enable if the directory has been created already.
+	 * Currently, only the top instance has options
+	 */
+	if (tr->dir && tr->flags & TRACE_ARRAY_FL_GLOBAL) {
 		destroy_trace_option_files(topts);
 		topts = create_trace_option_files(tr, t);
 	}
-- 
cgit v1.2.3


From 09d23a1d8a82e814bd56a4f121b80ea8214ac49d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 3 Feb 2015 12:45:53 -0500
Subject: tracing: Create cmdline tracer options on tracing fs init

The options for cmdline tracers are not created if the debugfs system
is not ready yet. If tracing has started before debugfs is up, then the
option files for the tracer are not created. Create them when creating
the tracing directory if the current tracer requires option files.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d4627f15407a..05e0e50539fc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4105,9 +4105,24 @@ static void tracing_set_nop(struct trace_array *tr)
 	tr->current_trace = &nop_trace;
 }
 
-static int tracing_set_tracer(struct trace_array *tr, const char *buf)
+static void update_tracer_options(struct trace_array *tr, struct tracer *t)
 {
 	static struct trace_option_dentry *topts;
+
+	/* Only enable if the directory has been created already. */
+	if (!tr->dir)
+		return;
+
+	/* Currently, only the top instance has options */
+	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL))
+		return;
+
+	destroy_trace_option_files(topts);
+	topts = create_trace_option_files(tr, t);
+}
+
+static int tracing_set_tracer(struct trace_array *tr, const char *buf)
+{
 	struct tracer *t;
 #ifdef CONFIG_TRACER_MAX_TRACE
 	bool had_max_tr;
@@ -4172,14 +4187,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf)
 		free_snapshot(tr);
 	}
 #endif
-	/*
-	 * Only enable if the directory has been created already.
-	 * Currently, only the top instance has options
-	 */
-	if (tr->dir && tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		destroy_trace_option_files(topts);
-		topts = create_trace_option_files(tr, t);
-	}
+	update_tracer_options(tr, t);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	if (t->use_max_tr && !had_max_tr) {
@@ -6578,6 +6586,10 @@ static __init int tracer_init_debugfs(void)
 
 	create_trace_options_dir(&global_trace);
 
+	/* If the tracer was started via cmdline, create options for it here */
+	if (global_trace.current_trace != &nop_trace)
+		update_tracer_options(&global_trace, global_trace.current_trace);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 8434dc9340cd2e117fc944cf7526263bf490a52a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 20 Jan 2015 12:13:40 -0500
Subject: tracing: Convert the tracing facility over to use tracefs

debugfs was fine for the tracing facility as a quick way to get
an interface. Now that tracing has matured, it should separate itself
from debugfs such that it can be mounted separately without needing
to mount all of debugfs with it. That is, users resist using tracing
because it requires mounting debugfs. Having tracing have its own file
system lets users get the features of tracing without needing to bring
in the rest of the kernel's debug infrastructure.

Another reason for tracefs is that debugfs does not support mkdir.
Currently, to create instances, one does a mkdir in the tracing/instances
directory. This is implemented via a hack that forces debugfs to do
something it was not intended to do. By converting over to tracefs, this
hack can be removed and mkdir can be properly implemented. This patch does
not address this yet, but it lays the groundwork for that to be done.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c                | 22 +++++++--------
 kernel/trace/trace.c                 | 55 +++++++++++++++++++++---------------
 kernel/trace/trace.h                 |  2 +-
 kernel/trace/trace_events.c          | 32 ++++++++++-----------
 kernel/trace/trace_functions_graph.c |  7 ++---
 kernel/trace/trace_kprobe.c          | 10 +++----
 kernel/trace/trace_probe.h           |  2 +-
 kernel/trace/trace_stat.c            | 10 +++----
 8 files changed, 74 insertions(+), 66 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 45e5cb143d17..fcc0e7052a79 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -18,7 +18,7 @@
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
 #include <linux/suspend.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/hardirq.h>
 #include <linux/kthread.h>
 #include <linux/uaccess.h>
@@ -1008,7 +1008,7 @@ static struct tracer_stat function_stats __initdata = {
 	.stat_show	= function_stat_show
 };
 
-static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
 {
 	struct ftrace_profile_stat *stat;
 	struct dentry *entry;
@@ -1044,15 +1044,15 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 		}
 	}
 
-	entry = debugfs_create_file("function_profile_enabled", 0644,
+	entry = tracefs_create_file("function_profile_enabled", 0644,
 				    d_tracer, NULL, &ftrace_profile_fops);
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'function_profile_enabled' entry\n");
 }
 
 #else /* CONFIG_FUNCTION_PROFILER */
-static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
+static __init void ftrace_profile_tracefs(struct dentry *d_tracer)
 {
 }
 #endif /* CONFIG_FUNCTION_PROFILER */
@@ -4690,7 +4690,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
 	mutex_unlock(&ftrace_lock);
 }
 
-static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
+static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
 {
 
 	trace_create_file("available_filter_functions", 0444,
@@ -4998,7 +4998,7 @@ static int __init ftrace_nodyn_init(void)
 }
 core_initcall(ftrace_nodyn_init);
 
-static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; }
+static inline int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { return 0; }
 static inline void ftrace_startup_enable(int command) { }
 static inline void ftrace_startup_all(int command) { }
 /* Keep as macros so we do not need to define the commands */
@@ -5451,7 +5451,7 @@ static const struct file_operations ftrace_pid_fops = {
 	.release	= ftrace_pid_release,
 };
 
-static __init int ftrace_init_debugfs(void)
+static __init int ftrace_init_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -5459,16 +5459,16 @@ static __init int ftrace_init_debugfs(void)
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	ftrace_init_dyn_debugfs(d_tracer);
+	ftrace_init_dyn_tracefs(d_tracer);
 
 	trace_create_file("set_ftrace_pid", 0644, d_tracer,
 			    NULL, &ftrace_pid_fops);
 
-	ftrace_profile_debugfs(d_tracer);
+	ftrace_profile_tracefs(d_tracer);
 
 	return 0;
 }
-fs_initcall(ftrace_init_debugfs);
+fs_initcall(ftrace_init_tracefs);
 
 /**
  * ftrace_kill - kill ftrace
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 05e0e50539fc..6c4739bee4bb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -20,6 +20,7 @@
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
 #include <linux/linkage.h>
@@ -5828,6 +5829,14 @@ static inline __init int register_snapshot_cmd(void) { return 0; }
 
 static struct dentry *tracing_get_dentry(struct trace_array *tr)
 {
+	if (WARN_ON(!tr->dir))
+		return ERR_PTR(-ENODEV);
+
+	/* Top directory uses NULL as the parent */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
+		return NULL;
+
+	/* All sub buffers have a descriptor */
 	return tr->dir;
 }
 
@@ -5842,10 +5851,10 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
 	if (IS_ERR(d_tracer))
 		return NULL;
 
-	tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
+	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
 
 	WARN_ONCE(!tr->percpu_dir,
-		  "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
+		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
 
 	return tr->percpu_dir;
 }
@@ -5862,7 +5871,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
 }
 
 static void
-tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
+tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
 {
 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
 	struct dentry *d_cpu;
@@ -5872,9 +5881,9 @@ tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 		return;
 
 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
-	d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
+	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
 	if (!d_cpu) {
-		pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
+		pr_warning("Could not create tracefs '%s' entry\n", cpu_dir);
 		return;
 	}
 
@@ -6026,9 +6035,9 @@ struct dentry *trace_create_file(const char *name,
 {
 	struct dentry *ret;
 
-	ret = debugfs_create_file(name, mode, parent, data, fops);
+	ret = tracefs_create_file(name, mode, parent, data, fops);
 	if (!ret)
-		pr_warning("Could not create debugfs '%s' entry\n", name);
+		pr_warning("Could not create tracefs '%s' entry\n", name);
 
 	return ret;
 }
@@ -6045,9 +6054,9 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr)
 	if (IS_ERR(d_tracer))
 		return NULL;
 
-	tr->options = debugfs_create_dir("options", d_tracer);
+	tr->options = tracefs_create_dir("options", d_tracer);
 	if (!tr->options) {
-		pr_warning("Could not create debugfs directory 'options'\n");
+		pr_warning("Could not create tracefs directory 'options'\n");
 		return NULL;
 	}
 
@@ -6116,7 +6125,7 @@ destroy_trace_option_files(struct trace_option_dentry *topts)
 		return;
 
 	for (cnt = 0; topts[cnt].opt; cnt++)
-		debugfs_remove(topts[cnt].entry);
+		tracefs_remove(topts[cnt].entry);
 
 	kfree(topts);
 }
@@ -6205,7 +6214,7 @@ static const struct file_operations rb_simple_fops = {
 struct dentry *trace_instance_dir;
 
 static void
-init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
+init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 
 static int
 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
@@ -6321,17 +6330,17 @@ static int new_instance_create(const char *name)
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
 
-	tr->dir = debugfs_create_dir(name, trace_instance_dir);
+	tr->dir = tracefs_create_dir(name, trace_instance_dir);
 	if (!tr->dir)
 		goto out_free_tr;
 
 	ret = event_trace_add_tracer(tr->dir, tr);
 	if (ret) {
-		debugfs_remove_recursive(tr->dir);
+		tracefs_remove_recursive(tr->dir);
 		goto out_free_tr;
 	}
 
-	init_tracer_debugfs(tr, tr->dir);
+	init_tracer_tracefs(tr, tr->dir);
 
 	list_add(&tr->list, &ftrace_trace_arrays);
 
@@ -6404,7 +6413,7 @@ static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t m
 		return -ENOENT;
 
 	/*
-	 * The inode mutex is locked, but debugfs_create_dir() will also
+	 * The inode mutex is locked, but tracefs_create_dir() will also
 	 * take the mutex. As the instances directory can not be destroyed
 	 * or changed in any other way, it is safe to unlock it, and
 	 * let the dentry try. If two users try to make the same dir at
@@ -6434,7 +6443,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry)
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	/*
-	 * The inode mutex is locked, but debugfs_create_dir() will also
+	 * The inode mutex is locked, but tracefs_create_dir() will also
 	 * take the mutex. As the instances directory can not be destroyed
 	 * or changed in any other way, it is safe to unlock it, and
 	 * let the dentry try. If two users try to make the same dir at
@@ -6459,7 +6468,7 @@ static const struct inode_operations instance_dir_inode_operations = {
 
 static __init void create_trace_instances(struct dentry *d_tracer)
 {
-	trace_instance_dir = debugfs_create_dir("instances", d_tracer);
+	trace_instance_dir = tracefs_create_dir("instances", d_tracer);
 	if (WARN_ON(!trace_instance_dir))
 		return;
 
@@ -6468,7 +6477,7 @@ static __init void create_trace_instances(struct dentry *d_tracer)
 }
 
 static void
-init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
+init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 {
 	int cpu;
 
@@ -6522,7 +6531,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
 #endif
 
 	for_each_tracing_cpu(cpu)
-		tracing_init_debugfs_percpu(tr, cpu);
+		tracing_init_tracefs_percpu(tr, cpu);
 
 }
 
@@ -6550,10 +6559,10 @@ struct dentry *tracing_init_dentry(void)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	return tr->dir;
+	return NULL;
 }
 
-static __init int tracer_init_debugfs(void)
+static __init int tracer_init_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -6563,7 +6572,7 @@ static __init int tracer_init_debugfs(void)
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	init_tracer_debugfs(&global_trace, d_tracer);
+	init_tracer_tracefs(&global_trace, d_tracer);
 
 	trace_create_file("tracing_thresh", 0644, d_tracer,
 			&global_trace, &tracing_thresh_fops);
@@ -6925,5 +6934,5 @@ __init static int clear_boot_tracer(void)
 	return 0;
 }
 
-fs_initcall(tracer_init_debugfs);
+fs_initcall(tracer_init_tracefs);
 late_initcall(clear_boot_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dd8205a35760..d951deddec89 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -334,7 +334,7 @@ struct tracer_flags {
 
 
 /**
- * struct tracer - a specific tracer and its callbacks to interact with debugfs
+ * struct tracer - a specific tracer and its callbacks to interact with tracefs
  * @name: the name chosen to select it on the available_tracers file
  * @init: called when one switches to this tracer (echo name > current_tracer)
  * @reset: called when one switches to another tracer
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index db54dda10ccc..0d2e47370ee7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -13,7 +13,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kthread.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
@@ -480,7 +480,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir)
 		return;
 
 	if (!--dir->nr_events) {
-		debugfs_remove_recursive(dir->entry);
+		tracefs_remove_recursive(dir->entry);
 		list_del(&dir->list);
 		__put_system_dir(dir);
 	}
@@ -499,7 +499,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
 		}
 		spin_unlock(&dir->d_lock);
 
-		debugfs_remove_recursive(dir);
+		tracefs_remove_recursive(dir);
 	}
 
 	list_del(&file->list);
@@ -1526,7 +1526,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 	} else
 		__get_system(system);
 
-	dir->entry = debugfs_create_dir(name, parent);
+	dir->entry = tracefs_create_dir(name, parent);
 	if (!dir->entry) {
 		pr_warn("Failed to create system directory %s\n", name);
 		__put_system(system);
@@ -1539,12 +1539,12 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
 	dir->subsystem = system;
 	file->system = dir;
 
-	entry = debugfs_create_file("filter", 0644, dir->entry, dir,
+	entry = tracefs_create_file("filter", 0644, dir->entry, dir,
 				    &ftrace_subsystem_filter_fops);
 	if (!entry) {
 		kfree(system->filter);
 		system->filter = NULL;
-		pr_warn("Could not create debugfs '%s/filter' entry\n", name);
+		pr_warn("Could not create tracefs '%s/filter' entry\n", name);
 	}
 
 	trace_create_file("enable", 0644, dir->entry, dir,
@@ -1585,9 +1585,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
 		d_events = parent;
 
 	name = ftrace_event_name(call);
-	file->dir = debugfs_create_dir(name, d_events);
+	file->dir = tracefs_create_dir(name, d_events);
 	if (!file->dir) {
-		pr_warn("Could not create debugfs '%s' directory\n", name);
+		pr_warn("Could not create tracefs '%s' directory\n", name);
 		return -1;
 	}
 
@@ -2228,7 +2228,7 @@ static inline int register_event_cmds(void) { return 0; }
 /*
  * The top level array has already had its ftrace_event_file
  * descriptors created in order to allow for early events to
- * be recorded. This function is called after the debugfs has been
+ * be recorded. This function is called after the tracefs has been
  * initialized, and we now have to create the files associated
  * to the events.
  */
@@ -2311,16 +2311,16 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
 	struct dentry *d_events;
 	struct dentry *entry;
 
-	entry = debugfs_create_file("set_event", 0644, parent,
+	entry = tracefs_create_file("set_event", 0644, parent,
 				    tr, &ftrace_set_event_fops);
 	if (!entry) {
-		pr_warn("Could not create debugfs 'set_event' entry\n");
+		pr_warn("Could not create tracefs 'set_event' entry\n");
 		return -ENOMEM;
 	}
 
-	d_events = debugfs_create_dir("events", parent);
+	d_events = tracefs_create_dir("events", parent);
 	if (!d_events) {
-		pr_warn("Could not create debugfs 'events' directory\n");
+		pr_warn("Could not create tracefs 'events' directory\n");
 		return -ENOMEM;
 	}
 
@@ -2412,7 +2412,7 @@ int event_trace_del_tracer(struct trace_array *tr)
 
 	down_write(&trace_event_sem);
 	__trace_remove_event_dirs(tr);
-	debugfs_remove_recursive(tr->event_dir);
+	tracefs_remove_recursive(tr->event_dir);
 	up_write(&trace_event_sem);
 
 	tr->event_dir = NULL;
@@ -2534,10 +2534,10 @@ static __init int event_trace_init(void)
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	entry = debugfs_create_file("available_events", 0444, d_tracer,
+	entry = tracefs_create_file("available_events", 0444, d_tracer,
 				    tr, &ftrace_avail_fops);
 	if (!entry)
-		pr_warn("Could not create debugfs 'available_events' entry\n");
+		pr_warn("Could not create tracefs 'available_events' entry\n");
 
 	if (trace_define_common_fields())
 		pr_warn("tracing: Failed to allocate common fields");
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2d25ad1526bb..9cfea4c6d314 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -6,7 +6,6 @@
  * is Copyright (c) Steven Rostedt <srostedt@redhat.com>
  *
  */
-#include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
@@ -151,7 +150,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 	 * The curr_ret_stack is initialized to -1 and get increased
 	 * in this function.  So it can be less than -1 only if it was
 	 * filtered out via ftrace_graph_notrace_addr() which can be
-	 * set from set_graph_notrace file in debugfs by user.
+	 * set from set_graph_notrace file in tracefs by user.
 	 */
 	if (current->curr_ret_stack < -1)
 		return -EBUSY;
@@ -1432,7 +1431,7 @@ static const struct file_operations graph_depth_fops = {
 	.llseek		= generic_file_llseek,
 };
 
-static __init int init_graph_debugfs(void)
+static __init int init_graph_tracefs(void)
 {
 	struct dentry *d_tracer;
 
@@ -1445,7 +1444,7 @@ static __init int init_graph_debugfs(void)
 
 	return 0;
 }
-fs_initcall(init_graph_debugfs);
+fs_initcall(init_graph_tracefs);
 
 static __init int init_graph_trace(void)
 {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b4a00def88f5..c1c6655847c8 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1310,7 +1310,7 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
 	return ret;
 }
 
-/* Make a debugfs interface for controlling probe points */
+/* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
 	struct dentry *d_tracer;
@@ -1323,20 +1323,20 @@ static __init int init_kprobe_trace(void)
 	if (IS_ERR(d_tracer))
 		return 0;
 
-	entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
+	entry = tracefs_create_file("kprobe_events", 0644, d_tracer,
 				    NULL, &kprobe_events_ops);
 
 	/* Event list interface */
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'kprobe_events' entry\n");
 
 	/* Profile interface */
-	entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
+	entry = tracefs_create_file("kprobe_profile", 0444, d_tracer,
 				    NULL, &kprobe_profile_ops);
 
 	if (!entry)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'kprobe_profile' entry\n");
 	return 0;
 }
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 4f815fbce16d..19aff635841a 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -25,7 +25,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 75e19e86c954..6cf935316769 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -12,7 +12,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
-#include <linux/debugfs.h>
+#include <linux/tracefs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
@@ -65,7 +65,7 @@ static void reset_stat_session(struct stat_session *session)
 
 static void destroy_session(struct stat_session *session)
 {
-	debugfs_remove(session->file);
+	tracefs_remove(session->file);
 	__reset_stat_session(session);
 	mutex_destroy(&session->stat_mutex);
 	kfree(session);
@@ -279,9 +279,9 @@ static int tracing_stat_init(void)
 	if (IS_ERR(d_tracing))
 		return 0;
 
-	stat_dir = debugfs_create_dir("trace_stat", d_tracing);
+	stat_dir = tracefs_create_dir("trace_stat", d_tracing);
 	if (!stat_dir)
-		pr_warning("Could not create debugfs "
+		pr_warning("Could not create tracefs "
 			   "'trace_stat' entry\n");
 	return 0;
 }
@@ -291,7 +291,7 @@ static int init_stat_file(struct stat_session *session)
 	if (!stat_dir && tracing_stat_init())
 		return -ENODEV;
 
-	session->file = debugfs_create_file(session->ts->name, 0644,
+	session->file = tracefs_create_file(session->ts->name, 0644,
 					    stat_dir,
 					    session, &tracing_stat_fops);
 	if (!session->file)
-- 
cgit v1.2.3


From f76180bc07abc399977bfbe8c43bf58c4570e893 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 20 Jan 2015 15:48:46 -0500
Subject: tracing: Automatically mount tracefs on debugfs/tracing

As tools currently rely on the tracing directory in debugfs, we cannot
just create a tracefs infrastructure and expect sysadmins to mount
the new tracefs to have their old tools work.

Instead, the debugfs tracing directory is still created and the tracefs
file system is mounted there when the debugfs filesystem is mounted.

No longer does the tracing infrastructure update the debugfs file system,
but instead interacts with the tracefs file system. But now, it still
appears to the user like nothing changed, except you also have the feature
of mounting just the tracing system without needing all of debugfs!

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 6c4739bee4bb..b4aa936509d2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@
 #include <linux/splice.h>
 #include <linux/kdebug.h>
 #include <linux/string.h>
+#include <linux/mount.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
@@ -6535,6 +6536,28 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 
 }
 
+static struct vfsmount *trace_automount(void *ingore)
+{
+	struct vfsmount *mnt;
+	struct file_system_type *type;
+
+	/*
+	 * To maintain backward compatibility for tools that mount
+	 * debugfs to get to the tracing facility, tracefs is automatically
+	 * mounted to the debugfs/tracing directory.
+	 */
+	type = get_fs_type("tracefs");
+	if (!type)
+		return NULL;
+	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
+	put_filesystem(type);
+	if (IS_ERR(mnt))
+		return NULL;
+	mntget(mnt);
+
+	return mnt;
+}
+
 /**
  * tracing_init_dentry - initialize top level trace array
  *
@@ -6546,14 +6569,21 @@ struct dentry *tracing_init_dentry(void)
 {
 	struct trace_array *tr = &global_trace;
 
+	/* The top level trace array uses  NULL as parent */
 	if (tr->dir)
-		return tr->dir;
+		return NULL;
 
 	if (WARN_ON(!debugfs_initialized()))
 		return ERR_PTR(-ENODEV);
 
-	tr->dir = debugfs_create_dir("tracing", NULL);
-
+	/*
+	 * As there may still be users that expect the tracing
+	 * files to exist in debugfs/tracing, we must automount
+	 * the tracefs file system there, so older tools still
+	 * work with the newer kernel.
+	 */
+	tr->dir = debugfs_create_automount("tracing", NULL,
+					   trace_automount, NULL);
 	if (!tr->dir) {
 		pr_warn_once("Could not create debugfs directory 'tracing'\n");
 		return ERR_PTR(-ENOMEM);
-- 
cgit v1.2.3


From eae473581cf93dad94ca833aa961c033c6a43924 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 21 Jan 2015 10:01:39 -0500
Subject: tracing: Have mkdir and rmdir be part of tracefs

The tracing "instances" directory can create sub tracing buffers
with mkdir, and remove them with rmdir. As a mkdir will also create
all the files and directories that control the sub buffer, the inode
mutexes need to be released before this is done, to avoid deadlocks.
It is better to let the tracing system unlock the inode mutexes before
calling the functions that create the files within the new directory
(or delete the files from the one being destroyed).

Now that tracing has been converted over to tracefs, the tracefs file
system can be modified to accommodate this feature. It still releases
the locks, but the filesystem itself can take care of the ugly
business and let the user just do what it needs.

The tracing system now attaches a descriptor to the directory dentry
that can have userspace create or remove sub directories. If this
descriptor does not exist for a dentry, then that dentry can not be
used to create other directories. This descriptor holds a mkdir and
rmdir method that only takes a character string as an argument.

The tracefs file system will first make a copy of the dentry name
before releasing the locks. Then it will pass the copied name to the
methods. It is up to the tracing system that supplied the methods to
handle races with duplicate names and such, as all the inode mutexes
would be released when the functions are called.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 fs/tracefs/inode.c      | 151 +++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/tracefs.h |   4 ++
 kernel/trace/trace.c    |  75 ++----------------------
 3 files changed, 145 insertions(+), 85 deletions(-)

(limited to 'kernel/trace')

diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 0b9cf5cf24c9..d92bdf3b079a 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -50,6 +50,84 @@ static const struct file_operations tracefs_file_operations = {
 	.llseek =	noop_llseek,
 };
 
+static struct tracefs_dir_ops {
+	int (*mkdir)(const char *name);
+	int (*rmdir)(const char *name);
+} tracefs_ops;
+
+static char *get_dname(struct dentry *dentry)
+{
+	const char *dname;
+	char *name;
+	int len = dentry->d_name.len;
+
+	dname = dentry->d_name.name;
+	name = kmalloc(len + 1, GFP_KERNEL);
+	if (!name)
+		return NULL;
+	memcpy(name, dname, len);
+	name[len] = 0;
+	return name;
+}
+
+static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+{
+	char *name;
+	int ret;
+
+	name = get_dname(dentry);
+	if (!name)
+		return -ENOMEM;
+
+	/*
+	 * The mkdir call can call the generic functions that create
+	 * the files within the tracefs system. It is up to the individual
+	 * mkdir routine to handle races.
+	 */
+	mutex_unlock(&inode->i_mutex);
+	ret = tracefs_ops.mkdir(name);
+	mutex_lock(&inode->i_mutex);
+
+	kfree(name);
+
+	return ret;
+}
+
+static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
+{
+	char *name;
+	int ret;
+
+	name = get_dname(dentry);
+	if (!name)
+		return -ENOMEM;
+
+	/*
+	 * The rmdir call can call the generic functions that create
+	 * the files within the tracefs system. It is up to the individual
+	 * rmdir routine to handle races.
+	 * This time we need to unlock not only the parent (inode) but
+	 * also the directory that is being deleted.
+	 */
+	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&dentry->d_inode->i_mutex);
+
+	ret = tracefs_ops.rmdir(name);
+
+	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+	mutex_lock(&dentry->d_inode->i_mutex);
+
+	kfree(name);
+
+	return ret;
+}
+
+static const struct inode_operations tracefs_dir_inode_operations = {
+	.lookup		= simple_lookup,
+	.mkdir		= tracefs_syscall_mkdir,
+	.rmdir		= tracefs_syscall_rmdir,
+};
+
 static struct inode *tracefs_get_inode(struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -334,6 +412,31 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 	return end_creating(dentry);
 }
 
+static struct dentry *__create_dir(const char *name, struct dentry *parent,
+				   const struct inode_operations *ops)
+{
+	struct dentry *dentry = start_creating(name, parent);
+	struct inode *inode;
+
+	if (IS_ERR(dentry))
+		return NULL;
+
+	inode = tracefs_get_inode(dentry->d_sb);
+	if (unlikely(!inode))
+		return failed_creating(dentry);
+
+	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	inode->i_op = ops;
+	inode->i_fop = &simple_dir_operations;
+
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inc_nlink(inode);
+	d_instantiate(dentry, inode);
+	inc_nlink(dentry->d_parent->d_inode);
+	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+	return end_creating(dentry);
+}
+
 /**
  * tracefs_create_dir - create a directory in the tracefs filesystem
  * @name: a pointer to a string containing the name of the directory to
@@ -353,26 +456,44 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
  */
 struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
 {
-	struct dentry *dentry = start_creating(name, parent);
-	struct inode *inode;
+	return __create_dir(name, parent, &simple_dir_inode_operations);
+}
 
-	if (IS_ERR(dentry))
+/**
+ * tracefs_create_instance_dir - create the tracing instances directory
+ * @name: The name of the instances directory to create
+ * @parent: The parent directory that the instances directory will exist in
+ * @mkdir: The function to call when a mkdir is performed.
+ * @rmdir: The function to call when a rmdir is performed.
+ *
+ * Only one instances directory is allowed.
+ *
+ * The instances directory is special as it allows for mkdir and rmdir to
+ * be done by userspace. When a mkdir or rmdir is performed, the inode
+ * locks are released and the methods passed in (@mkdir and @rmdir) are
+ * called without locks and with the name of the directory being created
+ * within the instances directory.
+ *
+ * Returns the dentry of the instances directory.
+ */
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+					  int (*mkdir)(const char *name),
+					  int (*rmdir)(const char *name))
+{
+	struct dentry *dentry;
+
+	/* Only allow one instance of the instances directory. */
+	if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir))
 		return NULL;
 
-	inode = tracefs_get_inode(dentry->d_sb);
-	if (unlikely(!inode))
-		return failed_creating(dentry);
+	dentry = __create_dir(name, parent, &tracefs_dir_inode_operations);
+	if (!dentry)
+		return NULL;
 
-	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-	inode->i_op = &simple_dir_inode_operations;
-	inode->i_fop = &simple_dir_operations;
+	tracefs_ops.mkdir = mkdir;
+	tracefs_ops.rmdir = rmdir;
 
-	/* directory inodes start off with i_nlink == 2 (for "." entry) */
-	inc_nlink(inode);
-	d_instantiate(dentry, inode);
-	inc_nlink(dentry->d_parent->d_inode);
-	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
-	return end_creating(dentry);
+	return dentry;
 }
 
 static inline int tracefs_positive(struct dentry *dentry)
diff --git a/include/linux/tracefs.h b/include/linux/tracefs.h
index 23e04ce21749..5b727a17beee 100644
--- a/include/linux/tracefs.h
+++ b/include/linux/tracefs.h
@@ -34,6 +34,10 @@ struct dentry *tracefs_create_dir(const char *name, struct dentry *parent);
 void tracefs_remove(struct dentry *dentry);
 void tracefs_remove_recursive(struct dentry *dentry);
 
+struct dentry *tracefs_create_instance_dir(const char *name, struct dentry *parent,
+					   int (*mkdir)(const char *name),
+					   int (*rmdir)(const char *name));
+
 bool tracefs_initialized(void);
 
 #endif /* CONFIG_TRACING */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b4aa936509d2..3c8913bac204 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6292,7 +6292,7 @@ static void free_trace_buffers(struct trace_array *tr)
 #endif
 }
 
-static int new_instance_create(const char *name)
+static int instance_mkdir(const char *name)
 {
 	struct trace_array *tr;
 	int ret;
@@ -6362,7 +6362,7 @@ static int new_instance_create(const char *name)
 
 }
 
-static int instance_delete(const char *name)
+static int instance_rmdir(const char *name)
 {
 	struct trace_array *tr;
 	int found = 0;
@@ -6403,78 +6403,13 @@ static int instance_delete(const char *name)
 	return ret;
 }
 
-static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t mode)
-{
-	struct dentry *parent;
-	int ret;
-
-	/* Paranoid: Make sure the parent is the "instances" directory */
-	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
-	if (WARN_ON_ONCE(parent != trace_instance_dir))
-		return -ENOENT;
-
-	/*
-	 * The inode mutex is locked, but tracefs_create_dir() will also
-	 * take the mutex. As the instances directory can not be destroyed
-	 * or changed in any other way, it is safe to unlock it, and
-	 * let the dentry try. If two users try to make the same dir at
-	 * the same time, then the new_instance_create() will determine the
-	 * winner.
-	 */
-	mutex_unlock(&inode->i_mutex);
-
-	ret = new_instance_create(dentry->d_iname);
-
-	mutex_lock(&inode->i_mutex);
-
-	return ret;
-}
-
-static int instance_rmdir(struct inode *inode, struct dentry *dentry)
-{
-	struct dentry *parent;
-	int ret;
-
-	/* Paranoid: Make sure the parent is the "instances" directory */
-	parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
-	if (WARN_ON_ONCE(parent != trace_instance_dir))
-		return -ENOENT;
-
-	/* The caller did a dget() on dentry */
-	mutex_unlock(&dentry->d_inode->i_mutex);
-
-	/*
-	 * The inode mutex is locked, but tracefs_create_dir() will also
-	 * take the mutex. As the instances directory can not be destroyed
-	 * or changed in any other way, it is safe to unlock it, and
-	 * let the dentry try. If two users try to make the same dir at
-	 * the same time, then the instance_delete() will determine the
-	 * winner.
-	 */
-	mutex_unlock(&inode->i_mutex);
-
-	ret = instance_delete(dentry->d_iname);
-
-	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-	mutex_lock(&dentry->d_inode->i_mutex);
-
-	return ret;
-}
-
-static const struct inode_operations instance_dir_inode_operations = {
-	.lookup		= simple_lookup,
-	.mkdir		= instance_mkdir,
-	.rmdir		= instance_rmdir,
-};
-
 static __init void create_trace_instances(struct dentry *d_tracer)
 {
-	trace_instance_dir = tracefs_create_dir("instances", d_tracer);
+	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
+							 instance_mkdir,
+							 instance_rmdir);
 	if (WARN_ON(!trace_instance_dir))
 		return;
-
-	/* Hijack the dir inode operations, to allow mkdir */
-	trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
 }
 
 static void
-- 
cgit v1.2.3


From 50f16a8bf9d7a92c437ed1867d0f7e1dc6a9aca9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 5 Mar 2015 22:10:19 +0100
Subject: perf: Remove type specific target pointers

The only reason CQM had to use a hard-coded pmu type was so it could use
cqm_target in hw_perf_event.

Do away with the {tp,bp,cqm}_target pointers and provide a non type
specific one.

This allows us to do away with that silly pmu type as well.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Vince Weaver <vince@deater.net>
Cc: acme@kernel.org
Cc: acme@redhat.com
Cc: hpa@zytor.com
Cc: jolsa@redhat.com
Cc: kanaka.d.juvva@intel.com
Cc: matt.fleming@intel.com
Cc: tglx@linutronix.de
Cc: torvalds@linux-foundation.org
Cc: vikas.shivappa@linux.intel.com
Link: http://lkml.kernel.org/r/20150305211019.GU21418@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/hw_breakpoint.c            |  2 +-
 arch/arm64/kernel/hw_breakpoint.c          |  2 +-
 arch/x86/kernel/cpu/perf_event_intel_cqm.c |  7 +++----
 include/linux/perf_event.h                 |  4 +---
 include/uapi/linux/perf_event.h            |  1 -
 kernel/events/core.c                       | 14 ++++----------
 kernel/events/hw_breakpoint.c              |  8 ++++----
 kernel/trace/trace_uprobe.c                | 10 +++++-----
 8 files changed, 19 insertions(+), 29 deletions(-)

(limited to 'kernel/trace')

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 7fc70ae21185..dc7d0a95bd36 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 		 * Per-cpu breakpoints are not supported by our stepping
 		 * mechanism.
 		 */
-		if (!bp->hw.bp_target)
+		if (!bp->hw.target)
 			return -EINVAL;
 
 		/*
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index df1cf15377b4..d062f35911c2 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * Disallow per-task kernel breakpoints since these would
 	 * complicate the stepping code.
 	 */
-	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
+	if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
 		return -EINVAL;
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 9a8ef8376fcd..e4d1b8b738fa 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -263,7 +263,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
 	/*
 	 * Events that target same task are placed into the same cache group.
 	 */
-	if (a->hw.cqm_target == b->hw.cqm_target)
+	if (a->hw.target == b->hw.target)
 		return true;
 
 	/*
@@ -279,7 +279,7 @@ static bool __match_event(struct perf_event *a, struct perf_event *b)
 static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
 {
 	if (event->attach_state & PERF_ATTACH_TASK)
-		return perf_cgroup_from_task(event->hw.cqm_target);
+		return perf_cgroup_from_task(event->hw.target);
 
 	return event->cgrp;
 }
@@ -1365,8 +1365,7 @@ static int __init intel_cqm_init(void)
 
 	__perf_cpu_notifier(intel_cqm_cpu_notifier);
 
-	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm",
-				PERF_TYPE_INTEL_CQM);
+	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
 	if (ret)
 		pr_err("Intel CQM perf registration failed: %d\n", ret);
 	else
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index dac4c2831d82..5aa49d7bfd07 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -119,7 +119,6 @@ struct hw_perf_event {
 			struct hrtimer	hrtimer;
 		};
 		struct { /* tracepoint */
-			struct task_struct	*tp_target;
 			/* for tp_event->class */
 			struct list_head	tp_list;
 		};
@@ -129,7 +128,6 @@ struct hw_perf_event {
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
-			struct task_struct	*cqm_target;
 		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
@@ -138,12 +136,12 @@ struct hw_perf_event {
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
-			struct task_struct		*bp_target;
 			struct arch_hw_breakpoint	info;
 			struct list_head		bp_list;
 		};
 #endif
 	};
+	struct task_struct		*target;
 	int				state;
 	local64_t			prev_count;
 	u64				sample_period;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3c8b45de57ec..1e3cd07cf76e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -32,7 +32,6 @@ enum perf_type_id {
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
 	PERF_TYPE_BREAKPOINT			= 5,
-	PERF_TYPE_INTEL_CQM			= 6,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 71109a045450..525062b6fba1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7171,18 +7171,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
-
-		if (attr->type == PERF_TYPE_TRACEPOINT)
-			event->hw.tp_target = task;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
-		 * hw_breakpoint is a bit difficult here..
+		 * XXX pmu::event_init needs to know what task to account to
+		 * and we cannot use the ctx information because we need the
+		 * pmu before we get a ctx.
 		 */
-		else if (attr->type == PERF_TYPE_BREAKPOINT)
-			event->hw.bp_target = task;
-#endif
-		else if (attr->type == PERF_TYPE_INTEL_CQM)
-			event->hw.cqm_target = task;
+		event->hw.target = task;
 	}
 
 	if (!overflow_handler && parent_event) {
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 9803a6600d49..92ce5f4ccc26 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
  */
 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
-	struct task_struct *tsk = bp->hw.bp_target;
+	struct task_struct *tsk = bp->hw.target;
 	struct perf_event *iter;
 	int count = 0;
 
 	list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
-		if (iter->hw.bp_target == tsk &&
+		if (iter->hw.target == tsk &&
 		    find_slot_idx(iter) == type &&
 		    (iter->cpu < 0 || cpu == iter->cpu))
 			count += hw_breakpoint_weight(iter);
@@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		int nr;
 
 		nr = info->cpu_pinned;
-		if (!bp->hw.bp_target)
+		if (!bp->hw.target)
 			nr += max_task_bp_pinned(cpu, type);
 		else
 			nr += task_bp_pinned(cpu, bp, type);
@@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 		weight = -weight;
 
 	/* Pinned counter cpu profiling */
-	if (!bp->hw.bp_target) {
+	if (!bp->hw.target) {
 		get_bp_info(bp->cpu, type)->cpu_pinned += weight;
 		return;
 	}
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index b11441321e7a..93fdc7791eaa 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1005,7 +1005,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
 		return true;
 
 	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
-		if (event->hw.tp_target->mm == mm)
+		if (event->hw.target->mm == mm)
 			return true;
 	}
 
@@ -1015,7 +1015,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
 static inline bool
 uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
 {
-	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+	return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
 }
 
 static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
@@ -1023,10 +1023,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
 	bool done;
 
 	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
+	if (event->hw.target) {
 		list_del(&event->hw.tp_list);
 		done = tu->filter.nr_systemwide ||
-			(event->hw.tp_target->flags & PF_EXITING) ||
+			(event->hw.target->flags & PF_EXITING) ||
 			uprobe_filter_event(tu, event);
 	} else {
 		tu->filter.nr_systemwide--;
@@ -1046,7 +1046,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
 	int err;
 
 	write_lock(&tu->filter.rwlock);
-	if (event->hw.tp_target) {
+	if (event->hw.target) {
 		/*
 		 * event->parent != NULL means copy_process(), we can avoid
 		 * uprobe_apply(). current->mm must be probed and we can rely
-- 
cgit v1.2.3


From 80a9b64e2c156b6523e7a01f2ba6e5d86e722814 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 17 Mar 2015 10:40:38 -0400
Subject: ring-buffer: Replace this_cpu_*() with __this_cpu_*()

It has come to my attention that this_cpu_read/write are horrible on
architectures other than x86. Worse yet, they actually disable
preemption or interrupts! This caused some unexpected tracing results
on ARM.

   101.356868: preempt_count_add <-ring_buffer_lock_reserve
   101.356870: preempt_count_sub <-ring_buffer_lock_reserve

The ring_buffer_lock_reserve has recursion protection that requires
accessing a per cpu variable. But since preempt_disable() is traced, it
too got traced while accessing the variable that is supposed to prevent
recursion like this.

The generic versions of this_cpu_read() and write() are:

 #define this_cpu_generic_read(pcp)					\
 ({	typeof(pcp) ret__;						\
	preempt_disable();						\
	ret__ = *this_cpu_ptr(&(pcp));					\
	preempt_enable();						\
	ret__;								\
 })

 #define this_cpu_generic_to_op(pcp, val, op)				\
 do {									\
	unsigned long flags;						\
	raw_local_irq_save(flags);					\
	*__this_cpu_ptr(&(pcp)) op val;					\
	raw_local_irq_restore(flags);					\
 } while (0)

Which is unacceptable for locations that know they are within preempt
disabled or interrupt disabled locations.

Paul McKenney stated that __this_cpu_() versions produce much better code on
other architectures than this_cpu_() does, if we know that the call is done in
a preempt disabled location.

I also changed the recursive_unlock() to use two local variables instead
of accessing the per_cpu variable twice.

Link: http://lkml.kernel.org/r/20150317114411.GE3589@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20150317104038.312e73d1@gandalf.local.home

Cc: stable@vger.kernel.org
Acked-by: Christoph Lameter <cl@linux.com>
Reported-by: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Tested-by: Uwe Kleine-Koenig <u.kleine-koenig@pengutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5040d44fe5a3..922048a0f7ea 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2679,7 +2679,7 @@ static DEFINE_PER_CPU(unsigned int, current_context);
 
 static __always_inline int trace_recursive_lock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	unsigned int val = __this_cpu_read(current_context);
 	int bit;
 
 	if (in_interrupt()) {
@@ -2696,18 +2696,17 @@ static __always_inline int trace_recursive_lock(void)
 		return 1;
 
 	val |= (1 << bit);
-	this_cpu_write(current_context, val);
+	__this_cpu_write(current_context, val);
 
 	return 0;
 }
 
 static __always_inline void trace_recursive_unlock(void)
 {
-	unsigned int val = this_cpu_read(current_context);
+	unsigned int val = __this_cpu_read(current_context);
 
-	val--;
-	val &= this_cpu_read(current_context);
-	this_cpu_write(current_context, val);
+	val &= val & (val - 1);
+	__this_cpu_write(current_context, val);
 }
 
 #else
-- 
cgit v1.2.3


From bbedb179944c29e5e449603163eec9951116fe39 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Wed, 11 Mar 2015 22:13:57 -0500
Subject: tracing: %pF is only for function pointers

Use %pS for actual addresses, otherwise you'll get bad output
on arches like ppc64 where %pF expects a function descriptor.

Link: http://lkml.kernel.org/r/1426130037-17956-22-git-send-email-scottwood@freescale.com

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/events/btrfs.h       |  4 ++--
 include/trace/events/ext3.h        |  2 +-
 include/trace/events/ext4.h        |  6 +++---
 include/trace/events/module.h      |  4 ++--
 include/trace/events/random.h      | 10 +++++-----
 kernel/trace/trace_entries.h       |  6 +++---
 tools/lib/traceevent/event-parse.c |  2 +-
 7 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 1faecea101f3..572e6503394a 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -962,7 +962,7 @@ TRACE_EVENT(alloc_extent_state,
 		__entry->ip	= IP
 	),
 
-	TP_printk("state=%p; mask = %s; caller = %pF", __entry->state,
+	TP_printk("state=%p; mask = %s; caller = %pS", __entry->state,
 		  show_gfp_flags(__entry->mask), (void *)__entry->ip)
 );
 
@@ -982,7 +982,7 @@ TRACE_EVENT(free_extent_state,
 		__entry->ip = IP
 	),
 
-	TP_printk(" state=%p; caller = %pF", __entry->state,
+	TP_printk(" state=%p; caller = %pS", __entry->state,
 		  (void *)__entry->ip)
 );
 
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h
index 6797b9de90ed..7f20707849bb 100644
--- a/include/trace/events/ext3.h
+++ b/include/trace/events/ext3.h
@@ -144,7 +144,7 @@ TRACE_EVENT(ext3_mark_inode_dirty,
 		__entry->ip	= IP;
 	),
 
-	TP_printk("dev %d,%d ino %lu caller %pF",
+	TP_printk("dev %d,%d ino %lu caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 6e5abd6d38a2..47fca36ee426 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -240,7 +240,7 @@ TRACE_EVENT(ext4_mark_inode_dirty,
 		__entry->ip	= IP;
 	),
 
-	TP_printk("dev %d,%d ino %lu caller %pF",
+	TP_printk("dev %d,%d ino %lu caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, (void *)__entry->ip)
 );
@@ -1762,7 +1762,7 @@ TRACE_EVENT(ext4_journal_start,
 		__entry->rsv_blocks	 = rsv_blocks;
 	),
 
-	TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pF",
+	TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blocks, __entry->rsv_blocks, (void *)__entry->ip)
 );
@@ -1784,7 +1784,7 @@ TRACE_EVENT(ext4_journal_start_reserved,
 		__entry->blocks		 = blocks;
 	),
 
-	TP_printk("dev %d,%d blocks, %d caller %pF",
+	TP_printk("dev %d,%d blocks, %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->blocks, (void *)__entry->ip)
 );
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 81c4c183d348..28c45997e451 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -84,7 +84,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
 		__assign_str(name, mod->name);
 	),
 
-	TP_printk("%s call_site=%pf refcnt=%d",
+	TP_printk("%s call_site=%ps refcnt=%d",
 		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
 );
 
@@ -121,7 +121,7 @@ TRACE_EVENT(module_request,
 		__assign_str(name, name);
 	),
 
-	TP_printk("%s wait=%d call_site=%pf",
+	TP_printk("%s wait=%d call_site=%ps",
 		  __get_str(name), (int)__entry->wait, (void *)__entry->ip)
 );
 
diff --git a/include/trace/events/random.h b/include/trace/events/random.h
index 805af6db41cc..4684de344c5d 100644
--- a/include/trace/events/random.h
+++ b/include/trace/events/random.h
@@ -22,7 +22,7 @@ TRACE_EVENT(add_device_randomness,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("bytes %d caller %pF",
+	TP_printk("bytes %d caller %pS",
 		__entry->bytes, (void *)__entry->IP)
 );
 
@@ -43,7 +43,7 @@ DECLARE_EVENT_CLASS(random__mix_pool_bytes,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: bytes %d caller %pF",
+	TP_printk("%s pool: bytes %d caller %pS",
 		  __entry->pool_name, __entry->bytes, (void *)__entry->IP)
 );
 
@@ -82,7 +82,7 @@ TRACE_EVENT(credit_entropy_bits,
 	),
 
 	TP_printk("%s pool: bits %d entropy_count %d entropy_total %d "
-		  "caller %pF", __entry->pool_name, __entry->bits,
+		  "caller %pS", __entry->pool_name, __entry->bits,
 		  __entry->entropy_count, __entry->entropy_total,
 		  (void *)__entry->IP)
 );
@@ -207,7 +207,7 @@ DECLARE_EVENT_CLASS(random__get_random_bytes,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("nbytes %d caller %pF", __entry->nbytes, (void *)__entry->IP)
+	TP_printk("nbytes %d caller %pS", __entry->nbytes, (void *)__entry->IP)
 );
 
 DEFINE_EVENT(random__get_random_bytes, get_random_bytes,
@@ -242,7 +242,7 @@ DECLARE_EVENT_CLASS(random__extract_entropy,
 		__entry->IP		= IP;
 	),
 
-	TP_printk("%s pool: nbytes %d entropy_count %d caller %pF",
+	TP_printk("%s pool: nbytes %d entropy_count %d caller %pS",
 		  __entry->pool_name, __entry->nbytes, __entry->entropy_count,
 		  (void *)__entry->IP)
 );
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e2d027ac66a2..ee7b94a4810a 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -223,7 +223,7 @@ FTRACE_ENTRY(bprint, bprint_entry,
 		__dynamic_array(	u32,	buf	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->fmt),
 
 	FILTER_OTHER
@@ -238,7 +238,7 @@ FTRACE_ENTRY(print, print_entry,
 		__dynamic_array(	char,	buf	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->buf),
 
 	FILTER_OTHER
@@ -253,7 +253,7 @@ FTRACE_ENTRY(bputs, bputs_entry,
 		__field(	const char *,	str	)
 	),
 
-	F_printk("%pf: %s",
+	F_printk("%ps: %s",
 		 (void *)__entry->ip, __entry->str),
 
 	FILTER_OTHER
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index afe20ed9fac8..2c0bd8f2aad0 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3976,7 +3976,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc
 	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
 		goto out_free;
 
-	/* skip the first "%pf: " */
+	/* skip the first "%ps: " */
 	for (ptr = fmt + 5, bptr = data + field->offset;
 	     bptr < data + size && *ptr; ptr++) {
 		int ls = 0;
-- 
cgit v1.2.3


From 754cb0071a5c9576ccfa6523969ef6a2f6a71676 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Tue, 3 Mar 2015 15:21:33 +0800
Subject: tracing: remove ftrace:function TRACE_EVENT_FL_USE_CALL_FILTER flag

TRACE_EVENT_FL_USE_CALL_FILTER flag in ftrace:function event can be
removed. This flag was first introduced in commit
f306cc82a93d ("tracing: Update event filters for multibuffer").

Now, the only place that uses this flag is ftrace:function, but the filter
of ftrace:function has a different code path from events/syscalls and
events/tracepoints. It uses ftrace_filter_write() and perf's
ftrace_profile_set_filter() to set the filter, the functionality of file
'tracing/events/ftrace/function/filter' is bypassed in function
init_pred(), in which case, neither call->filter nor file->filter is
used.

So we can safely remove TRACE_EVENT_FL_USE_CALL_FILTER flag from
ftrace:function events.

Link: http://lkml.kernel.org/r/1425367294-27852-1-git-send-email-hekuang@huawei.com

Signed-off-by: He Kuang <hekuang@huawei.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_export.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 12e2b99be862..174a6a71146c 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -177,7 +177,7 @@ struct ftrace_event_call __used event_##call = {			\
 	},								\
 	.event.type		= etype,				\
 	.print_fmt		= print,				\
-	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE | TRACE_EVENT_FL_USE_CALL_FILTER, \
+	.flags			= TRACE_EVENT_FL_IGNORE_ENABLE,		\
 };									\
 struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
-- 
cgit v1.2.3


From d9a16d3ab8770357015c85a07387f1d2676a4773 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 12 Mar 2015 16:58:34 +1100
Subject: trace: Don't use __weak in header files

The commit that added a check for this to checkpatch says:

"Using weak declarations can have unintended link defects.  The __weak on
the declaration causes non-weak definitions to become weak."

In this case, when a PowerPC kernel is built with CONFIG_KPROBE_EVENT
but not CONFIG_UPROBE_EVENT, it generates the following warning:

WARNING: 1 bad relocations
c0000000014f2190 R_PPC64_ADDR64    uprobes_fetch_type_table

This is fixed by passing the fetch_table arrays to
traceprobe_parse_probe_arg() which also means that they can never be NULL.

Link: http://lkml.kernel.org/r/20150312165834.4482cb48@canb.auug.org.au

Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c |  5 +++--
 kernel/trace/trace_probe.c  | 19 +++++++------------
 kernel/trace/trace_probe.h  | 10 ++--------
 kernel/trace/trace_uprobe.c |  5 +++--
 4 files changed, 15 insertions(+), 24 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d73f565b4e06..f34c3ad1b5f4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -250,7 +250,7 @@ DEFINE_FETCH_symbol(string_size)
 #define fetch_file_offset_string_size	NULL
 
 /* Fetch type information table */
-const struct fetch_type kprobes_fetch_type_table[] = {
+static const struct fetch_type kprobes_fetch_type_table[] = {
 	/* Special types */
 	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
 					sizeof(u32), 1, "__data_loc char[]"),
@@ -760,7 +760,8 @@ static int create_trace_kprobe(int argc, char **argv)
 
 		/* Parse fetch argument */
 		ret = traceprobe_parse_probe_arg(arg, &tk->tp.size, parg,
-						is_return, true);
+						is_return, true,
+						kprobes_fetch_type_table);
 		if (ret) {
 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
 			goto error;
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b983b2fd2ca1..1769a81da8a7 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -356,17 +356,14 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
 
 /* Recursive argument parser */
 static int parse_probe_arg(char *arg, const struct fetch_type *t,
-		     struct fetch_param *f, bool is_return, bool is_kprobe)
+		     struct fetch_param *f, bool is_return, bool is_kprobe,
+		     const struct fetch_type *ftbl)
 {
-	const struct fetch_type *ftbl;
 	unsigned long param;
 	long offset;
 	char *tmp;
 	int ret = 0;
 
-	ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
-	BUG_ON(ftbl == NULL);
-
 	switch (arg[0]) {
 	case '$':
 		ret = parse_probe_vars(arg + 1, t, f, is_return, is_kprobe);
@@ -447,7 +444,7 @@ static int parse_probe_arg(char *arg, const struct fetch_type *t,
 			dprm->fetch_size = get_fetch_size_function(t,
 							dprm->fetch, ftbl);
 			ret = parse_probe_arg(arg, t2, &dprm->orig, is_return,
-							is_kprobe);
+							is_kprobe, ftbl);
 			if (ret)
 				kfree(dprm);
 			else {
@@ -505,15 +502,12 @@ static int __parse_bitfield_probe_arg(const char *bf,
 
 /* String length checking wrapper */
 int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
-		struct probe_arg *parg, bool is_return, bool is_kprobe)
+		struct probe_arg *parg, bool is_return, bool is_kprobe,
+		const struct fetch_type *ftbl)
 {
-	const struct fetch_type *ftbl;
 	const char *t;
 	int ret;
 
-	ftbl = is_kprobe ? kprobes_fetch_type_table : uprobes_fetch_type_table;
-	BUG_ON(ftbl == NULL);
-
 	if (strlen(arg) > MAX_ARGSTR_LEN) {
 		pr_info("Argument is too long.: %s\n",  arg);
 		return -ENOSPC;
@@ -535,7 +529,8 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
 	}
 	parg->offset = *size;
 	*size += parg->type->size;
-	ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe);
+	ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return,
+			      is_kprobe, ftbl);
 
 	if (ret >= 0 && t != NULL)
 		ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 4f815fbce16d..e30f6cce4af6 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -229,13 +229,6 @@ ASSIGN_FETCH_FUNC(file_offset, ftype),			\
 #define FETCH_TYPE_STRING	0
 #define FETCH_TYPE_STRSIZE	1
 
-/*
- * Fetch type information table.
- * It's declared as a weak symbol due to conditional compilation.
- */
-extern __weak const struct fetch_type kprobes_fetch_type_table[];
-extern __weak const struct fetch_type uprobes_fetch_type_table[];
-
 #ifdef CONFIG_KPROBE_EVENT
 struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
@@ -333,7 +326,8 @@ find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
 }
 
 extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
-		   struct probe_arg *parg, bool is_return, bool is_kprobe);
+		   struct probe_arg *parg, bool is_return, bool is_kprobe,
+		   const struct fetch_type *ftbl);
 
 extern int traceprobe_conflict_field_name(const char *name,
 			       struct probe_arg *args, int narg);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 7dc1c8abecd6..74865465e0b7 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -196,7 +196,7 @@ DEFINE_FETCH_file_offset(string)
 DEFINE_FETCH_file_offset(string_size)
 
 /* Fetch type information table */
-const struct fetch_type uprobes_fetch_type_table[] = {
+static const struct fetch_type uprobes_fetch_type_table[] = {
 	/* Special types */
 	[FETCH_TYPE_STRING] = __ASSIGN_FETCH_TYPE("string", string, string,
 					sizeof(u32), 1, "__data_loc char[]"),
@@ -535,7 +535,8 @@ static int create_trace_uprobe(int argc, char **argv)
 
 		/* Parse fetch argument */
 		ret = traceprobe_parse_probe_arg(arg, &tu->tp.size, parg,
-						 is_return, false);
+						 is_return, false,
+						 uprobes_fetch_type_table);
 		if (ret) {
 			pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
 			goto error;
-- 
cgit v1.2.3


From d631c8cceb1d1d06f372878935949d421585186b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 27 Mar 2015 17:39:49 -0400
Subject: ring-buffer: Remove duplicate use of '&' in recursive code

A clean up of the recursive protection code changed

  val = this_cpu_read(current_context);
  val--;
  val &= this_cpu_read(current_context);

to

  val = this_cpu_read(current_context);
  val &= val & (val - 1);

Which has a duplicate use of '&' as the above is the same as

  val = val & (val - 1);

Actually, it would be best to remove that line altogether and
just add it to where it is used.

And Christoph even mentioned that it can be further compacted to
just a single line:

  __this_cpu_and(current_context, __this_cpu_read(current_context) - 1);

Link: http://lkml.kernel.org/alpine.DEB.2.11.1503271423580.23114@gentwo.org

Suggested-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 922048a0f7ea..0315d43176d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2703,10 +2703,7 @@ static __always_inline int trace_recursive_lock(void)
 
 static __always_inline void trace_recursive_unlock(void)
 {
-	unsigned int val = __this_cpu_read(current_context);
-
-	val &= val & (val - 1);
-	__this_cpu_write(current_context, val);
+	__this_cpu_and(current_context, __this_cpu_read(current_context) - 1);
 }
 
 #else
-- 
cgit v1.2.3


From 72cbbc8994242b5b43753738c01bf07bf29cb70d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:19 -0700
Subject: tracing: Add kprobe flag

Add the TRACE_EVENT_FL_KPROBE flag to differentiate kprobe-type
tracepoints, since BPF programs can only be attached to the kprobe
type of PERF_TYPE_TRACEPOINT perf events.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-3-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ftrace_event.h | 3 +++
 kernel/trace/trace_kprobe.c  | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c674ee8f7fca..77325e1a1816 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -252,6 +252,7 @@ enum {
 	TRACE_EVENT_FL_WAS_ENABLED_BIT,
 	TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
 	TRACE_EVENT_FL_TRACEPOINT_BIT,
+	TRACE_EVENT_FL_KPROBE_BIT,
 };
 
 /*
@@ -265,6 +266,7 @@ enum {
  *                     it is best to clear the buffers that used it).
  *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
  *  TRACEPOINT    - Event is a tracepoint
+ *  KPROBE        - Event is a kprobe
  */
 enum {
 	TRACE_EVENT_FL_FILTERED		= (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -274,6 +276,7 @@ enum {
 	TRACE_EVENT_FL_WAS_ENABLED	= (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
 	TRACE_EVENT_FL_USE_CALL_FILTER	= (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
 	TRACE_EVENT_FL_TRACEPOINT	= (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+	TRACE_EVENT_FL_KPROBE		= (1 << TRACE_EVENT_FL_KPROBE_BIT),
 };
 
 struct ftrace_event_call {
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d73f565b4e06..8fa549f6f528 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1286,7 +1286,7 @@ static int register_kprobe_event(struct trace_kprobe *tk)
 		kfree(call->print_fmt);
 		return -ENODEV;
 	}
-	call->flags = 0;
+	call->flags = TRACE_EVENT_FL_KPROBE;
 	call->class->reg = kprobe_register;
 	call->data = tk;
 	ret = trace_add_event_call(call);
-- 
cgit v1.2.3


From 2541517c32be2531e0da59dfd7efc1ce844644f5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:20 -0700
Subject: tracing, perf: Implement BPF programs attached to kprobes

BPF programs, attached to kprobes, provide a safe way to execute
user-defined BPF byte-code programs without being able to crash or
hang the kernel in any way. The BPF engine makes sure that such
programs have a finite execution time and that they cannot break
out of their sandbox.

The user interface is to attach to a kprobe via the perf syscall:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_TRACEPOINT,
		.config	= event_id,
		...
	};

	event_fd = perf_event_open(&attr,...);
	ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

'prog_fd' is a file descriptor associated with a previously
loaded BPF program.

'event_id' is an ID of the kprobe created.

Closing 'event_fd':

	close(event_fd);

... automatically detaches the BPF program from it.

BPF programs can call in-kernel helper functions to:

  - lookup/update/delete elements in maps

  - probe_read - wrapper of probe_kernel_read() used to access any
    kernel data structures

BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is
architecture dependent) and return 0 to ignore the event and 1 to store
kprobe event into the ring buffer.

Note, kprobes are fundamentally _not_ a stable kernel ABI,
so BPF programs attached to kprobes must be recompiled for
every kernel version and the user must supply the correct
LINUX_VERSION_CODE in attr.kern_version during the bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ftrace_event.h    |  11 ++++
 include/uapi/linux/bpf.h        |   3 +
 include/uapi/linux/perf_event.h |   1 +
 kernel/bpf/syscall.c            |   7 ++-
 kernel/events/core.c            |  59 ++++++++++++++++++
 kernel/trace/Makefile           |   1 +
 kernel/trace/bpf_trace.c        | 130 ++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c     |   8 +++
 8 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 kernel/trace/bpf_trace.c

(limited to 'kernel/trace')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 77325e1a1816..0aa535bc9f05 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@ struct trace_array;
 struct trace_buffer;
 struct tracer;
 struct dentry;
+struct bpf_prog;
 
 struct trace_print_flags {
 	unsigned long		mask;
@@ -306,6 +307,7 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
+	struct bpf_prog			*prog;
 
 	int	(*perf_perm)(struct ftrace_event_call *,
 			     struct perf_event *);
@@ -551,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
 		event_triggers_post_call(file, tt);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	return 1;
+}
+#endif
+
 enum {
 	FILTER_OTHER = 0,
 	FILTER_STATIC_STRING,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..b2948feeb70b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_KPROBE,
 };
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
 		__u32		log_level;	/* verbosity level of verifier */
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
+		__u32		kern_version;	/* checked when prog_type=kprobe */
 	};
 } __attribute__((aligned(8)));
 
@@ -162,6 +164,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3bb40ddadbe5..91803e54ee73 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -381,6 +381,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 536edc2be307..504c10b990ef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/license.h>
 #include <linux/filter.h>
+#include <linux/version.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD log_buf
+#define	BPF_PROG_LOAD_LAST_FIELD kern_version
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (attr->insn_cnt >= BPF_MAXINSNS)
 		return -EINVAL;
 
+	if (type == BPF_PROG_TYPE_KPROBE &&
+	    attr->kern_version != LINUX_VERSION_CODE)
+		return -EINVAL;
+
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 	if (!prog)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c40c2cac2d8e..5c13862d3e85 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -42,6 +42,8 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/compat.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
 
 #include "internal.h"
 
@@ -3407,6 +3409,7 @@ errout:
 }
 
 static void perf_event_free_filter(struct perf_event *event);
+static void perf_event_free_bpf_prog(struct perf_event *event);
 
 static void free_event_rcu(struct rcu_head *head)
 {
@@ -3416,6 +3419,7 @@ static void free_event_rcu(struct rcu_head *head)
 	if (event->ns)
 		put_pid_ns(event->ns);
 	perf_event_free_filter(event);
+	perf_event_free_bpf_prog(event);
 	kfree(event);
 }
 
@@ -3928,6 +3932,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
 static int perf_event_set_output(struct perf_event *event,
 				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
@@ -3981,6 +3986,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_FILTER:
 		return perf_event_set_filter(event, (void __user *)arg);
 
+	case PERF_EVENT_IOC_SET_BPF:
+		return perf_event_set_bpf_prog(event, arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -6455,6 +6463,49 @@ static void perf_event_free_filter(struct perf_event *event)
 	ftrace_profile_free_filter(event);
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	struct bpf_prog *prog;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	if (event->tp_event->prog)
+		return -EEXIST;
+
+	if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
+		/* bpf programs can only be attached to kprobes */
+		return -EINVAL;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) {
+		/* valid fd, but invalid bpf program type */
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	event->tp_event->prog = prog;
+
+	return 0;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+	struct bpf_prog *prog;
+
+	if (!event->tp_event)
+		return;
+
+	prog = event->tp_event->prog;
+	if (prog) {
+		event->tp_event->prog = NULL;
+		bpf_prog_put(prog);
+	}
+}
+
 #else
 
 static inline void perf_tp_register(void)
@@ -6470,6 +6521,14 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+}
 #endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 98f26588255e..c575a300103b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_trace.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
new file mode 100644
index 000000000000..f1e87da91da3
--- /dev/null
+++ b/kernel/trace/bpf_trace.c
@@ -0,0 +1,130 @@
+/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/uaccess.h>
+#include "trace.h"
+
+static DEFINE_PER_CPU(int, bpf_prog_active);
+
+/**
+ * trace_call_bpf - invoke BPF program
+ * @prog: BPF program
+ * @ctx: opaque context pointer
+ *
+ * kprobe handlers execute BPF programs via this helper.
+ * Can be used from static tracepoints in the future.
+ *
+ * Return: BPF programs always return an integer which is interpreted by
+ * kprobe handler as:
+ * 0 - return from kprobe (event is filtered out)
+ * 1 - store kprobe event into ring buffer
+ * Other values are reserved and currently alias to 1
+ */
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	unsigned int ret;
+
+	if (in_nmi()) /* not supported yet */
+		return 1;
+
+	preempt_disable();
+
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+		/*
+		 * since some bpf program is already running on this cpu,
+		 * don't call into another bpf program (same or different)
+		 * and don't send kprobe event into ring-buffer,
+		 * so return zero here
+		 */
+		ret = 0;
+		goto out;
+	}
+
+	rcu_read_lock();
+	ret = BPF_PROG_RUN(prog, ctx);
+	rcu_read_unlock();
+
+ out:
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(trace_call_bpf);
+
+static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *dst = (void *) (long) r1;
+	int size = (int) r2;
+	void *unsafe_ptr = (void *) (long) r3;
+
+	return probe_kernel_read(dst, unsafe_ptr, size);
+}
+
+static const struct bpf_func_proto bpf_probe_read_proto = {
+	.func		= bpf_probe_read,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_proto;
+	default:
+		return NULL;
+	}
+}
+
+/* bpf+kprobe programs can access fields of 'struct pt_regs' */
+static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+{
+	/* check bounds */
+	if (off < 0 || off >= sizeof(struct pt_regs))
+		return false;
+
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* disallow misaligned access */
+	if (off % size != 0)
+		return false;
+
+	return true;
+}
+
+static struct bpf_verifier_ops kprobe_prog_ops = {
+	.get_func_proto  = kprobe_prog_func_proto,
+	.is_valid_access = kprobe_prog_is_valid_access,
+};
+
+static struct bpf_prog_type_list kprobe_tl = {
+	.ops	= &kprobe_prog_ops,
+	.type	= BPF_PROG_TYPE_KPROBE,
+};
+
+static int __init register_kprobe_prog_ops(void)
+{
+	bpf_register_prog_type(&kprobe_tl);
+	return 0;
+}
+late_initcall(register_kprobe_prog_ops);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8fa549f6f528..dc3462507d7c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1134,11 +1134,15 @@ static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
@@ -1165,11 +1169,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		    struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kretprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
-- 
cgit v1.2.3


From d9847d310ab4003725e6ed1822682e24bd406908 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:21 -0700
Subject: tracing: Allow BPF programs to call bpf_ktime_get_ns()

bpf_ktime_get_ns() is used by programs to compute the time delta
between events or as a timestamp.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-5-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/trace/bpf_trace.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'kernel/trace')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b2948feeb70b..238c6883877b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
+	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f1e87da91da3..8f5787294971 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -78,6 +78,18 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* NMI safe access to clock monotonic */
+	return ktime_get_mono_fast_ns();
+}
+
+static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
+	.func		= bpf_ktime_get_ns,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -89,6 +101,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_probe_read:
 		return &bpf_probe_read_proto;
+	case BPF_FUNC_ktime_get_ns:
+		return &bpf_ktime_get_ns_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From 9c959c863f8217a2ff3d7c296e8223654d240569 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:22 -0700
Subject: tracing: Allow BPF programs to call bpf_trace_printk()

Debugging of BPF programs needs some form of printk from the
program, so let programs call limited trace_printk() with %d %u
%x %p modifiers only.

Similar to kernel modules, during program load the verifier checks
whether the program is calling bpf_trace_printk() and, if so, the
kernel allocates trace_printk buffers and emits a big 'this is debug
only' banner.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-6-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/trace/bpf_trace.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'kernel/trace')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 238c6883877b..cc47ef41076a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -166,6 +166,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
 	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
+	BPF_FUNC_trace_printk,    /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8f5787294971..2d56ce501632 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -10,6 +10,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
+#include <linux/ctype.h>
 #include "trace.h"
 
 static DEFINE_PER_CPU(int, bpf_prog_active);
@@ -90,6 +91,74 @@ static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+/*
+ * limited trace_printk()
+ * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
+ */
+static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+{
+	char *fmt = (char *) (long) r1;
+	int mod[3] = {};
+	int fmt_cnt = 0;
+	int i;
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		return -EINVAL;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
+			return -EINVAL;
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt_cnt >= 3)
+			return -EINVAL;
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		} else if (fmt[i] == 'p') {
+			mod[fmt_cnt]++;
+			i++;
+			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+				return -EINVAL;
+			fmt_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		}
+
+		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+			return -EINVAL;
+		fmt_cnt++;
+	}
+
+	return __trace_printk(1/* fake ip will not be printed */, fmt,
+			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
+			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
+			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+}
+
+static const struct bpf_func_proto bpf_trace_printk_proto = {
+	.func		= bpf_trace_printk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -103,6 +172,15 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_probe_read_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+
+	case BPF_FUNC_trace_printk:
+		/*
+		 * this program might be calling bpf_trace_printk,
+		 * so allocate per-cpu printk buffers
+		 */
+		trace_printk_init_buffers();
+
+		return &bpf_trace_printk_proto;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From e1abf2cc8d5d80b41c4419368ec743ccadbb131e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 2 Apr 2015 15:51:39 +0200
Subject: bpf: Fix the build on BPF_SYSCALL=y && !CONFIG_TRACING kernels, make
 it more configurable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

So bpf_trace.o depends on CONFIG_BPF_SYSCALL - but that's not its only
dependency, it also depends on the tracing infrastructure and on kprobes,
without which it will fail to build with:

  In file included from kernel/trace/bpf_trace.c:14:0:
  kernel/trace/trace.h: In function ‘trace_test_and_set_recursion’:
  kernel/trace/trace.h:491:28: error: ‘struct task_struct’ has no member named ‘trace_recursion’
    unsigned int val = current->trace_recursion;
  [...]

It took quite some time to trigger this build failure, because right now
BPF_SYSCALL is very obscure, depends on CONFIG_EXPERT. So also make BPF_SYSCALL
more configurable, not just under CONFIG_EXPERT.

If BPF_SYSCALL, tracing and kprobes are enabled then enable the bpf_tracing
gateway as well.

We might want to make this an interactive option later on, although
I'd not complicate it unnecessarily: enabling BPF_SYSCALL is enough of
an indicator that the user wants BPF support.

Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 init/Kconfig          | 2 +-
 kernel/trace/Kconfig  | 8 ++++++++
 kernel/trace/Makefile | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d4261b..2b4d055aca4a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1513,7 +1513,7 @@ config EVENTFD
 
 # syscall, maps, verifier
 config BPF_SYSCALL
-	bool "Enable bpf() system call" if EXPERT
+	bool "Enable bpf() system call"
 	select ANON_INODES
 	select BPF
 	default n
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a5da09c899dd..c8e53c051293 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -432,6 +432,14 @@ config UPROBE_EVENT
 	  This option is required if you plan to use perf-probe subcommand
 	  of perf tools on user space applications.
 
+config BPF_EVENTS
+	depends on BPF_SYSCALL
+	depends on KPROBE_EVENT
+	bool
+	default y
+	help
+	  This allows the user to attach BPF programs to kprobe events.
+
 config PROBE_EVENTS
 	def_bool n
 
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c575a300103b..9b1044e936a6 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,7 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
-obj-$(CONFIG_BPF_SYSCALL) += bpf_trace.o
+obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
-- 
cgit v1.2.3


From 00ccbf2f5b7580cd7dcdaeda84828d14f0cba3c9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Thu, 19 Feb 2015 15:56:14 +0100
Subject: ftrace/x86: Let dynamic trampolines call ops->func even for dynamic
 fops

Dynamically allocated trampolines call ftrace_ops_get_func to get the
function which they should call. For dynamic fops (FTRACE_OPS_FL_DYNAMIC
flag is set) ftrace_ops_list_func is always returned. This is reasonable
for static trampolines but goes against the main advantage of dynamic
ones, that is avoidance of going through the list of all registered
callbacks for functions that are only being traced by a single callback.

We can fix it by returning ops->func (or recursion safe version) from
ftrace_ops_get_func whenever it is possible for dynamic trampolines.

Note that dynamic trampolines are not allowed for dynamic fops if
CONFIG_PREEMPT=y.

Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1501291023000.25445@pobox.suse.cz
Link: http://lkml.kernel.org/r/1424357773-13536-1-git-send-email-mbenes@suse.cz

Reported-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4f228024055b..d01d238d8ef4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -249,6 +249,19 @@ static void update_function_graph_func(void);
 static inline void update_function_graph_func(void) { }
 #endif
 
+
+static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
+{
+	/*
+	 * If this is a dynamic ops or we force list func,
+	 * then it needs to call the list anyway.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
+		return ftrace_ops_list_func;
+
+	return ftrace_ops_get_func(ops);
+}
+
 static void update_ftrace_function(void)
 {
 	ftrace_func_t func;
@@ -270,7 +283,7 @@ static void update_ftrace_function(void)
 	 * then have the mcount trampoline call the function directly.
 	 */
 	} else if (ftrace_ops_list->next == &ftrace_list_end) {
-		func = ftrace_ops_get_func(ftrace_ops_list);
+		func = ftrace_ops_get_list_func(ftrace_ops_list);
 
 	} else {
 		/* Just use the default ftrace_ops */
@@ -5208,13 +5221,6 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
  */
 ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
 {
-	/*
-	 * If this is a dynamic ops or we force list func,
-	 * then it needs to call the list anyway.
-	 */
-	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
-		return ftrace_ops_list_func;
-
 	/*
 	 * If the func handles its own recursion, call it directly.
 	 * Otherwise call the recursion protected function that
-- 
cgit v1.2.3


From 0c564a538aa934ad15b2145aaf8b64f3feb0be63 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 24 Mar 2015 17:58:09 -0400
Subject: tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values

Several tracepoints use the helper functions __print_symbolic() or
__print_flags() and pass in enums that do the mapping between the
binary data stored and the value to print. This works well for reading
the ASCII trace files, but when the data is read via userspace tools
such as perf and trace-cmd, the conversion of the binary value to a
human string format is lost if an enum is used, as userspace does not
have access to what the ENUM is.

For example, the tracepoint trace_tlb_flush() has:

 __print_symbolic(REC->reason,
    { TLB_FLUSH_ON_TASK_SWITCH, "flush on task switch" },
    { TLB_REMOTE_SHOOTDOWN, "remote shootdown" },
    { TLB_LOCAL_SHOOTDOWN, "local shootdown" },
    { TLB_LOCAL_MM_SHOOTDOWN, "local mm shootdown" })

Which maps the enum values to the strings they represent. But perf and
trace-cmd do not know what value TLB_LOCAL_MM_SHOOTDOWN is, and would
not be able to map it.

With TRACE_DEFINE_ENUM(), developers can place these in the event header
files and ftrace will convert the enums to their values:

By adding:

 TRACE_DEFINE_ENUM(TLB_FLUSH_ON_TASK_SWITCH);
 TRACE_DEFINE_ENUM(TLB_REMOTE_SHOOTDOWN);
 TRACE_DEFINE_ENUM(TLB_LOCAL_SHOOTDOWN);
 TRACE_DEFINE_ENUM(TLB_LOCAL_MM_SHOOTDOWN);

 $ cat /sys/kernel/debug/tracing/events/tlb/tlb_flush/format
[...]
 __print_symbolic(REC->reason,
    { 0, "flush on task switch" },
    { 1, "remote shootdown" },
    { 2, "local shootdown" },
    { 3, "local mm shootdown" })

The above is what userspace expects to see, and tools do not need to
be modified to parse them.

Link: http://lkml.kernel.org/r/20150403013802.220157513@goodmis.org

Cc: Guilherme Cox <cox@computer.org>
Cc: Tony Luck <tony.luck@gmail.com>
Cc: Xie XiuQi <xiexiuqi@huawei.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h |   5 +-
 include/linux/ftrace_event.h      |   2 +-
 include/linux/tracepoint.h        |   8 +++
 include/trace/ftrace.h            |  22 ++++++-
 kernel/trace/trace.c              |  26 ++++++++-
 kernel/trace/trace.h              |   2 +
 kernel/trace/trace_events.c       | 119 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 178 insertions(+), 6 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index ac78910d7416..f8e8b34dc427 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -124,7 +124,10 @@
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
 			*(_ftrace_events)				\
-			VMLINUX_SYMBOL(__stop_ftrace_events) = .;
+			VMLINUX_SYMBOL(__stop_ftrace_events) = .;	\
+			VMLINUX_SYMBOL(__start_ftrace_enum_maps) = .;	\
+			*(_ftrace_enum_map)				\
+			VMLINUX_SYMBOL(__stop_ftrace_enum_maps) = .;
 #else
 #define FTRACE_EVENTS()
 #endif
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 62b8fac7ded5..112cf49d9576 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -285,7 +285,7 @@ struct ftrace_event_call {
 		struct tracepoint	*tp;
 	};
 	struct trace_event	event;
-	const char		*print_fmt;
+	char			*print_fmt;
 	struct event_filter	*filter;
 	void			*mod;
 	void			*data;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index c72851328ca9..a5f7f3ecafa3 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -36,6 +36,12 @@ struct tracepoint {
 	struct tracepoint_func __rcu *funcs;
 };
 
+struct trace_enum_map {
+	const char		*system;
+	const char		*enum_string;
+	unsigned long		enum_value;
+};
+
 extern int
 tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
 extern int
@@ -87,6 +93,8 @@ extern void syscall_unregfunc(void);
 
 #define PARAMS(args...) args
 
+#define TRACE_DEFINE_ENUM(x)
+
 #endif /* _LINUX_TRACEPOINT_H */
 
 /*
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 2f9b95b6d3fb..37d4b10b111d 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -33,6 +33,19 @@
 
 TRACE_MAKE_SYSTEM_STR();
 
+#undef TRACE_DEFINE_ENUM
+#define TRACE_DEFINE_ENUM(a)				\
+	static struct trace_enum_map __used __initdata	\
+	__##TRACE_SYSTEM##_##a =			\
+	{						\
+		.system = TRACE_SYSTEM_STRING,		\
+		.enum_string = #a,			\
+		.enum_value = a				\
+	};						\
+	static struct trace_enum_map __used		\
+	__attribute__((section("_ftrace_enum_map")))	\
+	*TRACE_SYSTEM##_##a = &__##TRACE_SYSTEM##_##a
+
 /*
  * DECLARE_EVENT_CLASS can be used to add a generic function
  * handlers for events. That is, if all events have the same
@@ -136,6 +149,9 @@ TRACE_MAKE_SYSTEM_STR();
  * The size of an array is also encoded, in the higher 16 bits of <item>.
  */
 
+#undef TRACE_DEFINE_ENUM
+#define TRACE_DEFINE_ENUM(a)
+
 #undef __field
 #define __field(type, item)
 
@@ -553,7 +569,7 @@ static inline notrace int ftrace_get_offsets_##call(			\
  *	.trace			= ftrace_raw_output_<call>, <-- stage 2
  * };
  *
- * static const char print_fmt_<call>[] = <TP_printk>;
+ * static char print_fmt_<call>[] = <TP_printk>;
  *
  * static struct ftrace_event_class __used event_class_<template> = {
  *	.system			= "<system>",
@@ -704,7 +720,7 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 _TRACE_PERF_PROTO(call, PARAMS(proto));					\
-static const char print_fmt_##call[] = print;				\
+static char print_fmt_##call[] = print;					\
 static struct ftrace_event_class __used __refdata event_class_##call = { \
 	.system			= TRACE_SYSTEM_STRING,			\
 	.define_fields		= ftrace_define_fields_##call,		\
@@ -733,7 +749,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, call, proto, args, print)		\
 									\
-static const char print_fmt_##call[] = print;				\
+static char print_fmt_##call[] = print;					\
 									\
 static struct ftrace_event_call __used event_##call = {			\
 	.class			= &event_class_##template,		\
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 62c6506d663f..ebf49649534c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3908,6 +3908,20 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = {
 	.write		= tracing_saved_cmdlines_size_write,
 };
 
+static void
+trace_insert_enum_map(struct trace_enum_map **start, struct trace_enum_map **stop)
+{
+	struct trace_enum_map **map;
+	int len = stop - start;
+
+	if (len <= 0)
+		return;
+
+	map = start;
+
+	trace_event_enum_update(map, len);
+}
+
 static ssize_t
 tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		       size_t cnt, loff_t *ppos)
@@ -6542,6 +6556,14 @@ struct dentry *tracing_init_dentry(void)
 	return tr->dir;
 }
 
+extern struct trace_enum_map *__start_ftrace_enum_maps[];
+extern struct trace_enum_map *__stop_ftrace_enum_maps[];
+
+static void __init trace_enum_init(void)
+{
+	trace_insert_enum_map(__start_ftrace_enum_maps, __stop_ftrace_enum_maps);
+}
+
 static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
@@ -6566,6 +6588,8 @@ static __init int tracer_init_debugfs(void)
 	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
 			  NULL, &tracing_saved_cmdlines_size_fops);
 
+	trace_enum_init();
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -6888,7 +6912,7 @@ void __init trace_init(void)
 			tracepoint_printk = 0;
 	}
 	tracer_alloc_buffers();
-	trace_event_init();	
+	trace_event_init();
 }
 
 __init static int clear_boot_tracer(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dd8205a35760..b48d4b08f691 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1309,8 +1309,10 @@ static inline void init_ftrace_syscalls(void) { }
 
 #ifdef CONFIG_EVENT_TRACING
 void trace_event_init(void);
+void trace_event_enum_update(struct trace_enum_map **map, int len);
 #else
 static inline void __init trace_event_init(void) { }
+static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { }
 #endif
 
 extern struct trace_iterator *tracepoint_print_iter;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index db54dda10ccc..fc58c50fbf01 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1704,6 +1704,125 @@ __register_event(struct ftrace_event_call *call, struct module *mod)
 	return 0;
 }
 
+/* Replace the len-byte enum name at ptr by its value; NULL if it won't fit */
+static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
+{
+	char delim = ptr[len];	/* first character after the enum name */
+	int rlen, elen;
+
+	/* Find the length of the enum value as a string */
+	elen = snprintf(ptr, 0, "%ld", map->enum_value);
+	/* Make sure there's enough room to replace the string with the value */
+	if (len < elen)
+		return NULL;
+
+	snprintf(ptr, elen + 1, "%ld", map->enum_value);
+	ptr[len] = delim;	/* snprintf() put a NUL here if len == elen */
+	/* Move the rest of the string up against the value */
+	rlen = strlen(ptr + len);
+	memmove(ptr + elen, ptr + len, rlen);
+	/* Make sure we end the new string */
+	ptr[elen + rlen] = 0;
+
+	return ptr + elen;
+}
+
+/* Replace each use of map->enum_string in call->print_fmt with its value */
+static void update_event_printk(struct ftrace_event_call *call,
+				struct trace_enum_map *map)
+{
+	char *ptr;
+	int quote = 0;
+	int len = strlen(map->enum_string);
+
+	for (ptr = call->print_fmt; *ptr; ptr++) {
+		if (*ptr == '\\') {
+			ptr++;
+			/* paranoid */
+			if (!*ptr)
+				break;
+			continue;
+		}
+		if (*ptr == '"') {
+			quote ^= 1;
+			continue;
+		}
+		/* Text inside a quoted string is never an enum */
+		if (quote)
+			continue;
+		if (isdigit(*ptr)) {
+			/* skip numbers, including alpha chars like ULL */
+			do {
+				ptr++;
+			} while (isalnum(*ptr));
+			/* Never let the loop step past the terminating NUL */
+			if (!*ptr)
+				break;
+			/* A number is followed by a delimiter; skip that too */
+			continue;
+		}
+		if (isalpha(*ptr) || *ptr == '_') {
+			if (strncmp(map->enum_string, ptr, len) == 0 &&
+			    !isalnum(ptr[len]) && ptr[len] != '_') {
+				ptr = enum_replace(ptr, map, len);
+				/* Hmm, enum string smaller than value */
+				if (WARN_ON_ONCE(!ptr))
+					return;
+				/*
+				 * ptr now points just past the value. Enums
+				 * are never back to back, so skip that
+				 * character too, unless the string ends here.
+				 */
+				if (!*ptr)
+					break;
+				continue;
+			}
+		skip_more:
+			do {
+				ptr++;
+			} while (isalnum(*ptr) || *ptr == '_');
+			/* A '.' or '->' continues the name: keep skipping */
+			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
+				ptr += *ptr == '.' ? 1 : 2;
+				if (!*ptr)
+					break;
+				goto skip_more;
+			}
+			/* Never let the loop step past the terminating NUL */
+			if (!*ptr)
+				break;
+			/* Otherwise skip the delimiter after the name */
+			continue;
+		}
+	}
+}
+
+void trace_event_enum_update(struct trace_enum_map **map, int len)
+{
+	struct ftrace_event_call *call, *p;
+	const char *last_system = NULL;
+	int last_i;
+	int i;
+
+	down_write(&trace_event_sem);
+	list_for_each_entry_safe(call, p, &ftrace_events, list) {
+		/* events are usually grouped together with systems */
+		if (!last_system || call->class->system != last_system) {
+			last_i = 0;
+			last_system = call->class->system;
+		}
+
+		for (i = last_i; i < len; i++) {
+			if (call->class->system == map[i]->system) {
+				/* Save the first system if need be */
+				if (!last_i)
+					last_i = i;
+				update_event_printk(call, map[i]);
+			}
+		}
+	}
+	up_write(&trace_event_sem);
+}
+
 static struct ftrace_event_file *
 trace_create_new_event(struct ftrace_event_call *call,
 		       struct trace_array *tr)
-- 
cgit v1.2.3


From 3673b8e4ce7237160fa31ee8d7e94a4d5a9976a1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 25 Mar 2015 15:44:21 -0400
Subject: tracing: Allow for modules to convert their enums to values

Update the infrastructure such that modules that declare TRACE_DEFINE_ENUM()
will have those enums converted into their values in the tracepoint
print fmt strings.

Link: http://lkml.kernel.org/r/87vbhjp74q.fsf@rustcorp.com.au

Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/module.h      |  2 ++
 kernel/module.c             |  3 +++
 kernel/trace/trace.c        | 49 +++++++++++++++++++++++++++++++++++++++++----
 kernel/trace/trace_events.c |  2 +-
 4 files changed, 51 insertions(+), 5 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/linux/module.h b/include/linux/module.h
index 42999fe2dbd0..53dc41dd5c62 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -338,6 +338,8 @@ struct module {
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call **trace_events;
 	unsigned int num_trace_events;
+	struct trace_enum_map **trace_enums;
+	unsigned int num_trace_enums;
 #endif
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 	unsigned int num_ftrace_callsites;
diff --git a/kernel/module.c b/kernel/module.c
index b3d634ed06c9..d8f8ab271c2b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2753,6 +2753,9 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 	mod->trace_events = section_objs(info, "_ftrace_events",
 					 sizeof(*mod->trace_events),
 					 &mod->num_trace_events);
+	mod->trace_enums = section_objs(info, "_ftrace_enum_map",
+					sizeof(*mod->trace_enums),
+					&mod->num_trace_enums);
 #endif
 #ifdef CONFIG_TRACING
 	mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt",
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ebf49649534c..28e6654e640d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3908,11 +3908,9 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = {
 	.write		= tracing_saved_cmdlines_size_write,
 };
 
-static void
-trace_insert_enum_map(struct trace_enum_map **start, struct trace_enum_map **stop)
+static void trace_insert_enum_map(struct trace_enum_map **start, int len)
 {
 	struct trace_enum_map **map;
-	int len = stop - start;
 
 	if (len <= 0)
 		return;
@@ -6561,9 +6559,48 @@ extern struct trace_enum_map *__stop_ftrace_enum_maps[];
 
 static void __init trace_enum_init(void)
 {
-	trace_insert_enum_map(__start_ftrace_enum_maps, __stop_ftrace_enum_maps);
+	int len;
+
+	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
+	trace_insert_enum_map(__start_ftrace_enum_maps, len);
+}
+
+#ifdef CONFIG_MODULES
+static void trace_module_add_enums(struct module *mod)
+{
+	if (!mod->num_trace_enums)
+		return;
+
+	/*
+	 * Modules with bad taint do not have events created, do
+	 * not bother with enums either.
+	 */
+	if (trace_module_has_bad_taint(mod))
+		return;
+
+	trace_insert_enum_map(mod->trace_enums, mod->num_trace_enums);
+}
+
+static int trace_module_notify(struct notifier_block *self,
+			       unsigned long val, void *data)
+{
+	struct module *mod = data;
+
+	switch (val) {
+	case MODULE_STATE_COMING:
+		trace_module_add_enums(mod);
+		break;
+	}
+
+	return 0;
 }
 
+static struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
+#endif
+
 static __init int tracer_init_debugfs(void)
 {
 	struct dentry *d_tracer;
@@ -6590,6 +6627,10 @@ static __init int tracer_init_debugfs(void)
 
 	trace_enum_init();
 
+#ifdef CONFIG_MODULES
+	register_module_notifier(&trace_module_nb);
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
 			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index fc58c50fbf01..a576bbe75577 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2034,7 +2034,7 @@ static int trace_module_notify(struct notifier_block *self,
 
 static struct notifier_block trace_module_nb = {
 	.notifier_call = trace_module_notify,
-	.priority = 0,
+	.priority = 1, /* higher than trace.c module notify */
 };
 #endif /* CONFIG_MODULES */
 
-- 
cgit v1.2.3


From 9828413d4715d4ed12bc92b161f4ed377d777ffb Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 31 Mar 2015 17:23:45 -0400
Subject: tracing: Add enum_map file to show enums that have been mapped

Add an enum_map file in the tracing directory to see what enums have been
saved to convert in the print fmt files.

As this requires the enum mapping to be persistent in memory, it is only
created if the new config option CONFIG_TRACE_ENUM_MAP_FILE is enabled.
This is for debugging and will increase the persistent memory footprint
of the kernel.

Link: http://lkml.kernel.org/r/20150403013802.220157513@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/Kconfig |  28 ++++++
 kernel/trace/trace.c | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 269 insertions(+), 4 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a5da09c899dd..fedbdd7d5d1e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -599,6 +599,34 @@ config RING_BUFFER_STARTUP_TEST
 
 	 If unsure, say N
 
+config TRACE_ENUM_MAP_FILE
+       bool "Show enum mappings for trace events"
+       depends on TRACING
+       help
+        The "print fmt" of the trace events will show the enum names instead
+	of their values. This can cause problems for user space tools that
+	use this string to parse the raw data as user space does not know
+	how to convert the string to its value.
+
+	To fix this, there's a special macro in the kernel that can be used
+	to convert the enum into its value. If this macro is used, then the
+	print fmt strings will have the enums converted to their values.
+
+	If something does not get converted properly, this option can be
+	used to show what enums the kernel tried to convert.
+
+	This option is for debugging the enum conversions. A file is created
+	in the tracing directory called "enum_map" that will show the enum
+	names matched with their values and what trace event system they
+	belong to.
+
+	Normally, the mapping of the strings to values will be freed after
+	boot up or module load. With this option, they will not be freed, as
+	they are needed for the "enum_map" file. Enabling this option will
+	increase the memory footprint of the running kernel.
+
+	If unsure, say N
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 28e6654e640d..39e69568302e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -123,6 +123,42 @@ enum ftrace_dump_mode ftrace_dump_on_oops;
 /* When set, tracing will stop when a WARN*() is hit */
 int __disable_trace_on_warning;
 
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+/* Map of enums to their values, for "enum_map" file */
+struct trace_enum_map_head {
+	struct module			*mod;
+	unsigned long			length;
+};
+
+union trace_enum_map_item;
+
+struct trace_enum_map_tail {
+	/*
+	 * "end" overlays map.enum_string in the union and is always NULL,
+	 * which is how a tail item is told apart from a real map item.
+	 */
+	union trace_enum_map_item	*next;
+	const char			*end;	/* points to NULL */
+};
+
+static DEFINE_MUTEX(trace_enum_mutex);
+
+/*
+ * The trace_enum_maps are saved in an array with two extra elements,
+ * one at the beginning, and one at the end. The beginning item contains
+ * the count of the saved maps (head.length), and the module they
+ * belong to if not built in (head.mod). The ending item contains a
+ * pointer to the next array of saved enum_map items.
+ */
+union trace_enum_map_item {
+	struct trace_enum_map		map;
+	struct trace_enum_map_head	head;
+	struct trace_enum_map_tail	tail;
+};
+
+static union trace_enum_map_item *trace_enum_maps;
+#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
+
 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
 
 #define MAX_TRACER_SIZE		100
@@ -3908,7 +3944,169 @@ static const struct file_operations tracing_saved_cmdlines_size_fops = {
 	.write		= tracing_saved_cmdlines_size_write,
 };
 
-static void trace_insert_enum_map(struct trace_enum_map **start, int len)
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+static union trace_enum_map_item *
+update_enum_map(union trace_enum_map_item *ptr)
+{
+	if (!ptr->map.enum_string) {
+		if (ptr->tail.next) {
+			ptr = ptr->tail.next;
+			/* Set ptr to the next real item (skip head) */
+			ptr++;
+		} else
+			return NULL;
+	}
+	return ptr;
+}
+
+static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	union trace_enum_map_item *ptr = v;
+
+	/*
+	 * Paranoid! If ptr points to end, we don't want to increment past it.
+	 * This really should never happen.
+	 */
+	ptr = update_enum_map(ptr);
+	if (WARN_ON_ONCE(!ptr))
+		return NULL;
+
+	ptr++;
+
+	(*pos)++;
+
+	ptr = update_enum_map(ptr);
+
+	return ptr;
+}
+
+static void *enum_map_start(struct seq_file *m, loff_t *pos)
+{
+	union trace_enum_map_item *v;
+	loff_t l = 0;
+
+	mutex_lock(&trace_enum_mutex);
+
+	v = trace_enum_maps;
+	if (v)
+		v++;
+
+	while (v && l < *pos) {
+		v = enum_map_next(m, v, &l);
+	}
+
+	return v;
+}
+
+static void enum_map_stop(struct seq_file *m, void *v)
+{
+	mutex_unlock(&trace_enum_mutex);
+}
+
+static int enum_map_show(struct seq_file *m, void *v)
+{
+	union trace_enum_map_item *ptr = v;
+
+	seq_printf(m, "%s %ld (%s)\n",
+		   ptr->map.enum_string, ptr->map.enum_value,
+		   ptr->map.system);
+
+	return 0;
+}
+
+static const struct seq_operations tracing_enum_map_seq_ops = {
+	.start		= enum_map_start,
+	.next		= enum_map_next,
+	.stop		= enum_map_stop,
+	.show		= enum_map_show,
+};
+
+static int tracing_enum_map_open(struct inode *inode, struct file *filp)
+{
+	if (tracing_disabled)
+		return -ENODEV;
+
+	return seq_open(filp, &tracing_enum_map_seq_ops);
+}
+
+static const struct file_operations tracing_enum_map_fops = {
+	.open		= tracing_enum_map_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static inline union trace_enum_map_item *
+trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
+{
+	/* Return tail of array given the head */
+	return ptr + ptr->head.length + 1;
+}
+
+static void
+trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
+			   int len)
+{
+	struct trace_enum_map **stop;
+	struct trace_enum_map **map;
+	union trace_enum_map_item *map_array;
+	union trace_enum_map_item *ptr;
+
+	stop = start + len;
+
+	/*
+	 * The trace_enum_maps contains the map plus a head and tail item,
+	 * where the head holds the module and length of array, and the
+	 * tail holds a pointer to the next list.
+	 */
+	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
+	if (!map_array) {
+		pr_warning("Unable to allocate trace enum mapping\n");
+		return;
+	}
+
+	mutex_lock(&trace_enum_mutex);
+
+	if (!trace_enum_maps)
+		trace_enum_maps = map_array;
+	else {
+		ptr = trace_enum_maps;
+		for (;;) {
+			ptr = trace_enum_jmp_to_tail(ptr);
+			if (!ptr->tail.next)
+				break;
+			ptr = ptr->tail.next;
+
+		}
+		ptr->tail.next = map_array;
+	}
+	map_array->head.mod = mod;
+	map_array->head.length = len;
+	map_array++;
+
+	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
+		map_array->map = **map;
+		map_array++;
+	}
+	memset(map_array, 0, sizeof(*map_array));
+
+	mutex_unlock(&trace_enum_mutex);
+}
+
+static void trace_create_enum_file(struct dentry *d_tracer)
+{
+	trace_create_file("enum_map", 0444, d_tracer,
+			  NULL, &tracing_enum_map_fops);
+}
+
+#else /* CONFIG_TRACE_ENUM_MAP_FILE */
+static inline void trace_create_enum_file(struct dentry *d_tracer) { }
+static inline void trace_insert_enum_map_file(struct module *mod,
+			      struct trace_enum_map **start, int len) { }
+#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
+
+static void trace_insert_enum_map(struct module *mod,
+				  struct trace_enum_map **start, int len)
 {
 	struct trace_enum_map **map;
 
@@ -3918,6 +4116,8 @@ static void trace_insert_enum_map(struct trace_enum_map **start, int len)
 	map = start;
 
 	trace_event_enum_update(map, len);
+
+	trace_insert_enum_map_file(mod, start, len);
 }
 
 static ssize_t
@@ -6562,7 +6762,7 @@ static void __init trace_enum_init(void)
 	int len;
 
 	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
-	trace_insert_enum_map(__start_ftrace_enum_maps, len);
+	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
 }
 
 #ifdef CONFIG_MODULES
@@ -6578,9 +6778,41 @@ static void trace_module_add_enums(struct module *mod)
 	if (trace_module_has_bad_taint(mod))
 		return;
 
-	trace_insert_enum_map(mod->trace_enums, mod->num_trace_enums);
+	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
 }
 
+#ifdef CONFIG_TRACE_ENUM_MAP_FILE
+static void trace_module_remove_enums(struct module *mod)
+{
+	union trace_enum_map_item *map;
+	union trace_enum_map_item **last = &trace_enum_maps;
+
+	if (!mod->num_trace_enums)
+		return;
+
+	mutex_lock(&trace_enum_mutex);
+
+	map = trace_enum_maps;
+
+	while (map) {
+		if (map->head.mod == mod)
+			break;
+		map = trace_enum_jmp_to_tail(map);
+		last = &map->tail.next;
+		map = map->tail.next;
+	}
+	if (!map)
+		goto out;
+
+	*last = trace_enum_jmp_to_tail(map)->tail.next;
+	kfree(map);
+ out:
+	mutex_unlock(&trace_enum_mutex);
+}
+#else
+static inline void trace_module_remove_enums(struct module *mod) { }
+#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
+
 static int trace_module_notify(struct notifier_block *self,
 			       unsigned long val, void *data)
 {
@@ -6590,6 +6822,9 @@ static int trace_module_notify(struct notifier_block *self,
 	case MODULE_STATE_COMING:
 		trace_module_add_enums(mod);
 		break;
+	case MODULE_STATE_GOING:
+		trace_module_remove_enums(mod);
+		break;
 	}
 
 	return 0;
@@ -6599,7 +6834,7 @@ static struct notifier_block trace_module_nb = {
 	.notifier_call = trace_module_notify,
 	.priority = 0,
 };
-#endif
+#endif /* CONFIG_MODULES */
 
 static __init int tracer_init_debugfs(void)
 {
@@ -6627,6 +6862,8 @@ static __init int tracer_init_debugfs(void)
 
 	trace_enum_init();
 
+	trace_create_enum_file(d_tracer);
+
 #ifdef CONFIG_MODULES
 	register_module_notifier(&trace_module_nb);
 #endif
-- 
cgit v1.2.3


From 7682c918439d42291df0d76b3e06627f27fbcdef Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 17 Mar 2015 22:26:16 +0000
Subject: VFS: kernel/: d_inode() annotations

relayfs and tracefs are dealing with inodes of their own;
those two act as filesystem drivers

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/relay.c              | 4 ++--
 kernel/trace/trace.c        | 8 ++++----
 kernel/trace/trace_events.c | 4 ++--
 kernel/trace/trace_uprobe.c | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/relay.c b/kernel/relay.c
index 5a56d3c8dc03..e9dbaeb8fd65 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf,
 					struct dentry *dentry)
 {
 	buf->dentry = dentry;
-	buf->dentry->d_inode->i_size = buf->early_bytes;
+	d_inode(buf->dentry)->i_size = buf->early_bytes;
 }
 
 static struct dentry *relay_create_buf_file(struct rchan *chan,
@@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 		buf->padding[old_subbuf] = buf->prev_padding;
 		buf->subbufs_produced++;
 		if (buf->dentry)
-			buf->dentry->d_inode->i_size +=
+			d_inode(buf->dentry)->i_size +=
 				buf->chan->subbuf_size -
 				buf->padding[old_subbuf];
 		else
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 62c6506d663f..d44901bcd867 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5846,7 +5846,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
 
 	if (ret) /* See tracing_get_cpu() */
-		ret->d_inode->i_cdev = (void *)(cpu + 1);
+		d_inode(ret)->i_cdev = (void *)(cpu + 1);
 	return ret;
 }
 
@@ -6420,7 +6420,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry)
 		return -ENOENT;
 
 	/* The caller did a dget() on dentry */
-	mutex_unlock(&dentry->d_inode->i_mutex);
+	mutex_unlock(&d_inode(dentry)->i_mutex);
 
 	/*
 	 * The inode mutex is locked, but debugfs_create_dir() will also
@@ -6435,7 +6435,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry)
 	ret = instance_delete(dentry->d_iname);
 
 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-	mutex_lock(&dentry->d_inode->i_mutex);
+	mutex_lock(&d_inode(dentry)->i_mutex);
 
 	return ret;
 }
@@ -6453,7 +6453,7 @@ static __init void create_trace_instances(struct dentry *d_tracer)
 		return;
 
 	/* Hijack the dir inode operations, to allow mkdir */
-	trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
+	d_inode(trace_instance_dir)->i_op = &instance_dir_inode_operations;
 }
 
 static void
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index db54dda10ccc..e19e7578a0d3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
 	if (dir) {
 		spin_lock(&dir->d_lock);	/* probably unneeded */
 		list_for_each_entry(child, &dir->d_subdirs, d_child) {
-			if (child->d_inode)	/* probably unneeded */
-				child->d_inode->i_private = NULL;
+			if (d_really_is_positive(child))	/* probably unneeded */
+				d_inode(child)->i_private = NULL;
 		}
 		spin_unlock(&dir->d_lock);
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 7dc1c8abecd6..cb3b37e533cd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (ret)
 		goto fail_address_parse;
 
-	inode = igrab(path.dentry->d_inode);
+	inode = igrab(d_inode(path.dentry));
 	path_put(&path);
 
 	if (!inode || !S_ISREG(inode->i_mode)) {
-- 
cgit v1.2.3


From 962e3707d9fb16bcf66ec5e5ebcea5248b9c2ab3 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 15 Apr 2015 16:18:22 -0700
Subject: tracing: remove use of seq_printf return value

The seq_printf return value, because it's frequently misused,
will eventually be converted to void.

See: commit 1f33c41c03da ("seq_file: Rename seq_overflow() to
     seq_has_overflowed() and make public")

Miscellanea:

o Remove unused return value from trace_lookup_stack

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/trace/trace_stack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index c3e4fcfddd45..3f34496244e9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -327,11 +327,11 @@ static void t_stop(struct seq_file *m, void *p)
 	local_irq_enable();
 }
 
-static int trace_lookup_stack(struct seq_file *m, long i)
+static void trace_lookup_stack(struct seq_file *m, long i)
 {
 	unsigned long addr = stack_dump_trace[i];
 
-	return seq_printf(m, "%pS\n", (void *)addr);
+	seq_printf(m, "%pS\n", (void *)addr);
 }
 
 static void print_disabled(struct seq_file *m)
-- 
cgit v1.2.3


From ef99b88b16bee753fa51207abdc58ae660453ec6 Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabin@rab.in>
Date: Mon, 13 Apr 2015 22:30:12 +0200
Subject: tracing: Handle ftrace_dump() atomic context in graph_trace_open()

graph_trace_open() can be called in atomic context from ftrace_dump().
Use GFP_ATOMIC for the memory allocations when that's the case, in order
to avoid the following splat.

 BUG: sleeping function called from invalid context at mm/slab.c:2849
 in_atomic(): 1, irqs_disabled(): 128, pid: 0, name: swapper/0
 Backtrace:
 ..
 [<8004dc94>] (__might_sleep) from [<801371f4>] (kmem_cache_alloc_trace+0x160/0x238)
  r7:87800040 r6:000080d0 r5:810d16e8 r4:000080d0
 [<80137094>] (kmem_cache_alloc_trace) from [<800cbd60>] (graph_trace_open+0x30/0xd0)
  r10:00000100 r9:809171a8 r8:00008e28 r7:810d16f0 r6:00000001 r5:810d16e8
  r4:810d16f0
 [<800cbd30>] (graph_trace_open) from [<800c79c4>] (trace_init_global_iter+0x50/0x9c)
  r8:00008e28 r7:808c853c r6:00000001 r5:810d16e8 r4:810d16f0 r3:800cbd30
 [<800c7974>] (trace_init_global_iter) from [<800c7aa0>] (ftrace_dump+0x90/0x2ec)
  r4:810d2580 r3:00000000
 [<800c7a10>] (ftrace_dump) from [<80414b2c>] (sysrq_ftrace_dump+0x1c/0x20)
  r10:00000100 r9:809171a8 r8:808f6e7c r7:00000001 r6:00000007 r5:0000007a
  r4:808d5394
 [<80414b10>] (sysrq_ftrace_dump) from [<800169b8>] (return_to_handler+0x0/0x18)
 [<80415498>] (__handle_sysrq) from [<800169b8>] (return_to_handler+0x0/0x18)
  r8:808c8100 r7:808c8444 r6:00000101 r5:00000010 r4:84eb3210
 [<80415668>] (handle_sysrq) from [<800169b8>] (return_to_handler+0x0/0x18)
 [<8042a760>] (pl011_int) from [<800169b8>] (return_to_handler+0x0/0x18)
  r10:809171bc r9:809171a8 r8:00000001 r7:00000026 r6:808c6000 r5:84f01e60
  r4:8454fe00
 [<8007782c>] (handle_irq_event_percpu) from [<80077b44>] (handle_irq_event+0x4c/0x6c)
  r10:808c7ef0 r9:87283e00 r8:00000001 r7:00000000 r6:8454fe00 r5:84f01e60
  r4:84f01e00
 [<80077af8>] (handle_irq_event) from [<8007aa28>] (handle_fasteoi_irq+0xf0/0x1ac)
  r6:808f52a4 r5:84f01e60 r4:84f01e00 r3:00000000
 [<8007a938>] (handle_fasteoi_irq) from [<80076dc0>] (generic_handle_irq+0x3c/0x4c)
  r6:00000026 r5:00000000 r4:00000026 r3:8007a938
 [<80076d84>] (generic_handle_irq) from [<80077128>] (__handle_domain_irq+0x8c/0xfc)
  r4:808c1e38 r3:0000002e
 [<8007709c>] (__handle_domain_irq) from [<800087b8>] (gic_handle_irq+0x34/0x6c)
  r10:80917748 r9:00000001 r8:88802100 r7:808c7ef0 r6:808c8fb0 r5:00000015
  r4:8880210c r3:808c7ef0
 [<80008784>] (gic_handle_irq) from [<80014044>] (__irq_svc+0x44/0x7c)

Link: http://lkml.kernel.org/r/1428953721-31349-1-git-send-email-rabin@rab.in
Link: http://lkml.kernel.org/r/1428957012-2319-1-git-send-email-rabin@rab.in

Cc: stable@vger.kernel.org # 3.13+
Signed-off-by: Rabin Vincent <rabin@rab.in>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2d25ad1526bb..b6fce365ef27 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1309,15 +1309,19 @@ void graph_trace_open(struct trace_iterator *iter)
 {
 	/* pid and depth on the last trace processed */
 	struct fgraph_data *data;
+	gfp_t gfpflags;
 	int cpu;
 
 	iter->private = NULL;
 
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	/* We can be called in atomic context via ftrace_dump() */
+	gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+
+	data = kzalloc(sizeof(*data), gfpflags);
 	if (!data)
 		goto out_err;
 
-	data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+	data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags);
 	if (!data->cpu_data)
 		goto out_err_free;
 
-- 
cgit v1.2.3


From 84fce9db4d7eaebd6cb2ee30c15da6d4e4daf846 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Thu, 16 Apr 2015 13:44:44 +0900
Subject: tracing: Fix incorrect enabling of trace events by boot cmdline

There is a problem that trace events are not properly enabled with
boot cmdline. The problem is that if we pass "trace_event=kmem:mm_page_alloc"
to the boot cmdline, it enables all kmem trace events, and not just
the page_alloc event.

This is caused by the parsing mechanism. When we parse the cmdline, the buffer
contents are modified due to tokenization. And, if we use this buffer
again, we will get the wrong result.

Unfortunately, this buffer is accessed three times to set trace events
properly at boot time. So, we need to handle this situation.

There is already code handling ",", but we need another for ":".
This patch adds it.

Link: http://lkml.kernel.org/r/1429159484-22977-1-git-send-email-iamjoonsoo.kim@lge.com

Cc: stable@vger.kernel.org # 3.19+
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
[ added missing return ret; ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index a576bbe75577..36a957c996c7 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
 	char *event = NULL, *sub = NULL, *match;
+	int ret;
 
 	/*
 	 * The buf format can be <subsystem>:<event-name>
@@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 			event = NULL;
 	}
 
-	return __ftrace_set_clr_event(tr, match, sub, event, set);
+	ret = __ftrace_set_clr_event(tr, match, sub, event, set);
+
+	/* Put back the colon to allow this to be called again */
+	if (buf)
+		*(buf - 1) = ':';
+
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From 3193899d4dd54056f8c2e0b1e40dd6e2f0009f28 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 17 Apr 2015 10:27:57 -0400
Subject: tracing: Fix possible out of bounds memory access when parsing enums

The code that replaces the enum names with the enum values in the
tracepoints' format files could possibly miss the end of string nul
character. This was caused by processing things like backslashes, quotes
and other tokens. After processing the tokens, a check for the nul
character needed to be done before continuing the loop, because the loop
incremented the pointer before doing the check, which could bypass the nul
character.

Link: http://lkml.kernel.org/r/552E661D.5060502@oracle.com

Reported-by: Sasha Levin <sasha.levin@oracle.com> # via KASan
Tested-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Fixes: 0c564a538aa9 "tracing: Add TRACE_DEFINE_ENUM() macro to map enums to their values"
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 36a957c996c7..b49c107f82ac 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1760,6 +1760,8 @@ static void update_event_printk(struct ftrace_event_call *call,
 				ptr++;
 				/* Check for alpha chars like ULL */
 			} while (isalnum(*ptr));
+			if (!*ptr)
+				break;
 			/*
 			 * A number must have some kind of delimiter after
 			 * it, and we can ignore that too.
@@ -1786,12 +1788,16 @@ static void update_event_printk(struct ftrace_event_call *call,
 			do {
 				ptr++;
 			} while (isalnum(*ptr) || *ptr == '_');
+			if (!*ptr)
+				break;
 			/*
 			 * If what comes after this variable is a '.' or
 			 * '->' then we can continue to ignore that string.
 			 */
 			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
 				ptr += *ptr == '.' ? 1 : 2;
+				if (!*ptr)
+					break;
 				goto skip_more;
 			}
 			/*
-- 
cgit v1.2.3


From ac01ce1410fc2c7b5f3af5e9c972e6a412eee54f Mon Sep 17 00:00:00 2001
From: Alex Bennée <alex.bennee@linaro.org>
Date: Wed, 29 Apr 2015 16:18:46 +0100
Subject: tracing: Make ftrace_print_array_seq compute buf_len
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only caller to this function (__print_array) was getting it wrong by
passing the array length instead of buffer length. As the element size
was already being passed for other reasons it seems reasonable to push
the calculation of buffer length into the function.

Link: http://lkml.kernel.org/r/1430320727-14582-1-git-send-email-alex.bennee@linaro.org

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h | 2 +-
 kernel/trace/trace_output.c  | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 46e83c2156c6..f9ecf63d47f1 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -46,7 +46,7 @@ const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 
 const char *ftrace_print_array_seq(struct trace_seq *p,
-				   const void *buf, int buf_len,
+				   const void *buf, int count,
 				   size_t el_size);
 
 struct trace_iterator;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 692bf7184c8c..25a086bcb700 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -178,12 +178,13 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 EXPORT_SYMBOL(ftrace_print_hex_seq);
 
 const char *
-ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len,
+ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
 		       size_t el_size)
 {
 	const char *ret = trace_seq_buffer_ptr(p);
 	const char *prefix = "";
 	void *ptr = (void *)buf;
+	size_t buf_len = count * el_size;
 
 	trace_seq_putc(p, '{');
 
-- 
cgit v1.2.3