summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/auditsc.c6
-rw-r--r--kernel/cgroup.c273
-rw-r--r--kernel/configs/xen.config48
-rw-r--r--kernel/events/core.c10
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c52
-rw-r--r--kernel/jump_label.c10
-rw-r--r--kernel/module.c329
-rw-r--r--kernel/params.c127
-rw-r--r--kernel/power/Kconfig2
-rw-r--r--kernel/power/Makefile3
-rw-r--r--kernel/power/block_io.c103
-rw-r--r--kernel/power/hibernate.c4
-rw-r--r--kernel/power/power.h9
-rw-r--r--kernel/power/swap.c159
-rw-r--r--kernel/printk/printk.c235
-rw-r--r--kernel/rcu/tiny.c2
-rw-r--r--kernel/rcu/tree.c2
-rw-r--r--kernel/signal.c13
-rw-r--r--kernel/sys.c158
-rw-r--r--kernel/time/timekeeping.c29
-rw-r--r--kernel/trace/blktrace.c10
-rw-r--r--kernel/trace/ring_buffer.c221
-rw-r--r--kernel/trace/ring_buffer_benchmark.c23
-rw-r--r--kernel/trace/trace.c23
-rw-r--r--kernel/trace/trace.h44
-rw-r--r--kernel/trace/trace_branch.c4
-rw-r--r--kernel/trace/trace_clock.c3
-rw-r--r--kernel/trace/trace_event_perf.c20
-rw-r--r--kernel/trace/trace_events.c304
-rw-r--r--kernel/trace/trace_events_filter.c101
-rw-r--r--kernel/trace/trace_events_trigger.c70
-rw-r--r--kernel/trace/trace_export.c10
-rw-r--r--kernel/trace/trace_functions_graph.c8
-rw-r--r--kernel/trace/trace_kprobe.c70
-rw-r--r--kernel/trace/trace_mmiotrace.c4
-rw-r--r--kernel/trace/trace_output.c78
-rw-r--r--kernel/trace/trace_output.h2
-rw-r--r--kernel/trace/trace_probe.h8
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_syscalls.c72
-rw-r--r--kernel/trace/trace_uprobe.c46
-rw-r--r--kernel/workqueue.c491
44 files changed, 1833 insertions, 1361 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 1c13e4267de6..f9e6065346db 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1904,7 +1904,7 @@ EXPORT_SYMBOL(audit_log_task_info);
/**
* audit_log_link_denied - report a link restriction denial
- * @operation: specific link opreation
+ * @operation: specific link operation
* @link: the path that triggered the restriction
*/
void audit_log_link_denied(const char *operation, struct path *link)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 9fb9d1cb83ce..09c65640cad6 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -599,9 +599,7 @@ static int audit_filter_rules(struct task_struct *tsk,
result = match_tree_refs(ctx, rule->tree);
break;
case AUDIT_LOGINUID:
- result = 0;
- if (ctx)
- result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
+ result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
break;
case AUDIT_LOGINUID_SET:
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
@@ -1023,7 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context,
* for strings that are too long, we should not have created
* any.
*/
- if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) {
+ if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) {
WARN_ON(1);
send_sig(SIGKILL, current, 0);
return -1;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 469dd547770c..9ef9fc8a774b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,6 +46,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/rwsem.h>
+#include <linux/percpu-rwsem.h>
#include <linux/string.h>
#include <linux/sort.h>
#include <linux/kmod.h>
@@ -103,6 +104,8 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
*/
static DEFINE_SPINLOCK(release_agent_path_lock);
+struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
+
#define cgroup_assert_mutex_or_rcu_locked() \
rcu_lockdep_assert(rcu_read_lock_held() || \
lockdep_is_held(&cgroup_mutex), \
@@ -156,7 +159,7 @@ static bool cgrp_dfl_root_visible;
static bool cgroup_legacy_files_on_dfl;
/* some controllers are not supported in the default hierarchy */
-static unsigned int cgrp_dfl_root_inhibit_ss_mask;
+static unsigned long cgrp_dfl_root_inhibit_ss_mask;
/* The list of hierarchy roots */
@@ -175,18 +178,19 @@ static DEFINE_IDR(cgroup_hierarchy_idr);
*/
static u64 css_serial_nr_next = 1;
-/* This flag indicates whether tasks in the fork and exit paths should
- * check for fork/exit handlers to call. This avoids us having to do
- * extra work in the fork/exit path if none of the subsystems need to
- * be called.
+/*
+ * These bitmask flags indicate whether tasks in the fork and exit paths have
+ * fork/exit handlers to call. This avoids us having to do extra work in the
+ * fork/exit path to check which subsystems have fork/exit callbacks.
*/
-static int need_forkexit_callback __read_mostly;
+static unsigned long have_fork_callback __read_mostly;
+static unsigned long have_exit_callback __read_mostly;
static struct cftype cgroup_dfl_base_files[];
static struct cftype cgroup_legacy_base_files[];
static int rebind_subsystems(struct cgroup_root *dst_root,
- unsigned int ss_mask);
+ unsigned long ss_mask);
static int cgroup_destroy_locked(struct cgroup *cgrp);
static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
bool visible);
@@ -261,7 +265,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
- * Similar to cgroup_css() but returns the effctive css, which is defined
+ * Similar to cgroup_css() but returns the effective css, which is defined
* as the matching css of the nearest ancestor including self which has @ss
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
* function is guaranteed to return non-NULL css.
@@ -409,6 +413,24 @@ static int notify_on_release(const struct cgroup *cgrp)
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \
(((ss) = cgroup_subsys[ssid]) || true); (ssid)++)
+/**
+ * for_each_subsys_which - filter for_each_subsys with a bitmask
+ * @ss: the iteration cursor
+ * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
+ * @ss_maskp: a pointer to the bitmask
+ *
+ * The block will only run for cases where the ssid-th bit (1 << ssid) of
+ * mask is set to 1.
+ */
+#define for_each_subsys_which(ss, ssid, ss_maskp) \
+ if (!CGROUP_SUBSYS_COUNT) /* to avoid spurious gcc warning */ \
+ (ssid) = 0; \
+ else \
+ for_each_set_bit(ssid, ss_maskp, CGROUP_SUBSYS_COUNT) \
+ if (((ss) = cgroup_subsys[ssid]) && false) \
+ break; \
+ else
+
/* iterate across the hierarchies */
#define for_each_root(root) \
list_for_each_entry((root), &cgroup_roots, root_list)
@@ -882,7 +904,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root)
static void cgroup_free_root(struct cgroup_root *root)
{
if (root) {
- /* hierarhcy ID shoulid already have been released */
+ /* hierarchy ID should already have been released */
WARN_ON_ONCE(root->hierarchy_id);
idr_destroy(&root->cgroup_idr);
@@ -998,7 +1020,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
* update of a tasks cgroup pointer by cgroup_attach_task()
*/
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static const struct file_operations proc_cgroupstats_operations;
@@ -1068,11 +1090,11 @@ static void cgroup_put(struct cgroup *cgrp)
* @subtree_control is to be applied to @cgrp. The returned mask is always
* a superset of @subtree_control and follows the usual hierarchy rules.
*/
-static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
- unsigned int subtree_control)
+static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
+ unsigned long subtree_control)
{
struct cgroup *parent = cgroup_parent(cgrp);
- unsigned int cur_ss_mask = subtree_control;
+ unsigned long cur_ss_mask = subtree_control;
struct cgroup_subsys *ss;
int ssid;
@@ -1082,11 +1104,10 @@ static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp,
return cur_ss_mask;
while (true) {
- unsigned int new_ss_mask = cur_ss_mask;
+ unsigned long new_ss_mask = cur_ss_mask;
- for_each_subsys(ss, ssid)
- if (cur_ss_mask & (1 << ssid))
- new_ss_mask |= ss->depends_on;
+ for_each_subsys_which(ss, ssid, &cur_ss_mask)
+ new_ss_mask |= ss->depends_on;
/*
* Mask out subsystems which aren't available. This can
@@ -1200,7 +1221,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
* @cgrp: target cgroup
* @subsys_mask: mask of the subsystem ids whose files should be removed
*/
-static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
struct cgroup_subsys *ss;
int i;
@@ -1215,18 +1236,16 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
}
}
-static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
+static int rebind_subsystems(struct cgroup_root *dst_root,
+ unsigned long ss_mask)
{
struct cgroup_subsys *ss;
- unsigned int tmp_ss_mask;
+ unsigned long tmp_ss_mask;
int ssid, i, ret;
lockdep_assert_held(&cgroup_mutex);
- for_each_subsys(ss, ssid) {
- if (!(ss_mask & (1 << ssid)))
- continue;
-
+ for_each_subsys_which(ss, ssid, &ss_mask) {
/* if @ss has non-root csses attached to it, can't move */
if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)))
return -EBUSY;
@@ -1253,7 +1272,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
* Just warn about it and continue.
*/
if (cgrp_dfl_root_visible) {
- pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n",
+ pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n",
ret, ss_mask);
pr_warn("you may retry by moving them to a different hierarchy and unbinding\n");
}
@@ -1263,18 +1282,14 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
* Nothing can fail from this point on. Remove files for the
* removed subsystems and rebind each subsystem.
*/
- for_each_subsys(ss, ssid)
- if (ss_mask & (1 << ssid))
- cgroup_clear_dir(&ss->root->cgrp, 1 << ssid);
+ for_each_subsys_which(ss, ssid, &ss_mask)
+ cgroup_clear_dir(&ss->root->cgrp, 1 << ssid);
- for_each_subsys(ss, ssid) {
+ for_each_subsys_which(ss, ssid, &ss_mask) {
struct cgroup_root *src_root;
struct cgroup_subsys_state *css;
struct css_set *cset;
- if (!(ss_mask & (1 << ssid)))
- continue;
-
src_root = ss->root;
css = cgroup_css(&src_root->cgrp, ss);
@@ -1338,7 +1353,7 @@ static int cgroup_show_options(struct seq_file *seq,
}
struct cgroup_sb_opts {
- unsigned int subsys_mask;
+ unsigned long subsys_mask;
unsigned int flags;
char *release_agent;
bool cpuset_clone_children;
@@ -1351,7 +1366,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
{
char *token, *o = data;
bool all_ss = false, one_ss = false;
- unsigned int mask = -1U;
+ unsigned long mask = -1UL;
struct cgroup_subsys *ss;
int nr_opts = 0;
int i;
@@ -1495,7 +1510,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
int ret = 0;
struct cgroup_root *root = cgroup_root_from_kf(kf_root);
struct cgroup_sb_opts opts;
- unsigned int added_mask, removed_mask;
+ unsigned long added_mask, removed_mask;
if (root == &cgrp_dfl_root) {
pr_err("remount is not allowed\n");
@@ -1641,7 +1656,7 @@ static void init_cgroup_root(struct cgroup_root *root,
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
}
-static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
+static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
{
LIST_HEAD(tmp_links);
struct cgroup *root_cgrp = &root->cgrp;
@@ -2052,9 +2067,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
lockdep_assert_held(&css_set_rwsem);
/*
- * We are synchronized through threadgroup_lock() against PF_EXITING
- * setting such that we can't race against cgroup_exit() changing the
- * css_set to init_css_set and dropping the old one.
+ * We are synchronized through cgroup_threadgroup_rwsem against
+ * PF_EXITING setting such that we can't race against cgroup_exit()
+ * changing the css_set to init_css_set and dropping the old one.
*/
WARN_ON_ONCE(tsk->flags & PF_EXITING);
old_cset = task_css_set(tsk);
@@ -2111,10 +2126,11 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
* @src_cset and add it to @preloaded_csets, which should later be cleaned
* up by cgroup_migrate_finish().
*
- * This function may be called without holding threadgroup_lock even if the
- * target is a process. Threads may be created and destroyed but as long
- * as cgroup_mutex is not dropped, no new css_set can be put into play and
- * the preloaded css_sets are guaranteed to cover all migrations.
+ * This function may be called without holding cgroup_threadgroup_rwsem
+ * even if the target is a process. Threads may be created and destroyed
+ * but as long as cgroup_mutex is not dropped, no new css_set can be put
+ * into play and the preloaded css_sets are guaranteed to cover all
+ * migrations.
*/
static void cgroup_migrate_add_src(struct css_set *src_cset,
struct cgroup *dst_cgrp,
@@ -2217,7 +2233,7 @@ err:
* @threadgroup: whether @leader points to the whole process or a single task
*
* Migrate a process or task denoted by @leader to @cgrp. If migrating a
- * process, the caller must be holding threadgroup_lock of @leader. The
+ * process, the caller must be holding cgroup_threadgroup_rwsem. The
* caller is also responsible for invoking cgroup_migrate_add_src() and
* cgroup_migrate_prepare_dst() on the targets before invoking this
* function and following up with cgroup_migrate_finish().
@@ -2345,7 +2361,7 @@ out_release_tset:
* @leader: the task or the leader of the threadgroup to be attached
* @threadgroup: attach the whole threadgroup?
*
- * Call holding cgroup_mutex and threadgroup_lock of @leader.
+ * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
*/
static int cgroup_attach_task(struct cgroup *dst_cgrp,
struct task_struct *leader, bool threadgroup)
@@ -2376,6 +2392,47 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
return ret;
}
+static int cgroup_procs_write_permission(struct task_struct *task,
+ struct cgroup *dst_cgrp,
+ struct kernfs_open_file *of)
+{
+ const struct cred *cred = current_cred();
+ const struct cred *tcred = get_task_cred(task);
+ int ret = 0;
+
+ /*
+ * even if we're attaching all tasks in the thread group, we only
+ * need to check permissions on one of them.
+ */
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid))
+ ret = -EACCES;
+
+ if (!ret && cgroup_on_dfl(dst_cgrp)) {
+ struct super_block *sb = of->file->f_path.dentry->d_sb;
+ struct cgroup *cgrp;
+ struct inode *inode;
+
+ down_read(&css_set_rwsem);
+ cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
+ up_read(&css_set_rwsem);
+
+ while (!cgroup_is_descendant(dst_cgrp, cgrp))
+ cgrp = cgroup_parent(cgrp);
+
+ ret = -ENOMEM;
+ inode = kernfs_get_inode(sb, cgrp->procs_kn);
+ if (inode) {
+ ret = inode_permission(inode, MAY_WRITE);
+ iput(inode);
+ }
+ }
+
+ put_cred(tcred);
+ return ret;
+}
+
/*
* Find the task_struct of the task to attach by vpid and pass it along to the
* function to attach either it or all tasks in its threadgroup. Will lock
@@ -2385,7 +2442,6 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off, bool threadgroup)
{
struct task_struct *tsk;
- const struct cred *cred = current_cred(), *tcred;
struct cgroup *cgrp;
pid_t pid;
int ret;
@@ -2397,29 +2453,17 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
if (!cgrp)
return -ENODEV;
-retry_find_task:
+ percpu_down_write(&cgroup_threadgroup_rwsem);
rcu_read_lock();
if (pid) {
tsk = find_task_by_vpid(pid);
if (!tsk) {
- rcu_read_unlock();
ret = -ESRCH;
- goto out_unlock_cgroup;
+ goto out_unlock_rcu;
}
- /*
- * even if we're attaching all tasks in the thread group, we
- * only need to check permissions on one of them.
- */
- tcred = __task_cred(tsk);
- if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
- !uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid)) {
- rcu_read_unlock();
- ret = -EACCES;
- goto out_unlock_cgroup;
- }
- } else
+ } else {
tsk = current;
+ }
if (threadgroup)
tsk = tsk->group_leader;
@@ -2431,35 +2475,23 @@ retry_find_task:
*/
if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
- rcu_read_unlock();
- goto out_unlock_cgroup;
+ goto out_unlock_rcu;
}
get_task_struct(tsk);
rcu_read_unlock();
- threadgroup_lock(tsk);
- if (threadgroup) {
- if (!thread_group_leader(tsk)) {
- /*
- * a race with de_thread from another thread's exec()
- * may strip us of our leadership, if this happens,
- * there is no choice but to throw this task away and
- * try again; this is
- * "double-double-toil-and-trouble-check locking".
- */
- threadgroup_unlock(tsk);
- put_task_struct(tsk);
- goto retry_find_task;
- }
- }
-
- ret = cgroup_attach_task(cgrp, tsk, threadgroup);
-
- threadgroup_unlock(tsk);
+ ret = cgroup_procs_write_permission(tsk, cgrp, of);
+ if (!ret)
+ ret = cgroup_attach_task(cgrp, tsk, threadgroup);
put_task_struct(tsk);
-out_unlock_cgroup:
+ goto out_unlock_threadgroup;
+
+out_unlock_rcu:
+ rcu_read_unlock();
+out_unlock_threadgroup:
+ percpu_up_write(&cgroup_threadgroup_rwsem);
cgroup_kn_unlock(of->kn);
return ret ?: nbytes;
}
@@ -2542,19 +2574,17 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
return 0;
}
-static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
+static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask)
{
struct cgroup_subsys *ss;
bool printed = false;
int ssid;
- for_each_subsys(ss, ssid) {
- if (ss_mask & (1 << ssid)) {
- if (printed)
- seq_putc(seq, ' ');
- seq_printf(seq, "%s", ss->name);
- printed = true;
- }
+ for_each_subsys_which(ss, ssid, &ss_mask) {
+ if (printed)
+ seq_putc(seq, ' ');
+ seq_printf(seq, "%s", ss->name);
+ printed = true;
}
if (printed)
seq_putc(seq, '\n');
@@ -2606,6 +2636,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cgroup_threadgroup_rwsem);
+
/* look up all csses currently attached to @cgrp's subtree */
down_read(&css_set_rwsem);
css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2661,17 +2693,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
goto out_finish;
last_task = task;
- threadgroup_lock(task);
- /* raced against de_thread() from another thread? */
- if (!thread_group_leader(task)) {
- threadgroup_unlock(task);
- put_task_struct(task);
- continue;
- }
-
ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
- threadgroup_unlock(task);
put_task_struct(task);
if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2681,6 +2704,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
out_finish:
cgroup_migrate_finish(&preloaded_csets);
+ percpu_up_write(&cgroup_threadgroup_rwsem);
return ret;
}
@@ -2689,8 +2713,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
char *buf, size_t nbytes,
loff_t off)
{
- unsigned int enable = 0, disable = 0;
- unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
+ unsigned long enable = 0, disable = 0;
+ unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss;
struct cgroup *cgrp, *child;
struct cgroup_subsys *ss;
char *tok;
@@ -2702,11 +2726,12 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
*/
buf = strstrip(buf);
while ((tok = strsep(&buf, " "))) {
+ unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask;
+
if (tok[0] == '\0')
continue;
- for_each_subsys(ss, ssid) {
- if (ss->disabled || strcmp(tok + 1, ss->name) ||
- ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
+ for_each_subsys_which(ss, ssid, &tmp_ss_mask) {
+ if (ss->disabled || strcmp(tok + 1, ss->name))
continue;
if (*tok == '+') {
@@ -2793,10 +2818,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
* still around. In such cases, wait till it's gone using
* offline_waitq.
*/
- for_each_subsys(ss, ssid) {
- if (!(css_enable & (1 << ssid)))
- continue;
-
+ for_each_subsys_which(ss, ssid, &css_enable) {
cgroup_for_each_live_child(child, cgrp) {
DEFINE_WAIT(wait);
@@ -3087,7 +3109,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
return ret;
}
- if (cft->seq_show == cgroup_populated_show)
+ if (cft->write == cgroup_procs_write)
+ cgrp->procs_kn = kn;
+ else if (cft->seq_show == cgroup_populated_show)
cgrp->populated_kn = kn;
return 0;
}
@@ -4322,7 +4346,7 @@ static struct cftype cgroup_legacy_base_files[] = {
*
* On failure, no file is added.
*/
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
{
struct cgroup_subsys *ss;
int i, ret = 0;
@@ -4931,7 +4955,8 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
* init_css_set is in the subsystem's root cgroup. */
init_css_set.subsys[ss->id] = css;
- need_forkexit_callback |= ss->fork || ss->exit;
+ have_fork_callback |= (bool)ss->fork << ss->id;
+ have_exit_callback |= (bool)ss->exit << ss->id;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
@@ -4989,6 +5014,7 @@ int __init cgroup_init(void)
unsigned long key;
int ssid, err;
+ BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
@@ -5241,11 +5267,8 @@ void cgroup_post_fork(struct task_struct *child)
* css_set; otherwise, @child might change state between ->fork()
* and addition to css_set.
*/
- if (need_forkexit_callback) {
- for_each_subsys(ss, i)
- if (ss->fork)
- ss->fork(child);
- }
+ for_each_subsys_which(ss, i, &have_fork_callback)
+ ss->fork(child);
}
/**
@@ -5289,16 +5312,12 @@ void cgroup_exit(struct task_struct *tsk)
cset = task_css_set(tsk);
RCU_INIT_POINTER(tsk->cgroups, &init_css_set);
- if (need_forkexit_callback) {
- /* see cgroup_post_fork() for details */
- for_each_subsys(ss, i) {
- if (ss->exit) {
- struct cgroup_subsys_state *old_css = cset->subsys[i];
- struct cgroup_subsys_state *css = task_css(tsk, i);
+ /* see cgroup_post_fork() for details */
+ for_each_subsys_which(ss, i, &have_exit_callback) {
+ struct cgroup_subsys_state *old_css = cset->subsys[i];
+ struct cgroup_subsys_state *css = task_css(tsk, i);
- ss->exit(css, old_css, tsk);
- }
- }
+ ss->exit(css, old_css, tsk);
}
if (put_cset)
diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config
new file mode 100644
index 000000000000..ff756221f112
--- /dev/null
+++ b/kernel/configs/xen.config
@@ -0,0 +1,48 @@
+# global stuff - these enable us to allow some
+# of the not so generic stuff below for xen
+CONFIG_PARAVIRT=y
+CONFIG_NET=y
+CONFIG_NET_CORE=y
+CONFIG_NETDEVICES=y
+CONFIG_BLOCK=y
+CONFIG_WATCHDOG=y
+CONFIG_TARGET_CORE=y
+CONFIG_SCSI=y
+CONFIG_FB=y
+CONFIG_INPUT_MISC=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_TTY=y
+# Technically not required but otherwise produces
+# pretty useless systems starting from allnoconfig
+# You want TCP/IP and ELF binaries right?
+CONFIG_INET=y
+CONFIG_BINFMT_ELF=y
+# generic config
+CONFIG_XEN=y
+CONFIG_XEN_DOM0=y
+# backend drivers
+CONFIG_XEN_BACKEND=y
+CONFIG_XEN_BLKDEV_BACKEND=m
+CONFIG_XEN_NETDEV_BACKEND=m
+CONFIG_HVC_XEN=y
+CONFIG_XEN_WDT=m
+CONFIG_XEN_SCSI_BACKEND=m
+# frontend drivers
+CONFIG_XEN_FBDEV_FRONTEND=m
+CONFIG_HVC_XEN_FRONTEND=y
+CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m
+CONFIG_XEN_SCSI_FRONTEND=m
+# others
+CONFIG_XEN_BALLOON=y
+CONFIG_XEN_SCRUB_PAGES=y
+CONFIG_XEN_DEV_EVTCHN=m
+CONFIG_XEN_BLKDEV_FRONTEND=m
+CONFIG_XEN_NETDEV_FRONTEND=m
+CONFIG_XENFS=m
+CONFIG_XEN_COMPAT_XENFS=y
+CONFIG_XEN_SYS_HYPERVISOR=y
+CONFIG_XEN_XENBUS_FRONTEND=y
+CONFIG_XEN_GNTDEV=m
+CONFIG_XEN_GRANT_DEV_ALLOC=m
+CONFIG_SWIOTLB_XEN=y
+CONFIG_XEN_PRIVCMD=m
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8e13f3e54ec3..d1f37ddd1960 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,7 +36,7 @@
#include <linux/kernel_stat.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/hw_breakpoint.h>
#include <linux/mm_types.h>
#include <linux/module.h>
@@ -1502,11 +1502,17 @@ static int __init perf_workqueue_init(void)
core_initcall(perf_workqueue_init);
+static inline int pmu_filter_match(struct perf_event *event)
+{
+ struct pmu *pmu = event->pmu;
+ return pmu->filter_match ? pmu->filter_match(event) : 1;
+}
+
static inline int
event_filter_match(struct perf_event *event)
{
return (event->cpu == -1 || event->cpu == smp_processor_id())
- && perf_cgroup_match(event);
+ && perf_cgroup_match(event) && pmu_filter_match(event);
}
static void
diff --git a/kernel/exit.c b/kernel/exit.c
index 185752a729f6..031325e9acf9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -711,10 +711,10 @@ void do_exit(long code)
current->comm, task_pid_nr(current),
preempt_count());
- acct_update_integrals(tsk);
/* sync mm's RSS info before statistics gathering */
if (tsk->mm)
sync_mm_rss(tsk->mm);
+ acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index 0bb88b555550..1bfefc6f96a4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1141,10 +1141,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
tty_audit_fork(sig);
sched_autogroup_fork(sig);
-#ifdef CONFIG_CGROUPS
- init_rwsem(&sig->group_rwsem);
-#endif
-
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
@@ -1238,7 +1234,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
- int trace)
+ int trace,
+ unsigned long tls)
{
int retval;
struct task_struct *p;
@@ -1447,7 +1444,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
- retval = copy_thread(clone_flags, stack_start, stack_size, p);
+ retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
if (retval)
goto bad_fork_cleanup_io;
@@ -1659,7 +1656,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
+ task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
init_idle(task, cpu);
@@ -1674,11 +1671,12 @@ struct task_struct *fork_idle(int cpu)
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
-long do_fork(unsigned long clone_flags,
+long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
- int __user *child_tidptr)
+ int __user *child_tidptr,
+ unsigned long tls)
{
struct task_struct *p;
int trace = 0;
@@ -1703,7 +1701,7 @@ long do_fork(unsigned long clone_flags,
}
p = copy_process(clone_flags, stack_start, stack_size,
- child_tidptr, NULL, trace);
+ child_tidptr, NULL, trace, tls);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
@@ -1744,20 +1742,34 @@ long do_fork(unsigned long clone_flags,
return nr;
}
+#ifndef CONFIG_HAVE_COPY_THREAD_TLS
+/* For compatibility with architectures that call do_fork directly rather than
+ * using the syscall entry points below. */
+long do_fork(unsigned long clone_flags,
+ unsigned long stack_start,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr)
+{
+ return _do_fork(clone_flags, stack_start, stack_size,
+ parent_tidptr, child_tidptr, 0);
+}
+#endif
+
/*
* Create a kernel thread.
*/
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
- return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
- (unsigned long)arg, NULL, NULL);
+ return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
+ (unsigned long)arg, NULL, NULL, 0);
}
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
- return do_fork(SIGCHLD, 0, 0, NULL, NULL);
+ return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
#else
/* can not support in nommu mode */
return -EINVAL;
@@ -1768,8 +1780,8 @@ SYSCALL_DEFINE0(fork)
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
- 0, NULL, NULL);
+ return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+ 0, NULL, NULL, 0);
}
#endif
@@ -1777,27 +1789,27 @@ SYSCALL_DEFINE0(vfork)
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
- int, tls_val,
+ unsigned long, tls,
int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
int, stack_size,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#endif
{
- return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+ return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
}
#endif
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 9019f15deab2..52ebaca1b9fc 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -302,7 +302,7 @@ static int jump_label_add_module(struct module *mod)
continue;
key = iterk;
- if (__module_address(iter->key) == mod) {
+ if (within_module(iter->key, mod)) {
/*
* Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH.
*/
@@ -339,7 +339,7 @@ static void jump_label_del_module(struct module *mod)
key = (struct static_key *)(unsigned long)iter->key;
- if (__module_address(iter->key) == mod)
+ if (within_module(iter->key, mod))
continue;
prev = &key->next;
@@ -443,14 +443,16 @@ static void jump_label_update(struct static_key *key, int enable)
{
struct jump_entry *stop = __stop___jump_table;
struct jump_entry *entry = jump_label_get_entries(key);
-
#ifdef CONFIG_MODULES
- struct module *mod = __module_address((unsigned long)key);
+ struct module *mod;
__jump_label_mod_update(key, enable);
+ preempt_disable();
+ mod = __module_address((unsigned long)key);
if (mod)
stop = mod->jump_entries + mod->num_jump_entries;
+ preempt_enable();
#endif
/* if there are no users, entry can be NULL */
if (entry)
diff --git a/kernel/module.c b/kernel/module.c
index cfc9e843a924..3e0e19763d24 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -18,7 +18,7 @@
*/
#include <linux/export.h>
#include <linux/moduleloader.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/file.h>
@@ -101,48 +101,201 @@
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
-#ifdef CONFIG_KGDB_KDB
-struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
-#endif /* CONFIG_KGDB_KDB */
-#ifdef CONFIG_MODULE_SIG
-#ifdef CONFIG_MODULE_SIG_FORCE
-static bool sig_enforce = true;
-#else
-static bool sig_enforce = false;
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_nodes entries. Therefore we need the back-pointer from
+ * mod_tree_node.
+ *
+ * Because init ranges are short lived we mark them unlikely and have placed
+ * them outside the critical cacheline in struct module.
+ *
+ * This is conditional on PERF_EVENTS || TRACING because those can really hit
+ * __module_address() hard by doing a lot of stack unwinding; potentially from
+ * NMI context.
+ */
-static int param_set_bool_enable_only(const char *val,
- const struct kernel_param *kp)
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
{
- int err;
- bool test;
- struct kernel_param dummy_kp = *kp;
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
- dummy_kp.arg = &test;
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->module_init;
- err = param_set_bool(val, &dummy_kp);
- if (err)
- return err;
+ return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+ struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node);
+ struct module *mod = mtn->mod;
+
+ if (unlikely(mtn == &mod->mtn_init))
+ return (unsigned long)mod->init_size;
- /* Don't let them unset it once it's set! */
- if (!test && sig_enforce)
- return -EROFS;
+ return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+ return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+ unsigned long val = (unsigned long)key;
+ unsigned long start, end;
+
+ start = __mod_tree_val(n);
+ if (val < start)
+ return -1;
+
+ end = start + __mod_tree_size(n);
+ if (val >= end)
+ return 1;
- if (test)
- sig_enforce = true;
return 0;
}
-static const struct kernel_param_ops param_ops_bool_enable_only = {
- .flags = KERNEL_PARAM_OPS_FL_NOARG,
- .set = param_set_bool_enable_only,
- .get = param_get_bool,
+static const struct latch_tree_ops mod_tree_ops = {
+ .less = mod_tree_less,
+ .comp = mod_tree_comp,
};
-#define param_check_bool_enable_only param_check_bool
+static struct mod_tree_root {
+ struct latch_tree_root root;
+ unsigned long addr_min;
+ unsigned long addr_max;
+} mod_tree __cacheline_aligned = {
+ .addr_min = -1UL,
+};
+
+#define module_addr_min mod_tree.addr_min
+#define module_addr_max mod_tree.addr_max
+
+static noinline void __mod_tree_insert(struct mod_tree_node *node)
+{
+ latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+static void __mod_tree_remove(struct mod_tree_node *node)
+{
+ latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops);
+}
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+ mod->mtn_core.mod = mod;
+ mod->mtn_init.mod = mod;
+
+ __mod_tree_insert(&mod->mtn_core);
+ if (mod->init_size)
+ __mod_tree_insert(&mod->mtn_init);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+ if (mod->init_size)
+ __mod_tree_remove(&mod->mtn_init);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+ __mod_tree_remove(&mod->mtn_core);
+ mod_tree_remove_init(mod);
+}
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct latch_tree_node *ltn;
+
+ ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops);
+ if (!ltn)
+ return NULL;
+
+ return container_of(ltn, struct mod_tree_node, node)->mod;
+}
+
+#else /* MODULES_TREE_LOOKUP */
+
+static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+
+static void mod_tree_insert(struct module *mod) { }
+static void mod_tree_remove_init(struct module *mod) { }
+static void mod_tree_remove(struct module *mod) { }
+
+static struct module *mod_find(unsigned long addr)
+{
+ struct module *mod;
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (within_module(addr, mod))
+ return mod;
+ }
+
+ return NULL;
+}
+
+#endif /* MODULES_TREE_LOOKUP */
+
+/*
+ * Bounds of module text, for speeding up __module_address.
+ * Protected by module_mutex.
+ */
+static void __mod_update_bounds(void *base, unsigned int size)
+{
+ unsigned long min = (unsigned long)base;
+ unsigned long max = min + size;
+
+ if (min < module_addr_min)
+ module_addr_min = min;
+ if (max > module_addr_max)
+ module_addr_max = max;
+}
+
+static void mod_update_bounds(struct module *mod)
+{
+ __mod_update_bounds(mod->module_core, mod->core_size);
+ if (mod->init_size)
+ __mod_update_bounds(mod->module_init, mod->init_size);
+}
+
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
+static void module_assert_mutex(void)
+{
+ lockdep_assert_held(&module_mutex);
+}
+
+static void module_assert_mutex_or_preempt(void)
+{
+#ifdef CONFIG_LOCKDEP
+ if (unlikely(!debug_locks))
+ return;
+
+ WARN_ON(!rcu_read_lock_sched_held() &&
+ !lockdep_is_held(&module_mutex));
+#endif
+}
+
+static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
+#ifndef CONFIG_MODULE_SIG_FORCE
module_param(sig_enforce, bool_enable_only, 0644);
#endif /* !CONFIG_MODULE_SIG_FORCE */
-#endif /* CONFIG_MODULE_SIG */
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
-/* Bounds of module allocation, for speeding __module_address.
- * Protected by module_mutex. */
-static unsigned long module_addr_min = -1UL, module_addr_max = 0;
-
int register_module_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
+ module_assert_mutex_or_preempt();
+
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
@@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len,
{
struct module *mod;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
- /* Since this should be found in kernel (which can't be removed),
- * no locking is necessary. */
+ /*
+ * Since this should be found in kernel (which can't be removed), no
+ * locking is necessary -- use preempt_disable() to placate lockdep.
+ */
+ preempt_disable();
if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL,
- &crc, true, false))
+ &crc, true, false)) {
+ preempt_enable();
BUG();
+ }
+ preempt_enable();
return check_version(sechdrs, versindex,
VMLINUX_SYMBOL_STR(module_layout), mod, crc,
NULL);
@@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod)
mod_kobject_put(mod);
}
+static void init_param_lock(struct module *mod)
+{
+ mutex_init(&mod->param_lock);
+}
#else /* !CONFIG_SYSFS */
static int mod_sysfs_setup(struct module *mod,
@@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod)
{
}
+static void init_param_lock(struct module *mod)
+{
+}
#endif /* CONFIG_SYSFS */
static void mod_sysfs_teardown(struct module *mod)
@@ -1852,10 +2018,11 @@ static void free_module(struct module *mod)
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
+ mod_tree_remove(mod);
/* Remove this module from bug list, this uses list_del_rcu */
module_bug_cleanup(mod);
- /* Wait for RCU synchronizing before releasing mod->list and buglist. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
/* This may be NULL, but that's OK */
@@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size)
return vmalloc_exec(size);
}
-static void *module_alloc_update_bounds(unsigned long size)
-{
- void *ret = module_alloc(size);
-
- if (ret) {
- mutex_lock(&module_mutex);
- /* Update module bounds. */
- if ((unsigned long)ret < module_addr_min)
- module_addr_min = (unsigned long)ret;
- if ((unsigned long)ret + size > module_addr_max)
- module_addr_max = (unsigned long)ret + size;
- mutex_unlock(&module_mutex);
- }
- return ret;
-}
-
#ifdef CONFIG_DEBUG_KMEMLEAK
static void kmemleak_load_module(const struct module *mod,
const struct load_info *info)
@@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc_update_bounds(mod->core_size);
+ ptr = module_alloc(mod->core_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info)
mod->module_core = ptr;
if (mod->init_size) {
- ptr = module_alloc_update_bounds(mod->init_size);
+ ptr = module_alloc(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
@@ -3107,7 +3258,7 @@ static noinline int do_init_module(struct module *mod)
*
* http://thread.gmane.org/gmane.linux.kernel/1420814
*/
- if (current->flags & PF_USED_ASYNC)
+ if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC))
async_synchronize_full();
mutex_lock(&module_mutex);
@@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod)
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
+ mod_tree_remove_init(mod);
unset_module_init_ro_nx(mod);
module_arch_freeing_init(mod);
mod->module_init = NULL;
@@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod)
mod->init_text_size = 0;
/*
* We want to free module_init, but be aware that kallsyms may be
- * walking this with preempt disabled. In all the failure paths,
- * we call synchronize_rcu/synchronize_sched, but we don't want
- * to slow down the success path, so use actual RCU here.
+ * walking this with preempt disabled. In all the failure paths, we
+ * call synchronize_sched(), but we don't want to slow down the success
+ * path, so use actual RCU here.
*/
- call_rcu(&freeinit->rcu, do_free_init);
+ call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
@@ -3188,7 +3340,9 @@ again:
err = -EEXIST;
goto out;
}
+ mod_update_bounds(mod);
list_add_rcu(&mod->list, &modules);
+ mod_tree_insert(mod);
err = 0;
out:
@@ -3237,10 +3391,19 @@ out:
return err;
}
-static int unknown_module_param_cb(char *param, char *val, const char *modname)
+static int unknown_module_param_cb(char *param, char *val, const char *modname,
+ void *arg)
{
+ struct module *mod = arg;
+ int ret;
+
+ if (strcmp(param, "async_probe") == 0) {
+ mod->async_probe_requested = true;
+ return 0;
+ }
+
/* Check for magic 'dyndbg' arg */
- int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
+ ret = ddebug_dyndbg_module_param_cb(param, val, modname);
if (ret != 0)
pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
return 0;
@@ -3295,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
if (err)
goto unlink_mod;
+ init_param_lock(mod);
+
/* Now we've got everything in the final locations, we can
* find optional sections. */
err = find_module_sections(mod, info);
@@ -3342,7 +3507,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Module is ready to execute: parsing args may do that. */
after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
- -32768, 32767, unknown_module_param_cb);
+ -32768, 32767, NULL,
+ unknown_module_param_cb);
if (IS_ERR(after_dashes)) {
err = PTR_ERR(after_dashes);
goto bug_cleanup;
@@ -3392,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
wake_up_all(&module_wq);
- /* Wait for RCU synchronizing before releasing mod->list. */
- synchronize_rcu();
+ /* Wait for RCU-sched synchronizing before releasing mod->list. */
+ synchronize_sched();
mutex_unlock(&module_mutex);
free_module:
/* Free lock-classes; relies on the preceding sync_rcu() */
@@ -3517,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr,
char **modname,
char *namebuf)
{
- struct module *mod;
const char *ret = NULL;
+ struct module *mod;
preempt_disable();
- list_for_each_entry_rcu(mod, &modules, list) {
- if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod)) {
- if (modname)
- *modname = mod->name;
- ret = get_ksymbol(mod, addr, size, offset);
- break;
- }
+ mod = __module_address(addr);
+ if (mod) {
+ if (modname)
+ *modname = mod->name;
+ ret = get_ksymbol(mod, addr, size, offset);
}
/* Make a copy in here where it's safe */
if (ret) {
@@ -3537,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr,
ret = namebuf;
}
preempt_enable();
+
return ret;
}
@@ -3660,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
unsigned int i;
int ret;
+ module_assert_mutex();
+
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
@@ -3834,13 +3999,15 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
- list_for_each_entry_rcu(mod, &modules, list) {
+ module_assert_mutex_or_preempt();
+
+ mod = mod_find(addr);
+ if (mod) {
+ BUG_ON(!within_module(addr, mod));
if (mod->state == MODULE_STATE_UNFORMED)
- continue;
- if (within_module(addr, mod))
- return mod;
+ mod = NULL;
}
- return NULL;
+ return mod;
}
EXPORT_SYMBOL_GPL(__module_address);
diff --git a/kernel/params.c b/kernel/params.c
index a22d6a759b1a..b6554aa71094 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -25,15 +25,34 @@
#include <linux/slab.h>
#include <linux/ctype.h>
-/* Protects all parameters, and incidentally kmalloced_param list. */
+#ifdef CONFIG_SYSFS
+/* Protects all built-in parameters, modules use their own param_lock */
static DEFINE_MUTEX(param_lock);
+/* Use the module's mutex, or if built-in use the built-in mutex */
+#ifdef CONFIG_MODULES
+#define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : &param_lock)
+#else
+#define KPARAM_MUTEX(mod) (&param_lock)
+#endif
+
+static inline void check_kparam_locked(struct module *mod)
+{
+ BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod)));
+}
+#else
+static inline void check_kparam_locked(struct module *mod)
+{
+}
+#endif /* !CONFIG_SYSFS */
+
/* This just allows us to keep track of which parameters are kmalloced. */
struct kmalloced_param {
struct list_head list;
char val[];
};
static LIST_HEAD(kmalloced_params);
+static DEFINE_SPINLOCK(kmalloced_params_lock);
static void *kmalloc_parameter(unsigned int size)
{
@@ -43,7 +62,10 @@ static void *kmalloc_parameter(unsigned int size)
if (!p)
return NULL;
+ spin_lock(&kmalloced_params_lock);
list_add(&p->list, &kmalloced_params);
+ spin_unlock(&kmalloced_params_lock);
+
return p->val;
}
@@ -52,6 +74,7 @@ static void maybe_kfree_parameter(void *param)
{
struct kmalloced_param *p;
+ spin_lock(&kmalloced_params_lock);
list_for_each_entry(p, &kmalloced_params, list) {
if (p->val == param) {
list_del(&p->list);
@@ -59,6 +82,7 @@ static void maybe_kfree_parameter(void *param)
break;
}
}
+ spin_unlock(&kmalloced_params_lock);
}
static char dash2underscore(char c)
@@ -100,8 +124,9 @@ static int parse_one(char *param,
unsigned num_params,
s16 min_level,
s16 max_level,
+ void *arg,
int (*handle_unknown)(char *param, char *val,
- const char *doing))
+ const char *doing, void *arg))
{
unsigned int i;
int err;
@@ -118,17 +143,17 @@ static int parse_one(char *param,
return -EINVAL;
pr_debug("handling %s with %p\n", param,
params[i].ops->set);
- mutex_lock(&param_lock);
+ kernel_param_lock(params[i].mod);
param_check_unsafe(&params[i]);
err = params[i].ops->set(val, &params[i]);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(params[i].mod);
return err;
}
}
if (handle_unknown) {
pr_debug("doing %s: %s='%s'\n", doing, param, val);
- return handle_unknown(param, val, doing);
+ return handle_unknown(param, val, doing, arg);
}
pr_debug("Unknown argument '%s'\n", param);
@@ -194,7 +219,9 @@ char *parse_args(const char *doing,
unsigned num,
s16 min_level,
s16 max_level,
- int (*unknown)(char *param, char *val, const char *doing))
+ void *arg,
+ int (*unknown)(char *param, char *val,
+ const char *doing, void *arg))
{
char *param, *val;
@@ -214,7 +241,7 @@ char *parse_args(const char *doing,
return args;
irq_was_disabled = irqs_disabled();
ret = parse_one(param, val, doing, params, num,
- min_level, max_level, unknown);
+ min_level, max_level, arg, unknown);
if (irq_was_disabled && !irqs_disabled())
pr_warn("%s: option '%s' enabled irq's!\n",
doing, param);
@@ -251,7 +278,7 @@ char *parse_args(const char *doing,
return scnprintf(buffer, PAGE_SIZE, format, \
*((type *)kp->arg)); \
} \
- struct kernel_param_ops param_ops_##name = { \
+ const struct kernel_param_ops param_ops_##name = { \
.set = param_set_##name, \
.get = param_get_##name, \
}; \
@@ -303,7 +330,7 @@ static void param_free_charp(void *arg)
maybe_kfree_parameter(*((char **)arg));
}
-struct kernel_param_ops param_ops_charp = {
+const struct kernel_param_ops param_ops_charp = {
.set = param_set_charp,
.get = param_get_charp,
.free = param_free_charp,
@@ -328,13 +355,44 @@ int param_get_bool(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_bool);
-struct kernel_param_ops param_ops_bool = {
+const struct kernel_param_ops param_ops_bool = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bool,
.get = param_get_bool,
};
EXPORT_SYMBOL(param_ops_bool);
+int param_set_bool_enable_only(const char *val, const struct kernel_param *kp)
+{
+ int err = 0;
+ bool new_value;
+ bool orig_value = *(bool *)kp->arg;
+ struct kernel_param dummy_kp = *kp;
+
+ dummy_kp.arg = &new_value;
+
+ err = param_set_bool(val, &dummy_kp);
+ if (err)
+ return err;
+
+ /* Don't let them unset it once it's set! */
+ if (!new_value && orig_value)
+ return -EROFS;
+
+ if (new_value)
+ err = param_set_bool(val, kp);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(param_set_bool_enable_only);
+
+const struct kernel_param_ops param_ops_bool_enable_only = {
+ .flags = KERNEL_PARAM_OPS_FL_NOARG,
+ .set = param_set_bool_enable_only,
+ .get = param_get_bool,
+};
+EXPORT_SYMBOL_GPL(param_ops_bool_enable_only);
+
/* This one must be bool. */
int param_set_invbool(const char *val, const struct kernel_param *kp)
{
@@ -356,7 +414,7 @@ int param_get_invbool(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_invbool);
-struct kernel_param_ops param_ops_invbool = {
+const struct kernel_param_ops param_ops_invbool = {
.set = param_set_invbool,
.get = param_get_invbool,
};
@@ -364,12 +422,11 @@ EXPORT_SYMBOL(param_ops_invbool);
int param_set_bint(const char *val, const struct kernel_param *kp)
{
- struct kernel_param boolkp;
+ /* Match bool exactly, by re-using it. */
+ struct kernel_param boolkp = *kp;
bool v;
int ret;
- /* Match bool exactly, by re-using it. */
- boolkp = *kp;
boolkp.arg = &v;
ret = param_set_bool(val, &boolkp);
@@ -379,7 +436,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_set_bint);
-struct kernel_param_ops param_ops_bint = {
+const struct kernel_param_ops param_ops_bint = {
.flags = KERNEL_PARAM_OPS_FL_NOARG,
.set = param_set_bint,
.get = param_get_int,
@@ -387,7 +444,8 @@ struct kernel_param_ops param_ops_bint = {
EXPORT_SYMBOL(param_ops_bint);
/* We break the rule and mangle the string. */
-static int param_array(const char *name,
+static int param_array(struct module *mod,
+ const char *name,
const char *val,
unsigned int min, unsigned int max,
void *elem, int elemsize,
@@ -418,7 +476,7 @@ static int param_array(const char *name,
/* nul-terminate and parse */
save = val[len];
((char *)val)[len] = '\0';
- BUG_ON(!mutex_is_locked(&param_lock));
+ check_kparam_locked(mod);
ret = set(val, &kp);
if (ret != 0)
@@ -440,7 +498,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
const struct kparam_array *arr = kp->arr;
unsigned int temp_num;
- return param_array(kp->name, val, 1, arr->max, arr->elem,
+ return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem,
arr->elemsize, arr->ops->set, kp->level,
arr->num ?: &temp_num);
}
@@ -449,14 +507,13 @@ static int param_array_get(char *buffer, const struct kernel_param *kp)
{
int i, off, ret;
const struct kparam_array *arr = kp->arr;
- struct kernel_param p;
+ struct kernel_param p = *kp;
- p = *kp;
for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) {
if (i)
buffer[off++] = ',';
p.arg = arr->elem + arr->elemsize * i;
- BUG_ON(!mutex_is_locked(&param_lock));
+ check_kparam_locked(p.mod);
ret = arr->ops->get(buffer + off, &p);
if (ret < 0)
return ret;
@@ -476,7 +533,7 @@ static void param_array_free(void *arg)
arr->ops->free(arr->elem + arr->elemsize * i);
}
-struct kernel_param_ops param_array_ops = {
+const struct kernel_param_ops param_array_ops = {
.set = param_array_set,
.get = param_array_get,
.free = param_array_free,
@@ -504,7 +561,7 @@ int param_get_string(char *buffer, const struct kernel_param *kp)
}
EXPORT_SYMBOL(param_get_string);
-struct kernel_param_ops param_ops_string = {
+const struct kernel_param_ops param_ops_string = {
.set = param_set_copystring,
.get = param_get_string,
};
@@ -539,9 +596,9 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
if (!attribute->param->ops->get)
return -EPERM;
- mutex_lock(&param_lock);
+ kernel_param_lock(mk->mod);
count = attribute->param->ops->get(buf, attribute->param);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(mk->mod);
if (count > 0) {
strcat(buf, "\n");
++count;
@@ -551,7 +608,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
static ssize_t param_attr_store(struct module_attribute *mattr,
- struct module_kobject *km,
+ struct module_kobject *mk,
const char *buf, size_t len)
{
int err;
@@ -560,10 +617,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
if (!attribute->param->ops->set)
return -EPERM;
- mutex_lock(&param_lock);
+ kernel_param_lock(mk->mod);
param_check_unsafe(attribute->param);
err = attribute->param->ops->set(buf, attribute->param);
- mutex_unlock(&param_lock);
+ kernel_param_unlock(mk->mod);
if (!err)
return len;
return err;
@@ -577,17 +634,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
#endif
#ifdef CONFIG_SYSFS
-void __kernel_param_lock(void)
+void kernel_param_lock(struct module *mod)
{
- mutex_lock(&param_lock);
+ mutex_lock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_lock);
-void __kernel_param_unlock(void)
+void kernel_param_unlock(struct module *mod)
{
- mutex_unlock(&param_lock);
+ mutex_unlock(KPARAM_MUTEX(mod));
}
-EXPORT_SYMBOL(__kernel_param_unlock);
+
+EXPORT_SYMBOL(kernel_param_lock);
+EXPORT_SYMBOL(kernel_param_unlock);
/*
* add_sysfs_param - add a parameter to sysfs
@@ -853,6 +911,7 @@ static void __init version_sysfs_builtin(void)
mk = locate_module_kobject(vattr->module_name);
if (mk) {
err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
+ WARN_ON_ONCE(err);
kobject_uevent(&mk->kobj, KOBJ_ADD);
kobject_put(&mk->kobj);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 7e01f78f0417..9e302315e33d 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -187,7 +187,7 @@ config DPM_WATCHDOG
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
- default 12
+ default 60
depends on DPM_WATCHDOG
config PM_TRACE
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 29472bff11ef..cb880a14cc39 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o
obj-$(CONFIG_FREEZER) += process.o
obj-$(CONFIG_SUSPEND) += suspend.o
obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
-obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
- block_io.o
+obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o
obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o
obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
deleted file mode 100644
index 9a58bc258810..000000000000
--- a/kernel/power/block_io.c
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * This file provides functions for block I/O operations on swap/file.
- *
- * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz>
- * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
- *
- * This file is released under the GPLv2.
- */
-
-#include <linux/bio.h>
-#include <linux/kernel.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-
-#include "power.h"
-
-/**
- * submit - submit BIO request.
- * @rw: READ or WRITE.
- * @off physical offset of page.
- * @page: page we're reading or writing.
- * @bio_chain: list of pending biod (for async reading)
- *
- * Straight from the textbook - allocate and initialize the bio.
- * If we're reading, make sure the page is marked as dirty.
- * Then submit it and, if @bio_chain == NULL, wait.
- */
-static int submit(int rw, struct block_device *bdev, sector_t sector,
- struct page *page, struct bio **bio_chain)
-{
- const int bio_rw = rw | REQ_SYNC;
- struct bio *bio;
-
- bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
- bio->bi_iter.bi_sector = sector;
- bio->bi_bdev = bdev;
- bio->bi_end_io = end_swap_bio_read;
-
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
- printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
- (unsigned long long)sector);
- bio_put(bio);
- return -EFAULT;
- }
-
- lock_page(page);
- bio_get(bio);
-
- if (bio_chain == NULL) {
- submit_bio(bio_rw, bio);
- wait_on_page_locked(page);
- if (rw == READ)
- bio_set_pages_dirty(bio);
- bio_put(bio);
- } else {
- if (rw == READ)
- get_page(page); /* These pages are freed later */
- bio->bi_private = *bio_chain;
- *bio_chain = bio;
- submit_bio(bio_rw, bio);
- }
- return 0;
-}
-
-int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
- virt_to_page(addr), bio_chain);
-}
-
-int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain)
-{
- return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9),
- virt_to_page(addr), bio_chain);
-}
-
-int hib_wait_on_bio_chain(struct bio **bio_chain)
-{
- struct bio *bio;
- struct bio *next_bio;
- int ret = 0;
-
- if (bio_chain == NULL)
- return 0;
-
- bio = *bio_chain;
- if (bio == NULL)
- return 0;
- while (bio) {
- struct page *page;
-
- next_bio = bio->bi_private;
- page = bio->bi_io_vec[0].bv_page;
- wait_on_page_locked(page);
- if (!PageUptodate(page) || PageError(page))
- ret = -EIO;
- put_page(page);
- bio_put(bio);
- bio = next_bio;
- }
- *bio_chain = NULL;
- return ret;
-}
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 2329daae5255..690f78f210f2 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -552,7 +552,7 @@ int hibernation_platform_enter(void)
error = disable_nonboot_cpus();
if (error)
- goto Platform_finish;
+ goto Enable_cpus;
local_irq_disable();
syscore_suspend();
@@ -568,6 +568,8 @@ int hibernation_platform_enter(void)
Power_up:
syscore_resume();
local_irq_enable();
+
+ Enable_cpus:
enable_nonboot_cpus();
Platform_finish:
diff --git a/kernel/power/power.h b/kernel/power/power.h
index ce9b8328a689..caadb566e82b 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t);
extern int swsusp_unmark(void);
#endif
-/* kernel/power/block_io.c */
-extern struct block_device *hib_resume_bdev;
-
-extern int hib_bio_read_page(pgoff_t page_off, void *addr,
- struct bio **bio_chain);
-extern int hib_bio_write_page(pgoff_t page_off, void *addr,
- struct bio **bio_chain);
-extern int hib_wait_on_bio_chain(struct bio **bio_chain);
-
struct timeval;
/* kernel/power/swsusp.c */
extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *);
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 570aff817543..2f30ca91e4fa 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -212,7 +212,84 @@ int swsusp_swap_in_use(void)
*/
static unsigned short root_swap = 0xffff;
-struct block_device *hib_resume_bdev;
+static struct block_device *hib_resume_bdev;
+
+struct hib_bio_batch {
+ atomic_t count;
+ wait_queue_head_t wait;
+ int error;
+};
+
+static void hib_init_batch(struct hib_bio_batch *hb)
+{
+ atomic_set(&hb->count, 0);
+ init_waitqueue_head(&hb->wait);
+ hb->error = 0;
+}
+
+static void hib_end_io(struct bio *bio, int error)
+{
+ struct hib_bio_batch *hb = bio->bi_private;
+ const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct page *page = bio->bi_io_vec[0].bv_page;
+
+ if (!uptodate || error) {
+ printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
+ imajor(bio->bi_bdev->bd_inode),
+ iminor(bio->bi_bdev->bd_inode),
+ (unsigned long long)bio->bi_iter.bi_sector);
+
+ if (!error)
+ error = -EIO;
+ }
+
+ if (bio_data_dir(bio) == WRITE)
+ put_page(page);
+
+ if (error && !hb->error)
+ hb->error = error;
+ if (atomic_dec_and_test(&hb->count))
+ wake_up(&hb->wait);
+
+ bio_put(bio);
+}
+
+static int hib_submit_io(int rw, pgoff_t page_off, void *addr,
+ struct hib_bio_batch *hb)
+{
+ struct page *page = virt_to_page(addr);
+ struct bio *bio;
+ int error = 0;
+
+ bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
+ bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9);
+ bio->bi_bdev = hib_resume_bdev;
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ printk(KERN_ERR "PM: Adding page to bio failed at %llu\n",
+ (unsigned long long)bio->bi_iter.bi_sector);
+ bio_put(bio);
+ return -EFAULT;
+ }
+
+ if (hb) {
+ bio->bi_end_io = hib_end_io;
+ bio->bi_private = hb;
+ atomic_inc(&hb->count);
+ submit_bio(rw, bio);
+ } else {
+ error = submit_bio_wait(rw, bio);
+ bio_put(bio);
+ }
+
+ return error;
+}
+
+static int hib_wait_io(struct hib_bio_batch *hb)
+{
+ wait_event(hb->wait, atomic_read(&hb->count) == 0);
+ return hb->error;
+}
/*
* Saving part
@@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
{
int error;
- hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) ||
!memcmp("SWAPSPACE2",swsusp_header->sig, 10)) {
memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10);
@@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags)
swsusp_header->flags = flags;
if (flags & SF_CRC32_MODE)
swsusp_header->crc32 = handle->crc32;
- error = hib_bio_write_page(swsusp_resume_block,
+ error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Swap header not found!\n");
@@ -271,10 +348,10 @@ static int swsusp_swap_check(void)
* write_page - Write one page to given swap location.
* @buf: Address we're writing.
* @offset: Offset of the swap page we're writing to.
- * @bio_chain: Link the next write BIO here
+ * @hb: bio completion batch
*/
-static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
+static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb)
{
void *src;
int ret;
@@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
if (!offset)
return -ENOSPC;
- if (bio_chain) {
+ if (hb) {
src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN |
__GFP_NORETRY);
if (src) {
copy_page(src, buf);
} else {
- ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */
+ ret = hib_wait_io(hb); /* Free pages */
if (ret)
return ret;
src = (void *)__get_free_page(__GFP_WAIT |
@@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain)
copy_page(src, buf);
} else {
WARN_ON_ONCE(1);
- bio_chain = NULL; /* Go synchronous */
+ hb = NULL; /* Go synchronous */
src = buf;
}
}
} else {
src = buf;
}
- return hib_bio_write_page(offset, src, bio_chain);
+ return hib_submit_io(WRITE_SYNC, offset, src, hb);
}
static void release_swap_writer(struct swap_map_handle *handle)
@@ -348,7 +425,7 @@ err_close:
}
static int swap_write_page(struct swap_map_handle *handle, void *buf,
- struct bio **bio_chain)
+ struct hib_bio_batch *hb)
{
int error = 0;
sector_t offset;
@@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
if (!handle->cur)
return -EINVAL;
offset = alloc_swapdev_block(root_swap);
- error = write_page(buf, offset, bio_chain);
+ error = write_page(buf, offset, hb);
if (error)
return error;
handle->cur->entries[handle->k++] = offset;
@@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf,
if (!offset)
return -ENOSPC;
handle->cur->next_swap = offset;
- error = write_page(handle->cur, handle->cur_swap, bio_chain);
+ error = write_page(handle->cur, handle->cur_swap, hb);
if (error)
goto out;
clear_page(handle->cur);
handle->cur_swap = offset;
handle->k = 0;
- if (bio_chain && low_free_pages() <= handle->reqd_free_pages) {
- error = hib_wait_on_bio_chain(bio_chain);
+ if (hb && low_free_pages() <= handle->reqd_free_pages) {
+ error = hib_wait_io(hb);
if (error)
goto out;
/*
@@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle,
int ret;
int nr_pages;
int err2;
- struct bio *bio;
+ struct hib_bio_batch hb;
ktime_t start;
ktime_t stop;
+ hib_init_batch(&hb);
+
printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n",
nr_to_write);
m = nr_to_write / 10;
if (!m)
m = 1;
nr_pages = 0;
- bio = NULL;
start = ktime_get();
while (1) {
ret = snapshot_read_next(snapshot);
if (ret <= 0)
break;
- ret = swap_write_page(handle, data_of(*snapshot), &bio);
+ ret = swap_write_page(handle, data_of(*snapshot), &hb);
if (ret)
break;
if (!(nr_pages % m))
@@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle,
nr_pages / m * 10);
nr_pages++;
}
- err2 = hib_wait_on_bio_chain(&bio);
+ err2 = hib_wait_io(&hb);
stop = ktime_get();
if (!ret)
ret = err2;
@@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
int ret = 0;
int nr_pages;
int err2;
- struct bio *bio;
+ struct hib_bio_batch hb;
ktime_t start;
ktime_t stop;
size_t off;
@@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle,
struct cmp_data *data = NULL;
struct crc_data *crc = NULL;
+ hib_init_batch(&hb);
+
/*
* We'll limit the number of threads for compression to limit memory
* footprint.
@@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle,
if (!m)
m = 1;
nr_pages = 0;
- bio = NULL;
start = ktime_get();
for (;;) {
for (thr = 0; thr < nr_threads; thr++) {
@@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
off += PAGE_SIZE) {
memcpy(page, data[thr].cmp + off, PAGE_SIZE);
- ret = swap_write_page(handle, page, &bio);
+ ret = swap_write_page(handle, page, &hb);
if (ret)
goto out_finish;
}
@@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle,
}
out_finish:
- err2 = hib_wait_on_bio_chain(&bio);
+ err2 = hib_wait_io(&hb);
stop = ktime_get();
if (!ret)
ret = err2;
@@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
return -ENOMEM;
}
- error = hib_bio_read_page(offset, tmp->map, NULL);
+ error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL);
if (error) {
release_swap_reader(handle);
return error;
@@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle,
}
static int swap_read_page(struct swap_map_handle *handle, void *buf,
- struct bio **bio_chain)
+ struct hib_bio_batch *hb)
{
sector_t offset;
int error;
@@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf,
offset = handle->cur->entries[handle->k];
if (!offset)
return -EFAULT;
- error = hib_bio_read_page(offset, buf, bio_chain);
+ error = hib_submit_io(READ_SYNC, offset, buf, hb);
if (error)
return error;
if (++handle->k >= MAP_PAGE_ENTRIES) {
@@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle,
int ret = 0;
ktime_t start;
ktime_t stop;
- struct bio *bio;
+ struct hib_bio_batch hb;
int err2;
unsigned nr_pages;
+ hib_init_batch(&hb);
+
printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n",
nr_to_read);
m = nr_to_read / 10;
if (!m)
m = 1;
nr_pages = 0;
- bio = NULL;
start = ktime_get();
for ( ; ; ) {
ret = snapshot_write_next(snapshot);
if (ret <= 0)
break;
- ret = swap_read_page(handle, data_of(*snapshot), &bio);
+ ret = swap_read_page(handle, data_of(*snapshot), &hb);
if (ret)
break;
if (snapshot->sync_read)
- ret = hib_wait_on_bio_chain(&bio);
+ ret = hib_wait_io(&hb);
if (ret)
break;
if (!(nr_pages % m))
@@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle,
nr_pages / m * 10);
nr_pages++;
}
- err2 = hib_wait_on_bio_chain(&bio);
+ err2 = hib_wait_io(&hb);
stop = ktime_get();
if (!ret)
ret = err2;
@@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
unsigned int m;
int ret = 0;
int eof = 0;
- struct bio *bio;
+ struct hib_bio_batch hb;
ktime_t start;
ktime_t stop;
unsigned nr_pages;
@@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle,
struct dec_data *data = NULL;
struct crc_data *crc = NULL;
+ hib_init_batch(&hb);
+
/*
* We'll limit the number of threads for decompression to limit memory
* footprint.
@@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (!m)
m = 1;
nr_pages = 0;
- bio = NULL;
start = ktime_get();
ret = snapshot_write_next(snapshot);
@@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
for(;;) {
for (i = 0; !eof && i < want; i++) {
- ret = swap_read_page(handle, page[ring], &bio);
+ ret = swap_read_page(handle, page[ring], &hb);
if (ret) {
/*
* On real read error, finish. On end of data,
@@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
if (!asked)
break;
- ret = hib_wait_on_bio_chain(&bio);
+ ret = hib_wait_io(&hb);
if (ret)
goto out_finish;
have += asked;
@@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle,
* Wait for more data while we are decompressing.
*/
if (have < LZO_CMP_PAGES && asked) {
- ret = hib_wait_on_bio_chain(&bio);
+ ret = hib_wait_io(&hb);
if (ret)
goto out_finish;
have += asked;
@@ -1430,7 +1511,7 @@ int swsusp_check(void)
if (!IS_ERR(hib_resume_bdev)) {
set_blocksize(hib_resume_bdev, PAGE_SIZE);
clear_page(swsusp_header);
- error = hib_bio_read_page(swsusp_resume_block,
+ error = hib_submit_io(READ_SYNC, swsusp_resume_block,
swsusp_header, NULL);
if (error)
goto put;
@@ -1438,7 +1519,7 @@ int swsusp_check(void)
if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
/* Reset swap signature now */
- error = hib_bio_write_page(swsusp_resume_block,
+ error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
swsusp_header, NULL);
} else {
error = -EINVAL;
@@ -1482,10 +1563,10 @@ int swsusp_unmark(void)
{
int error;
- hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL);
+ hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL);
if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) {
memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10);
- error = hib_bio_write_page(swsusp_resume_block,
+ error = hib_submit_io(WRITE_SYNC, swsusp_resume_block,
swsusp_header, NULL);
} else {
printk(KERN_ERR "PM: Cannot find swsusp signature!\n");
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c099b082cd02..de553849f3ac 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -85,6 +85,18 @@ static struct lockdep_map console_lock_dep_map = {
#endif
/*
+ * Number of registered extended console drivers.
+ *
+ * If extended consoles are present, in-kernel cont reassembly is disabled
+ * and each fragment is stored as a separate log entry with proper
+ * continuation flag so that every emitted message has full metadata. This
+ * doesn't change the result for regular consoles or /proc/kmsg. For
+ * /dev/kmsg, as long as the reader concatenates messages according to
+ * consecutive continuation flags, the end result should be the same too.
+ */
+static int nr_ext_console_drivers;
+
+/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
*/
@@ -195,7 +207,7 @@ static int console_may_schedule;
* need to be changed in the future, when the requirements change.
*
* /dev/kmsg exports the structured data in the following line format:
- * "level,sequnum,timestamp;<message text>\n"
+ * "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n"
*
* The optional key/value pairs are attached as continuation lines starting
* with a space character and terminated by a newline. All possible
@@ -477,18 +489,18 @@ static int syslog_action_restricted(int type)
type != SYSLOG_ACTION_SIZE_BUFFER;
}
-int check_syslog_permissions(int type, bool from_file)
+int check_syslog_permissions(int type, int source)
{
/*
* If this is from /proc/kmsg and we've already opened it, then we've
* already done the capabilities checks at open time.
*/
- if (from_file && type != SYSLOG_ACTION_OPEN)
- return 0;
+ if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
+ goto ok;
if (syslog_action_restricted(type)) {
if (capable(CAP_SYSLOG))
- return 0;
+ goto ok;
/*
* For historical reasons, accept CAP_SYS_ADMIN too, with
* a warning.
@@ -498,13 +510,94 @@ int check_syslog_permissions(int type, bool from_file)
"CAP_SYS_ADMIN but no CAP_SYSLOG "
"(deprecated).\n",
current->comm, task_pid_nr(current));
- return 0;
+ goto ok;
}
return -EPERM;
}
+ok:
return security_syslog(type);
}
+static void append_char(char **pp, char *e, char c)
+{
+ if (*pp < e)
+ *(*pp)++ = c;
+}
+
+static ssize_t msg_print_ext_header(char *buf, size_t size,
+ struct printk_log *msg, u64 seq,
+ enum log_flags prev_flags)
+{
+ u64 ts_usec = msg->ts_nsec;
+ char cont = '-';
+
+ do_div(ts_usec, 1000);
+
+ /*
+ * If we couldn't merge continuation line fragments during the print,
+ * export the stored flags to allow an optional external merge of the
+ * records. Merging the records isn't always neccessarily correct, like
+ * when we hit a race during printing. In most cases though, it produces
+ * better readable output. 'c' in the record flags mark the first
+ * fragment of a line, '+' the following.
+ */
+ if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT))
+ cont = 'c';
+ else if ((msg->flags & LOG_CONT) ||
+ ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
+ cont = '+';
+
+ return scnprintf(buf, size, "%u,%llu,%llu,%c;",
+ (msg->facility << 3) | msg->level, seq, ts_usec, cont);
+}
+
+static ssize_t msg_print_ext_body(char *buf, size_t size,
+ char *dict, size_t dict_len,
+ char *text, size_t text_len)
+{
+ char *p = buf, *e = buf + size;
+ size_t i;
+
+ /* escape non-printable characters */
+ for (i = 0; i < text_len; i++) {
+ unsigned char c = text[i];
+
+ if (c < ' ' || c >= 127 || c == '\\')
+ p += scnprintf(p, e - p, "\\x%02x", c);
+ else
+ append_char(&p, e, c);
+ }
+ append_char(&p, e, '\n');
+
+ if (dict_len) {
+ bool line = true;
+
+ for (i = 0; i < dict_len; i++) {
+ unsigned char c = dict[i];
+
+ if (line) {
+ append_char(&p, e, ' ');
+ line = false;
+ }
+
+ if (c == '\0') {
+ append_char(&p, e, '\n');
+ line = true;
+ continue;
+ }
+
+ if (c < ' ' || c >= 127 || c == '\\') {
+ p += scnprintf(p, e - p, "\\x%02x", c);
+ continue;
+ }
+
+ append_char(&p, e, c);
+ }
+ append_char(&p, e, '\n');
+ }
+
+ return p - buf;
+}
/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
@@ -512,7 +605,7 @@ struct devkmsg_user {
u32 idx;
enum log_flags prev;
struct mutex lock;
- char buf[8192];
+ char buf[CONSOLE_EXT_LOG_MAX];
};
static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
@@ -570,9 +663,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
{
struct devkmsg_user *user = file->private_data;
struct printk_log *msg;
- u64 ts_usec;
- size_t i;
- char cont = '-';
size_t len;
ssize_t ret;
@@ -608,66 +698,13 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
}
msg = log_from_idx(user->idx);
- ts_usec = msg->ts_nsec;
- do_div(ts_usec, 1000);
+ len = msg_print_ext_header(user->buf, sizeof(user->buf),
+ msg, user->seq, user->prev);
+ len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len,
+ log_dict(msg), msg->dict_len,
+ log_text(msg), msg->text_len);
- /*
- * If we couldn't merge continuation line fragments during the print,
- * export the stored flags to allow an optional external merge of the
- * records. Merging the records isn't always neccessarily correct, like
- * when we hit a race during printing. In most cases though, it produces
- * better readable output. 'c' in the record flags mark the first
- * fragment of a line, '+' the following.
- */
- if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT))
- cont = 'c';
- else if ((msg->flags & LOG_CONT) ||
- ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
- cont = '+';
-
- len = sprintf(user->buf, "%u,%llu,%llu,%c;",
- (msg->facility << 3) | msg->level,
- user->seq, ts_usec, cont);
user->prev = msg->flags;
-
- /* escape non-printable characters */
- for (i = 0; i < msg->text_len; i++) {
- unsigned char c = log_text(msg)[i];
-
- if (c < ' ' || c >= 127 || c == '\\')
- len += sprintf(user->buf + len, "\\x%02x", c);
- else
- user->buf[len++] = c;
- }
- user->buf[len++] = '\n';
-
- if (msg->dict_len) {
- bool line = true;
-
- for (i = 0; i < msg->dict_len; i++) {
- unsigned char c = log_dict(msg)[i];
-
- if (line) {
- user->buf[len++] = ' ';
- line = false;
- }
-
- if (c == '\0') {
- user->buf[len++] = '\n';
- line = true;
- continue;
- }
-
- if (c < ' ' || c >= 127 || c == '\\') {
- len += sprintf(user->buf + len, "\\x%02x", c);
- continue;
- }
-
- user->buf[len++] = c;
- }
- user->buf[len++] = '\n';
- }
-
user->idx = log_next(user->idx);
user->seq++;
raw_spin_unlock_irq(&logbuf_lock);
@@ -1253,20 +1290,16 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
return len;
}
-int do_syslog(int type, char __user *buf, int len, bool from_file)
+int do_syslog(int type, char __user *buf, int len, int source)
{
bool clear = false;
static int saved_console_loglevel = LOGLEVEL_DEFAULT;
int error;
- error = check_syslog_permissions(type, from_file);
+ error = check_syslog_permissions(type, source);
if (error)
goto out;
- error = security_syslog(type);
- if (error)
- return error;
-
switch (type) {
case SYSLOG_ACTION_CLOSE: /* Close log */
break;
@@ -1346,7 +1379,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
syslog_prev = 0;
syslog_partial = 0;
}
- if (from_file) {
+ if (source == SYSLOG_FROM_PROC) {
/*
* Short-cut for poll(/"proc/kmsg") which simply checks
* for pending data, not the size; return the count of
@@ -1393,7 +1426,9 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
* log_buf[start] to log_buf[end - 1].
* The console_lock must be held.
*/
-static void call_console_drivers(int level, const char *text, size_t len)
+static void call_console_drivers(int level,
+ const char *ext_text, size_t ext_len,
+ const char *text, size_t len)
{
struct console *con;
@@ -1414,7 +1449,10 @@ static void call_console_drivers(int level, const char *text, size_t len)
if (!cpu_online(smp_processor_id()) &&
!(con->flags & CON_ANYTIME))
continue;
- con->write(con, text, len);
+ if (con->flags & CON_EXTENDED)
+ con->write(con, ext_text, ext_len);
+ else
+ con->write(con, text, len);
}
}
@@ -1557,8 +1595,12 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
if (cont.len && cont.flushed)
return false;
- if (cont.len + len > sizeof(cont.buf)) {
- /* the line gets too long, split it up in separate records */
+ /*
+ * If ext consoles are present, flush and skip in-kernel
+ * continuation. See nr_ext_console_drivers definition. Also, if
+ * the line gets too long, split it up in separate records.
+ */
+ if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
cont_flush(LOG_CONT);
return false;
}
@@ -1893,9 +1935,19 @@ static struct cont {
u8 level;
bool flushed:1;
} cont;
+static char *log_text(const struct printk_log *msg) { return NULL; }
+static char *log_dict(const struct printk_log *msg) { return NULL; }
static struct printk_log *log_from_idx(u32 idx) { return NULL; }
static u32 log_next(u32 idx) { return 0; }
-static void call_console_drivers(int level, const char *text, size_t len) {}
+static ssize_t msg_print_ext_header(char *buf, size_t size,
+ struct printk_log *msg, u64 seq,
+ enum log_flags prev_flags) { return 0; }
+static ssize_t msg_print_ext_body(char *buf, size_t size,
+ char *dict, size_t dict_len,
+ char *text, size_t text_len) { return 0; }
+static void call_console_drivers(int level,
+ const char *ext_text, size_t ext_len,
+ const char *text, size_t len) {}
static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
bool syslog, char *buf, size_t size) { return 0; }
static size_t cont_print_text(char *text, size_t size) { return 0; }
@@ -2148,7 +2200,7 @@ static void console_cont_flush(char *text, size_t size)
len = cont_print_text(text, size);
raw_spin_unlock(&logbuf_lock);
stop_critical_timings();
- call_console_drivers(cont.level, text, len);
+ call_console_drivers(cont.level, NULL, 0, text, len);
start_critical_timings();
local_irq_restore(flags);
return;
@@ -2172,6 +2224,7 @@ out:
*/
void console_unlock(void)
{
+ static char ext_text[CONSOLE_EXT_LOG_MAX];
static char text[LOG_LINE_MAX + PREFIX_MAX];
static u64 seen_seq;
unsigned long flags;
@@ -2190,6 +2243,7 @@ void console_unlock(void)
again:
for (;;) {
struct printk_log *msg;
+ size_t ext_len = 0;
size_t len;
int level;
@@ -2235,13 +2289,22 @@ skip:
level = msg->level;
len += msg_print_text(msg, console_prev, false,
text + len, sizeof(text) - len);
+ if (nr_ext_console_drivers) {
+ ext_len = msg_print_ext_header(ext_text,
+ sizeof(ext_text),
+ msg, console_seq, console_prev);
+ ext_len += msg_print_ext_body(ext_text + ext_len,
+ sizeof(ext_text) - ext_len,
+ log_dict(msg), msg->dict_len,
+ log_text(msg), msg->text_len);
+ }
console_idx = log_next(console_idx);
console_seq++;
console_prev = msg->flags;
raw_spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
- call_console_drivers(level, text, len);
+ call_console_drivers(level, ext_text, ext_len, text, len);
start_critical_timings();
local_irq_restore(flags);
}
@@ -2497,6 +2560,11 @@ void register_console(struct console *newcon)
newcon->next = console_drivers->next;
console_drivers->next = newcon;
}
+
+ if (newcon->flags & CON_EXTENDED)
+ if (!nr_ext_console_drivers++)
+ pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n");
+
if (newcon->flags & CON_PRINTBUFFER) {
/*
* console_unlock(); will print out the buffered messages
@@ -2569,6 +2637,9 @@ int unregister_console(struct console *console)
}
}
+ if (!res && (console->flags & CON_EXTENDED))
+ nr_ext_console_drivers--;
+
/*
* If this isn't the last console and it has CON_CONSDEV set, we
* need to set it on the next preferred console.
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 591af0cb7b9f..c291bd65d2cb 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -35,7 +35,7 @@
#include <linux/time.h>
#include <linux/cpu.h>
#include <linux/prefetch.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include "rcu.h"
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index add042926a66..65137bc28b2b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -54,7 +54,7 @@
#include <linux/delay.h>
#include <linux/stop_machine.h>
#include <linux/random.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/suspend.h>
#include "tree.h"
diff --git a/kernel/signal.c b/kernel/signal.c
index f19833b5db3c..836df8dac6cc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -414,21 +414,16 @@ void flush_sigqueue(struct sigpending *queue)
}
/*
- * Flush all pending signals for a task.
+ * Flush all pending signals for this kthread.
*/
-void __flush_signals(struct task_struct *t)
-{
- clear_tsk_thread_flag(t, TIF_SIGPENDING);
- flush_sigqueue(&t->pending);
- flush_sigqueue(&t->signal->shared_pending);
-}
-
void flush_signals(struct task_struct *t)
{
unsigned long flags;
spin_lock_irqsave(&t->sighand->siglock, flags);
- __flush_signals(t);
+ clear_tsk_thread_flag(t, TIF_SIGPENDING);
+ flush_sigqueue(&t->pending);
+ flush_sigqueue(&t->signal->shared_pending);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 8571296b7ddb..259fda25eb6b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1722,7 +1722,6 @@ exit_err:
goto exit;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
/*
* WARNING: we don't require any capability here so be very careful
* in what is allowed for modification from userspace.
@@ -1818,6 +1817,7 @@ out:
return error;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size)
{
struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, };
@@ -1902,10 +1902,41 @@ out:
}
#endif /* CONFIG_CHECKPOINT_RESTORE */
+static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ /*
+ * This doesn't move the auxiliary vector itself since it's pinned to
+ * mm_struct, but it permits filling the vector with new values. It's
+ * up to the caller to provide sane values here, otherwise userspace
+ * tools which use this vector might be unhappy.
+ */
+ unsigned long user_auxv[AT_VECTOR_SIZE];
+
+ if (len > sizeof(user_auxv))
+ return -EINVAL;
+
+ if (copy_from_user(user_auxv, (const void __user *)addr, len))
+ return -EFAULT;
+
+ /* Make sure the last entry is always AT_NULL */
+ user_auxv[AT_VECTOR_SIZE - 2] = 0;
+ user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+ BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+ task_lock(current);
+ memcpy(mm->saved_auxv, user_auxv, len);
+ task_unlock(current);
+
+ return 0;
+}
+
static int prctl_set_mm(int opt, unsigned long addr,
unsigned long arg4, unsigned long arg5)
{
struct mm_struct *mm = current->mm;
+ struct prctl_mm_map prctl_map;
struct vm_area_struct *vma;
int error;
@@ -1925,6 +1956,9 @@ static int prctl_set_mm(int opt, unsigned long addr,
if (opt == PR_SET_MM_EXE_FILE)
return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+ if (opt == PR_SET_MM_AUXV)
+ return prctl_set_auxv(mm, addr, arg4);
+
if (addr >= TASK_SIZE || addr < mmap_min_addr)
return -EINVAL;
@@ -1933,42 +1967,64 @@ static int prctl_set_mm(int opt, unsigned long addr,
down_read(&mm->mmap_sem);
vma = find_vma(mm, addr);
+ prctl_map.start_code = mm->start_code;
+ prctl_map.end_code = mm->end_code;
+ prctl_map.start_data = mm->start_data;
+ prctl_map.end_data = mm->end_data;
+ prctl_map.start_brk = mm->start_brk;
+ prctl_map.brk = mm->brk;
+ prctl_map.start_stack = mm->start_stack;
+ prctl_map.arg_start = mm->arg_start;
+ prctl_map.arg_end = mm->arg_end;
+ prctl_map.env_start = mm->env_start;
+ prctl_map.env_end = mm->env_end;
+ prctl_map.auxv = NULL;
+ prctl_map.auxv_size = 0;
+ prctl_map.exe_fd = -1;
+
switch (opt) {
case PR_SET_MM_START_CODE:
- mm->start_code = addr;
+ prctl_map.start_code = addr;
break;
case PR_SET_MM_END_CODE:
- mm->end_code = addr;
+ prctl_map.end_code = addr;
break;
case PR_SET_MM_START_DATA:
- mm->start_data = addr;
+ prctl_map.start_data = addr;
break;
case PR_SET_MM_END_DATA:
- mm->end_data = addr;
+ prctl_map.end_data = addr;
+ break;
+ case PR_SET_MM_START_STACK:
+ prctl_map.start_stack = addr;
break;
-
case PR_SET_MM_START_BRK:
- if (addr <= mm->end_data)
- goto out;
-
- if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr,
- mm->end_data, mm->start_data))
- goto out;
-
- mm->start_brk = addr;
+ prctl_map.start_brk = addr;
break;
-
case PR_SET_MM_BRK:
- if (addr <= mm->end_data)
- goto out;
-
- if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk,
- mm->end_data, mm->start_data))
- goto out;
-
- mm->brk = addr;
+ prctl_map.brk = addr;
break;
+ case PR_SET_MM_ARG_START:
+ prctl_map.arg_start = addr;
+ break;
+ case PR_SET_MM_ARG_END:
+ prctl_map.arg_end = addr;
+ break;
+ case PR_SET_MM_ENV_START:
+ prctl_map.env_start = addr;
+ break;
+ case PR_SET_MM_ENV_END:
+ prctl_map.env_end = addr;
+ break;
+ default:
+ goto out;
+ }
+
+ error = validate_prctl_map(&prctl_map);
+ if (error)
+ goto out;
+ switch (opt) {
/*
* If command line arguments and environment
* are placed somewhere else on stack, we can
@@ -1985,52 +2041,20 @@ static int prctl_set_mm(int opt, unsigned long addr,
error = -EFAULT;
goto out;
}
- if (opt == PR_SET_MM_START_STACK)
- mm->start_stack = addr;
- else if (opt == PR_SET_MM_ARG_START)
- mm->arg_start = addr;
- else if (opt == PR_SET_MM_ARG_END)
- mm->arg_end = addr;
- else if (opt == PR_SET_MM_ENV_START)
- mm->env_start = addr;
- else if (opt == PR_SET_MM_ENV_END)
- mm->env_end = addr;
- break;
-
- /*
- * This doesn't move auxiliary vector itself
- * since it's pinned to mm_struct, but allow
- * to fill vector with new values. It's up
- * to a caller to provide sane values here
- * otherwise user space tools which use this
- * vector might be unhappy.
- */
- case PR_SET_MM_AUXV: {
- unsigned long user_auxv[AT_VECTOR_SIZE];
-
- if (arg4 > sizeof(user_auxv))
- goto out;
- up_read(&mm->mmap_sem);
-
- if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
- return -EFAULT;
-
- /* Make sure the last entry is always AT_NULL */
- user_auxv[AT_VECTOR_SIZE - 2] = 0;
- user_auxv[AT_VECTOR_SIZE - 1] = 0;
-
- BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
-
- task_lock(current);
- memcpy(mm->saved_auxv, user_auxv, arg4);
- task_unlock(current);
-
- return 0;
- }
- default:
- goto out;
}
+ mm->start_code = prctl_map.start_code;
+ mm->end_code = prctl_map.end_code;
+ mm->start_data = prctl_map.start_data;
+ mm->end_data = prctl_map.end_data;
+ mm->start_brk = prctl_map.start_brk;
+ mm->brk = prctl_map.brk;
+ mm->start_stack = prctl_map.start_stack;
+ mm->arg_start = prctl_map.arg_start;
+ mm->arg_end = prctl_map.arg_end;
+ mm->env_start = prctl_map.env_start;
+ mm->env_end = prctl_map.env_end;
+
error = 0;
out:
up_read(&mm->mmap_sem);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 30b7a409bf1e..bca3667a2de1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -319,32 +319,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
* We want to use this from any context including NMI and tracing /
* instrumenting the timekeeping code itself.
*
- * So we handle this differently than the other timekeeping accessor
- * functions which retry when the sequence count has changed. The
- * update side does:
- *
- * smp_wmb(); <- Ensure that the last base[1] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tkr);
- * smp_wmb(); <- Ensure that the base[0] update is visible
- * tkf->seq++;
- * smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tkr);
- *
- * The reader side does:
- *
- * do {
- * seq = tkf->seq;
- * smp_rmb();
- * idx = seq & 0x01;
- * now = now(tkf->base[idx]);
- * smp_rmb();
- * } while (seq != tkf->seq)
- *
- * As long as we update base[0] readers are forced off to
- * base[1]. Once base[0] is updated readers are redirected to base[0]
- * and the base[1] update takes place.
+ * Employ the latch technique; see @raw_write_seqcount_latch.
*
* So if a NMI hits the update of base[0] then it will use base[1]
* which is still consistent. In the worst case this can result is a
@@ -407,7 +382,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
u64 now;
do {
- seq = raw_read_seqcount(&tkf->seq);
+ seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 483cecfa5c17..b3e6b39b6cf9 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -439,7 +439,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
{
struct blk_trace *old_bt, *bt = NULL;
struct dentry *dir = NULL;
- int ret, i;
+ int ret;
if (!buts->buf_size || !buts->buf_nr)
return -EINVAL;
@@ -451,9 +451,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
* some device names have larger paths - convert the slashes
* to underscores for this to work as expected
*/
- for (i = 0; i < strlen(buts->name); i++)
- if (buts->name[i] == '/')
- buts->name[i] = '_';
+ strreplace(buts->name, '/', '_');
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
@@ -1450,14 +1448,14 @@ static struct trace_event trace_blk_event = {
static int __init init_blk_tracer(void)
{
- if (!register_ftrace_event(&trace_blk_event)) {
+ if (!register_trace_event(&trace_blk_event)) {
pr_warning("Warning: could not register block events\n");
return 1;
}
if (register_tracer(&blk_tracer) != 0) {
pr_warning("Warning: could not register the block tracer\n");
- unregister_ftrace_event(&trace_blk_event);
+ unregister_trace_event(&trace_blk_event);
return 1;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0315d43176d8..6260717c18e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
#include <linux/trace_seq.h>
@@ -115,63 +115,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
*
*/
-/*
- * A fast way to enable or disable all ring buffers is to
- * call tracing_on or tracing_off. Turning off the ring buffers
- * prevents all ring buffers from being recorded to.
- * Turning this switch on, makes it OK to write to the
- * ring buffer, if the ring buffer is enabled itself.
- *
- * There's three layers that must be on in order to write
- * to the ring buffer.
- *
- * 1) This global flag must be set.
- * 2) The ring buffer must be enabled for recording.
- * 3) The per cpu buffer must be enabled for recording.
- *
- * In case of an anomaly, this global flag has a bit set that
- * will permantly disable all ring buffers.
- */
-
-/*
- * Global flag to disable all recording to ring buffers
- * This has two bits: ON, DISABLED
- *
- * ON DISABLED
- * ---- ----------
- * 0 0 : ring buffers are off
- * 1 0 : ring buffers are on
- * X 1 : ring buffers are permanently disabled
- */
-
-enum {
- RB_BUFFERS_ON_BIT = 0,
- RB_BUFFERS_DISABLED_BIT = 1,
-};
-
-enum {
- RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT,
- RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT,
-};
-
-static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
-
/* Used for individual buffers (after the counter) */
#define RB_BUFFER_OFF (1 << 20)
#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
-/**
- * tracing_off_permanent - permanently disable ring buffers
- *
- * This function, once called, will disable all ring buffers
- * permanently.
- */
-void tracing_off_permanent(void)
-{
- set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
-}
-
#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -452,6 +400,23 @@ struct rb_irq_work {
};
/*
+ * Used for which event context the event is in.
+ * NMI = 0
+ * IRQ = 1
+ * SOFTIRQ = 2
+ * NORMAL = 3
+ *
+ * See trace_recursive_lock() comment below for more details.
+ */
+enum {
+ RB_CTX_NMI,
+ RB_CTX_IRQ,
+ RB_CTX_SOFTIRQ,
+ RB_CTX_NORMAL,
+ RB_CTX_MAX
+};
+
+/*
* head_page == tail_page && head == tail then buffer is empty.
*/
struct ring_buffer_per_cpu {
@@ -462,6 +427,7 @@ struct ring_buffer_per_cpu {
arch_spinlock_t lock;
struct lock_class_key lock_key;
unsigned int nr_pages;
+ unsigned int current_context;
struct list_head *pages;
struct buffer_page *head_page; /* read from head */
struct buffer_page *tail_page; /* write to tail */
@@ -2224,7 +2190,7 @@ static unsigned rb_calculate_event_length(unsigned length)
/* zero length can cause confusions */
if (!length)
- length = 1;
+ length++;
if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
length += sizeof(event.array[0]);
@@ -2636,8 +2602,6 @@ rb_reserve_next_event(struct ring_buffer *buffer,
return NULL;
}
-#ifdef CONFIG_TRACING
-
/*
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
@@ -2675,44 +2639,38 @@ rb_reserve_next_event(struct ring_buffer *buffer,
* just so happens that it is the same bit corresponding to
* the current context.
*/
-static DEFINE_PER_CPU(unsigned int, current_context);
-static __always_inline int trace_recursive_lock(void)
+static __always_inline int
+trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
- unsigned int val = __this_cpu_read(current_context);
+ unsigned int val = cpu_buffer->current_context;
int bit;
if (in_interrupt()) {
if (in_nmi())
- bit = 0;
+ bit = RB_CTX_NMI;
else if (in_irq())
- bit = 1;
+ bit = RB_CTX_IRQ;
else
- bit = 2;
+ bit = RB_CTX_SOFTIRQ;
} else
- bit = 3;
+ bit = RB_CTX_NORMAL;
if (unlikely(val & (1 << bit)))
return 1;
val |= (1 << bit);
- __this_cpu_write(current_context, val);
+ cpu_buffer->current_context = val;
return 0;
}
-static __always_inline void trace_recursive_unlock(void)
+static __always_inline void
+trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- __this_cpu_and(current_context, __this_cpu_read(current_context) - 1);
+ cpu_buffer->current_context &= cpu_buffer->current_context - 1;
}
-#else
-
-#define trace_recursive_lock() (0)
-#define trace_recursive_unlock() do { } while (0)
-
-#endif
-
/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
@@ -2735,41 +2693,37 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
struct ring_buffer_event *event;
int cpu;
- if (ring_buffer_flags != RB_BUFFERS_ON)
- return NULL;
-
/* If we are tracing schedule, we don't want to recurse */
preempt_disable_notrace();
- if (atomic_read(&buffer->record_disabled))
- goto out_nocheck;
-
- if (trace_recursive_lock())
- goto out_nocheck;
+ if (unlikely(atomic_read(&buffer->record_disabled)))
+ goto out;
cpu = raw_smp_processor_id();
- if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask)))
goto out;
cpu_buffer = buffer->buffers[cpu];
- if (atomic_read(&cpu_buffer->record_disabled))
+ if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
goto out;
- if (length > BUF_MAX_DATA_SIZE)
+ if (unlikely(length > BUF_MAX_DATA_SIZE))
+ goto out;
+
+ if (unlikely(trace_recursive_lock(cpu_buffer)))
goto out;
event = rb_reserve_next_event(buffer, cpu_buffer, length);
if (!event)
- goto out;
+ goto out_unlock;
return event;
+ out_unlock:
+ trace_recursive_unlock(cpu_buffer);
out:
- trace_recursive_unlock();
-
- out_nocheck:
preempt_enable_notrace();
return NULL;
}
@@ -2859,7 +2813,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
rb_wakeups(buffer, cpu_buffer);
- trace_recursive_unlock();
+ trace_recursive_unlock(cpu_buffer);
preempt_enable_notrace();
@@ -2970,7 +2924,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
out:
rb_end_commit(cpu_buffer);
- trace_recursive_unlock();
+ trace_recursive_unlock(cpu_buffer);
preempt_enable_notrace();
@@ -3000,9 +2954,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
int ret = -EBUSY;
int cpu;
- if (ring_buffer_flags != RB_BUFFERS_ON)
- return -EBUSY;
-
preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
@@ -3021,9 +2972,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
if (length > BUF_MAX_DATA_SIZE)
goto out;
+ if (unlikely(trace_recursive_lock(cpu_buffer)))
+ goto out;
+
event = rb_reserve_next_event(buffer, cpu_buffer, length);
if (!event)
- goto out;
+ goto out_unlock;
body = rb_event_data(event);
@@ -3034,6 +2988,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
rb_wakeups(buffer, cpu_buffer);
ret = 0;
+
+ out_unlock:
+ trace_recursive_unlock(cpu_buffer);
+
out:
preempt_enable_notrace();
@@ -3860,19 +3818,36 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
-static inline int rb_ok_to_lock(void)
+static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
+ if (likely(!in_nmi())) {
+ raw_spin_lock(&cpu_buffer->reader_lock);
+ return true;
+ }
+
/*
* If an NMI die dumps out the content of the ring buffer
- * do not grab locks. We also permanently disable the ring
- * buffer too. A one time deal is all you get from reading
- * the ring buffer from an NMI.
+ * trylock must be used to prevent a deadlock if the NMI
+ * preempted a task that holds the ring buffer locks. If
+ * we get the lock then all is fine, if not, then continue
+ * to do the read, but this can corrupt the ring buffer,
+ * so it must be permanently disabled from future writes.
+ * Reading from NMI is a oneshot deal.
*/
- if (likely(!in_nmi()))
- return 1;
+ if (raw_spin_trylock(&cpu_buffer->reader_lock))
+ return true;
- tracing_off_permanent();
- return 0;
+ /* Continue without locking, but disable the ring buffer */
+ atomic_inc(&cpu_buffer->record_disabled);
+ return false;
+}
+
+static inline void
+rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked)
+{
+ if (likely(locked))
+ raw_spin_unlock(&cpu_buffer->reader_lock);
+ return;
}
/**
@@ -3892,21 +3867,18 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
struct ring_buffer_event *event;
unsigned long flags;
- int dolock;
+ bool dolock;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
- dolock = rb_ok_to_lock();
again:
local_irq_save(flags);
- if (dolock)
- raw_spin_lock(&cpu_buffer->reader_lock);
+ dolock = rb_reader_lock(cpu_buffer);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
rb_advance_reader(cpu_buffer);
- if (dolock)
- raw_spin_unlock(&cpu_buffer->reader_lock);
+ rb_reader_unlock(cpu_buffer, dolock);
local_irq_restore(flags);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
@@ -3959,9 +3931,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event = NULL;
unsigned long flags;
- int dolock;
-
- dolock = rb_ok_to_lock();
+ bool dolock;
again:
/* might be called in atomic */
@@ -3972,8 +3942,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
- if (dolock)
- raw_spin_lock(&cpu_buffer->reader_lock);
+ dolock = rb_reader_lock(cpu_buffer);
event = rb_buffer_peek(cpu_buffer, ts, lost_events);
if (event) {
@@ -3981,8 +3950,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
rb_advance_reader(cpu_buffer);
}
- if (dolock)
- raw_spin_unlock(&cpu_buffer->reader_lock);
+ rb_reader_unlock(cpu_buffer, dolock);
local_irq_restore(flags);
out:
@@ -4263,21 +4231,17 @@ int ring_buffer_empty(struct ring_buffer *buffer)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
- int dolock;
+ bool dolock;
int cpu;
int ret;
- dolock = rb_ok_to_lock();
-
/* yes this is racy, but if you don't like the race, lock the buffer */
for_each_buffer_cpu(buffer, cpu) {
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
- if (dolock)
- raw_spin_lock(&cpu_buffer->reader_lock);
+ dolock = rb_reader_lock(cpu_buffer);
ret = rb_per_cpu_empty(cpu_buffer);
- if (dolock)
- raw_spin_unlock(&cpu_buffer->reader_lock);
+ rb_reader_unlock(cpu_buffer, dolock);
local_irq_restore(flags);
if (!ret)
@@ -4297,21 +4261,17 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long flags;
- int dolock;
+ bool dolock;
int ret;
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 1;
- dolock = rb_ok_to_lock();
-
cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
- if (dolock)
- raw_spin_lock(&cpu_buffer->reader_lock);
+ dolock = rb_reader_lock(cpu_buffer);
ret = rb_per_cpu_empty(cpu_buffer);
- if (dolock)
- raw_spin_unlock(&cpu_buffer->reader_lock);
+ rb_reader_unlock(cpu_buffer, dolock);
local_irq_restore(flags);
return ret;
@@ -4349,9 +4309,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
ret = -EAGAIN;
- if (ring_buffer_flags != RB_BUFFERS_ON)
- goto out;
-
if (atomic_read(&buffer_a->record_disabled))
goto out;
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 1b28df2d9104..a1503a027ee2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -32,11 +32,11 @@ static struct task_struct *producer;
static struct task_struct *consumer;
static unsigned long read;
-static int disable_reader;
+static unsigned int disable_reader;
module_param(disable_reader, uint, 0644);
MODULE_PARM_DESC(disable_reader, "only run producer");
-static int write_iteration = 50;
+static unsigned int write_iteration = 50;
module_param(write_iteration, uint, 0644);
MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings");
@@ -46,16 +46,16 @@ static int consumer_nice = MAX_NICE;
static int producer_fifo = -1;
static int consumer_fifo = -1;
-module_param(producer_nice, uint, 0644);
+module_param(producer_nice, int, 0644);
MODULE_PARM_DESC(producer_nice, "nice prio for producer");
-module_param(consumer_nice, uint, 0644);
+module_param(consumer_nice, int, 0644);
MODULE_PARM_DESC(consumer_nice, "nice prio for consumer");
-module_param(producer_fifo, uint, 0644);
+module_param(producer_fifo, int, 0644);
MODULE_PARM_DESC(producer_fifo, "fifo prio for producer");
-module_param(consumer_fifo, uint, 0644);
+module_param(consumer_fifo, int, 0644);
MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer");
static int read_events;
@@ -263,6 +263,8 @@ static void ring_buffer_producer(void)
if (cnt % wakeup_interval)
cond_resched();
#endif
+ if (kthread_should_stop())
+ kill_test = 1;
} while (ktime_before(end_time, timeout) && !kill_test);
trace_printk("End ring buffer hammer\n");
@@ -285,7 +287,7 @@ static void ring_buffer_producer(void)
entries = ring_buffer_entries(buffer);
overruns = ring_buffer_overruns(buffer);
- if (kill_test)
+ if (kill_test && !kthread_should_stop())
trace_printk("ERROR!\n");
if (!disable_reader) {
@@ -379,7 +381,7 @@ static int ring_buffer_consumer_thread(void *arg)
}
__set_current_state(TASK_RUNNING);
- if (kill_test)
+ if (!kthread_should_stop())
wait_to_die();
return 0;
@@ -399,13 +401,16 @@ static int ring_buffer_producer_thread(void *arg)
}
ring_buffer_producer();
+ if (kill_test)
+ goto out_kill;
trace_printk("Sleeping for 10 secs\n");
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ * SLEEP_TIME);
}
- if (kill_test)
+out_kill:
+ if (!kthread_should_stop())
wait_to_die();
return 0;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 05330494a0df..abcbf7ff8743 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -297,11 +297,11 @@ void trace_array_put(struct trace_array *this_tr)
mutex_unlock(&trace_types_lock);
}
-int filter_check_discard(struct ftrace_event_file *file, void *rec,
+int filter_check_discard(struct trace_event_file *file, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
- if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
+ if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
!filter_match_preds(file->filter, rec)) {
ring_buffer_discard_commit(buffer, event);
return 1;
@@ -311,7 +311,7 @@ int filter_check_discard(struct ftrace_event_file *file, void *rec,
}
EXPORT_SYMBOL_GPL(filter_check_discard);
-int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
+int call_filter_check_discard(struct trace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
@@ -876,6 +876,7 @@ static struct {
{ trace_clock_jiffies, "uptime", 0 },
{ trace_clock, "perf", 1 },
{ ktime_get_mono_fast_ns, "mono", 1 },
+ { ktime_get_raw_fast_ns, "mono_raw", 1 },
ARCH_TRACE_CLOCKS
};
@@ -1693,13 +1694,13 @@ static struct ring_buffer *temp_buffer;
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
- struct ftrace_event_file *ftrace_file,
+ struct trace_event_file *trace_file,
int type, unsigned long len,
unsigned long flags, int pc)
{
struct ring_buffer_event *entry;
- *current_rb = ftrace_file->tr->trace_buffer.buffer;
+ *current_rb = trace_file->tr->trace_buffer.buffer;
entry = trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
/*
@@ -1708,7 +1709,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
* to store the trace event for the tigger to use. It's recusive
* safe and will not be recorded anywhere.
*/
- if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
+ if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
*current_rb = temp_buffer;
entry = trace_buffer_lock_reserve(*current_rb,
type, len, flags, pc);
@@ -1760,7 +1761,7 @@ trace_function(struct trace_array *tr,
unsigned long ip, unsigned long parent_ip, unsigned long flags,
int pc)
{
- struct ftrace_event_call *call = &event_function;
+ struct trace_event_call *call = &event_function;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct ftrace_entry *entry;
@@ -1795,7 +1796,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
unsigned long flags,
int skip, int pc, struct pt_regs *regs)
{
- struct ftrace_event_call *call = &event_kernel_stack;
+ struct trace_event_call *call = &event_kernel_stack;
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
@@ -1923,7 +1924,7 @@ static DEFINE_PER_CPU(int, user_stack_count);
void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
- struct ftrace_event_call *call = &event_user_stack;
+ struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
@@ -2129,7 +2130,7 @@ static void trace_printk_start_stop_comm(int enabled)
*/
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
- struct ftrace_event_call *call = &event_bprint;
+ struct trace_event_call *call = &event_bprint;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
struct trace_array *tr = &global_trace;
@@ -2187,7 +2188,7 @@ static int
__trace_array_vprintk(struct ring_buffer *buffer,
unsigned long ip, const char *fmt, va_list args)
{
- struct ftrace_event_call *call = &event_print;
+ struct trace_event_call *call = &event_print;
struct ring_buffer_event *event;
int len = 0, size, pc;
struct print_entry *entry;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d2612016de94..f060716b02ae 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -12,7 +12,7 @@
#include <linux/ftrace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
-#include <linux/ftrace_event.h>
+#include <linux/trace_events.h>
#include <linux/compiler.h>
#include <linux/trace_seq.h>
@@ -211,8 +211,8 @@ struct trace_array {
#ifdef CONFIG_FTRACE_SYSCALLS
int sys_refcount_enter;
int sys_refcount_exit;
- struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls];
- struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls];
+ struct trace_event_file __rcu *enter_syscall_files[NR_syscalls];
+ struct trace_event_file __rcu *exit_syscall_files[NR_syscalls];
#endif
int stop_count;
int clock_id;
@@ -858,7 +858,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops);
#define ftrace_destroy_filter_files(ops) do { } while (0)
#endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */
-int ftrace_event_is_function(struct ftrace_event_call *call);
+int ftrace_event_is_function(struct trace_event_call *call);
/*
* struct trace_parser - servers for reading the user input separated by spaces
@@ -992,7 +992,7 @@ struct event_subsystem {
int ref_count;
};
-struct ftrace_subsystem_dir {
+struct trace_subsystem_dir {
struct list_head list;
struct event_subsystem *subsystem;
struct trace_array *tr;
@@ -1052,30 +1052,30 @@ struct filter_pred {
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
-extern void print_event_filter(struct ftrace_event_file *file,
+extern void print_event_filter(struct trace_event_file *file,
struct trace_seq *s);
-extern int apply_event_filter(struct ftrace_event_file *file,
+extern int apply_event_filter(struct trace_event_file *file,
char *filter_string);
-extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
+extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
char *filter_string);
extern void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s);
extern int filter_assign_type(const char *type);
-extern int create_event_filter(struct ftrace_event_call *call,
+extern int create_event_filter(struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp);
extern void free_event_filter(struct event_filter *filter);
struct ftrace_event_field *
-trace_find_event_field(struct ftrace_event_call *call, char *name);
+trace_find_event_field(struct trace_event_call *call, char *name);
extern void trace_event_enable_cmd_record(bool enable);
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
extern int event_trace_del_tracer(struct trace_array *tr);
-extern struct ftrace_event_file *find_event_file(struct trace_array *tr,
- const char *system,
- const char *event);
+extern struct trace_event_file *find_event_file(struct trace_array *tr,
+ const char *system,
+ const char *event);
static inline void *event_file_data(struct file *filp)
{
@@ -1180,7 +1180,7 @@ struct event_trigger_ops {
* commands need to do this if they themselves log to the trace
* buffer (see the @post_trigger() member below). @trigger_type
* values are defined by adding new values to the trigger_type
- * enum in include/linux/ftrace_event.h.
+ * enum in include/linux/trace_events.h.
*
* @post_trigger: A flag that says whether or not this command needs
* to have its action delayed until after the current event has
@@ -1242,23 +1242,23 @@ struct event_command {
enum event_trigger_type trigger_type;
bool post_trigger;
int (*func)(struct event_command *cmd_ops,
- struct ftrace_event_file *file,
+ struct trace_event_file *file,
char *glob, char *cmd, char *params);
int (*reg)(char *glob,
struct event_trigger_ops *ops,
struct event_trigger_data *data,
- struct ftrace_event_file *file);
+ struct trace_event_file *file);
void (*unreg)(char *glob,
struct event_trigger_ops *ops,
struct event_trigger_data *data,
- struct ftrace_event_file *file);
+ struct trace_event_file *file);
int (*set_filter)(char *filter_str,
struct event_trigger_data *data,
- struct ftrace_event_file *file);
+ struct trace_event_file *file);
struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param);
};
-extern int trace_event_enable_disable(struct ftrace_event_file *file,
+extern int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable);
extern int tracing_alloc_snapshot(void);
@@ -1286,7 +1286,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
- extern struct ftrace_event_call \
+ extern struct trace_event_call \
__aligned(4) event_##call;
#undef FTRACE_ENTRY_DUP
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
@@ -1295,7 +1295,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
#include "trace_entries.h"
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER)
-int perf_ftrace_event_register(struct ftrace_event_call *call,
+int perf_ftrace_event_register(struct trace_event_call *call,
enum trace_reg type, void *data);
#else
#define perf_ftrace_event_register NULL
@@ -1312,7 +1312,7 @@ void trace_event_init(void);
void trace_event_enum_update(struct trace_enum_map **map, int len);
#else
static inline void __init trace_event_init(void) { }
-static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { }
+static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { }
#endif
extern struct trace_iterator *tracepoint_print_iter;
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 57cbf1efdd44..a87b43f49eb4 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -29,7 +29,7 @@ static struct trace_array *branch_tracer;
static void
probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
{
- struct ftrace_event_call *call = &event_branch;
+ struct trace_event_call *call = &event_branch;
struct trace_array *tr = branch_tracer;
struct trace_array_cpu *data;
struct ring_buffer_event *event;
@@ -191,7 +191,7 @@ __init static int init_branch_tracer(void)
{
int ret;
- ret = register_ftrace_event(&trace_branch_event);
+ ret = register_trace_event(&trace_branch_event);
if (!ret) {
printk(KERN_WARNING "Warning: could not register "
"branch events\n");
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 57b67b1f24d1..0f06532a755b 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -56,6 +56,7 @@ u64 notrace trace_clock(void)
{
return local_clock();
}
+EXPORT_SYMBOL_GPL(trace_clock);
/*
* trace_jiffy_clock(): Simply use jiffies as a clock counter.
@@ -68,6 +69,7 @@ u64 notrace trace_clock_jiffies(void)
{
return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES);
}
+EXPORT_SYMBOL_GPL(trace_clock_jiffies);
/*
* trace_clock_global(): special globally coherent trace clock
@@ -123,6 +125,7 @@ u64 notrace trace_clock_global(void)
return now;
}
+EXPORT_SYMBOL_GPL(trace_clock_global);
static atomic64_t trace_counter;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 6fa484de2ba1..abfc903e741e 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -21,7 +21,7 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
-static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
+static int perf_trace_event_perm(struct trace_event_call *tp_event,
struct perf_event *p_event)
{
if (tp_event->perf_perm) {
@@ -83,7 +83,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
return 0;
}
-static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
+static int perf_trace_event_reg(struct trace_event_call *tp_event,
struct perf_event *p_event)
{
struct hlist_head __percpu *list;
@@ -143,7 +143,7 @@ fail:
static void perf_trace_event_unreg(struct perf_event *p_event)
{
- struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct trace_event_call *tp_event = p_event->tp_event;
int i;
if (--tp_event->perf_refcount > 0)
@@ -172,17 +172,17 @@ out:
static int perf_trace_event_open(struct perf_event *p_event)
{
- struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct trace_event_call *tp_event = p_event->tp_event;
return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}
static void perf_trace_event_close(struct perf_event *p_event)
{
- struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct trace_event_call *tp_event = p_event->tp_event;
tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}
-static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+static int perf_trace_event_init(struct trace_event_call *tp_event,
struct perf_event *p_event)
{
int ret;
@@ -206,7 +206,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
int perf_trace_init(struct perf_event *p_event)
{
- struct ftrace_event_call *tp_event;
+ struct trace_event_call *tp_event;
u64 event_id = p_event->attr.config;
int ret = -EINVAL;
@@ -236,7 +236,7 @@ void perf_trace_destroy(struct perf_event *p_event)
int perf_trace_add(struct perf_event *p_event, int flags)
{
- struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct trace_event_call *tp_event = p_event->tp_event;
struct hlist_head __percpu *pcpu_list;
struct hlist_head *list;
@@ -255,7 +255,7 @@ int perf_trace_add(struct perf_event *p_event, int flags)
void perf_trace_del(struct perf_event *p_event, int flags)
{
- struct ftrace_event_call *tp_event = p_event->tp_event;
+ struct trace_event_call *tp_event = p_event->tp_event;
hlist_del_rcu(&p_event->hlist_entry);
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}
@@ -357,7 +357,7 @@ static void perf_ftrace_function_disable(struct perf_event *event)
ftrace_function_local_disable(&event->ftrace_ops);
}
-int perf_ftrace_event_register(struct ftrace_event_call *call,
+int perf_ftrace_event_register(struct trace_event_call *call,
enum trace_reg type, void *data)
{
switch (type) {
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c4de47fc5cca..404a372ad85a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -61,14 +61,14 @@ static int system_refcount_dec(struct event_subsystem *system)
#define do_for_each_event_file_safe(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
- struct ftrace_event_file *___n; \
+ struct trace_event_file *___n; \
list_for_each_entry_safe(file, ___n, &tr->events, list)
#define while_for_each_event_file() \
}
static struct list_head *
-trace_get_fields(struct ftrace_event_call *event_call)
+trace_get_fields(struct trace_event_call *event_call)
{
if (!event_call->class->get_fields)
return &event_call->class->fields;
@@ -89,7 +89,7 @@ __find_event_field(struct list_head *head, char *name)
}
struct ftrace_event_field *
-trace_find_event_field(struct ftrace_event_call *call, char *name)
+trace_find_event_field(struct trace_event_call *call, char *name)
{
struct ftrace_event_field *field;
struct list_head *head;
@@ -129,7 +129,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
return 0;
}
-int trace_define_field(struct ftrace_event_call *call, const char *type,
+int trace_define_field(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
int filter_type)
{
@@ -166,7 +166,7 @@ static int trace_define_common_fields(void)
return ret;
}
-static void trace_destroy_fields(struct ftrace_event_call *call)
+static void trace_destroy_fields(struct trace_event_call *call)
{
struct ftrace_event_field *field, *next;
struct list_head *head;
@@ -178,11 +178,11 @@ static void trace_destroy_fields(struct ftrace_event_call *call)
}
}
-int trace_event_raw_init(struct ftrace_event_call *call)
+int trace_event_raw_init(struct trace_event_call *call)
{
int id;
- id = register_ftrace_event(&call->event);
+ id = register_trace_event(&call->event);
if (!id)
return -ENODEV;
@@ -190,18 +190,18 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
-void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
- struct ftrace_event_file *ftrace_file,
- unsigned long len)
+void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
+ struct trace_event_file *trace_file,
+ unsigned long len)
{
- struct ftrace_event_call *event_call = ftrace_file->event_call;
+ struct trace_event_call *event_call = trace_file->event_call;
local_save_flags(fbuffer->flags);
fbuffer->pc = preempt_count();
- fbuffer->ftrace_file = ftrace_file;
+ fbuffer->trace_file = trace_file;
fbuffer->event =
- trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file,
+ trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
event_call->event.type, len,
fbuffer->flags, fbuffer->pc);
if (!fbuffer->event)
@@ -210,13 +210,13 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer,
fbuffer->entry = ring_buffer_event_data(fbuffer->event);
return fbuffer->entry;
}
-EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve);
+EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
static DEFINE_SPINLOCK(tracepoint_iter_lock);
-static void output_printk(struct ftrace_event_buffer *fbuffer)
+static void output_printk(struct trace_event_buffer *fbuffer)
{
- struct ftrace_event_call *event_call;
+ struct trace_event_call *event_call;
struct trace_event *event;
unsigned long flags;
struct trace_iterator *iter = tracepoint_print_iter;
@@ -224,12 +224,12 @@ static void output_printk(struct ftrace_event_buffer *fbuffer)
if (!iter)
return;
- event_call = fbuffer->ftrace_file->event_call;
+ event_call = fbuffer->trace_file->event_call;
if (!event_call || !event_call->event.funcs ||
!event_call->event.funcs->trace)
return;
- event = &fbuffer->ftrace_file->event_call->event;
+ event = &fbuffer->trace_file->event_call->event;
spin_lock_irqsave(&tracepoint_iter_lock, flags);
trace_seq_init(&iter->seq);
@@ -241,21 +241,21 @@ static void output_printk(struct ftrace_event_buffer *fbuffer)
spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}
-void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer)
+void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
if (tracepoint_printk)
output_printk(fbuffer);
- event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer,
+ event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
fbuffer->event, fbuffer->entry,
fbuffer->flags, fbuffer->pc);
}
-EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit);
+EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
-int ftrace_event_reg(struct ftrace_event_call *call,
- enum trace_reg type, void *data)
+int trace_event_reg(struct trace_event_call *call,
+ enum trace_reg type, void *data)
{
- struct ftrace_event_file *file = data;
+ struct trace_event_file *file = data;
WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
switch (type) {
@@ -288,34 +288,34 @@ int ftrace_event_reg(struct ftrace_event_call *call,
}
return 0;
}
-EXPORT_SYMBOL_GPL(ftrace_event_reg);
+EXPORT_SYMBOL_GPL(trace_event_reg);
void trace_event_enable_cmd_record(bool enable)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct trace_array *tr;
mutex_lock(&event_mutex);
do_for_each_event_file(tr, file) {
- if (!(file->flags & FTRACE_EVENT_FL_ENABLED))
+ if (!(file->flags & EVENT_FILE_FL_ENABLED))
continue;
if (enable) {
tracing_start_cmdline_record();
- set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
} else {
tracing_stop_cmdline_record();
- clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
} while_for_each_event_file();
mutex_unlock(&event_mutex);
}
-static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
+static int __ftrace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
int ret = 0;
int disable;
@@ -337,24 +337,24 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
if (soft_disable) {
if (atomic_dec_return(&file->sm_ref) > 0)
break;
- disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED;
- clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
+ disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
+ clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
} else
- disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE);
+ disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
- if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) {
- clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
- if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) {
+ if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
+ clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
+ if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
tracing_stop_cmdline_record();
- clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
call->class->reg(call, TRACE_REG_UNREGISTER, file);
}
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
- if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
- set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+ if (file->flags & EVENT_FILE_FL_SOFT_MODE)
+ set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
else
- clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
break;
case 1:
/*
@@ -366,31 +366,31 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
* it still seems to be disabled.
*/
if (!soft_disable)
- clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
else {
if (atomic_inc_return(&file->sm_ref) > 1)
break;
- set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
}
- if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) {
+ if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
/* Keep the event disabled, when going to SOFT_MODE. */
if (soft_disable)
- set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
if (trace_flags & TRACE_ITER_RECORD_CMD) {
tracing_start_cmdline_record();
- set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
ret = call->class->reg(call, TRACE_REG_REGISTER, file);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
- "%s\n", ftrace_event_name(call));
+ "%s\n", trace_event_name(call));
break;
}
- set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
/* WAS_ENABLED gets set but never cleared. */
call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
@@ -401,13 +401,13 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
return ret;
}
-int trace_event_enable_disable(struct ftrace_event_file *file,
+int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable)
{
return __ftrace_event_enable_disable(file, enable, soft_disable);
}
-static int ftrace_event_enable_disable(struct ftrace_event_file *file,
+static int ftrace_event_enable_disable(struct trace_event_file *file,
int enable)
{
return __ftrace_event_enable_disable(file, enable, 0);
@@ -415,7 +415,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file,
static void ftrace_clear_events(struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
@@ -449,14 +449,14 @@ static void __get_system(struct event_subsystem *system)
system_refcount_inc(system);
}
-static void __get_system_dir(struct ftrace_subsystem_dir *dir)
+static void __get_system_dir(struct trace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
dir->ref_count++;
__get_system(dir->subsystem);
}
-static void __put_system_dir(struct ftrace_subsystem_dir *dir)
+static void __put_system_dir(struct trace_subsystem_dir *dir)
{
WARN_ON_ONCE(dir->ref_count == 0);
/* If the subsystem is about to be freed, the dir must be too */
@@ -467,14 +467,14 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir)
kfree(dir);
}
-static void put_system(struct ftrace_subsystem_dir *dir)
+static void put_system(struct trace_subsystem_dir *dir)
{
mutex_lock(&event_mutex);
__put_system_dir(dir);
mutex_unlock(&event_mutex);
}
-static void remove_subsystem(struct ftrace_subsystem_dir *dir)
+static void remove_subsystem(struct trace_subsystem_dir *dir)
{
if (!dir)
return;
@@ -486,7 +486,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir)
}
}
-static void remove_event_file_dir(struct ftrace_event_file *file)
+static void remove_event_file_dir(struct trace_event_file *file)
{
struct dentry *dir = file->dir;
struct dentry *child;
@@ -515,15 +515,15 @@ static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
const char *sub, const char *event, int set)
{
- struct ftrace_event_file *file;
- struct ftrace_event_call *call;
+ struct trace_event_file *file;
+ struct trace_event_call *call;
const char *name;
int ret = -EINVAL;
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- name = ftrace_event_name(call);
+ name = trace_event_name(call);
if (!name || !call->class || !call->class->reg)
continue;
@@ -671,8 +671,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
static void *
t_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_file *file = v;
- struct ftrace_event_call *call;
+ struct trace_event_file *file = v;
+ struct trace_event_call *call;
struct trace_array *tr = m->private;
(*pos)++;
@@ -692,13 +692,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
static void *t_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct trace_array *tr = m->private;
loff_t l;
mutex_lock(&event_mutex);
- file = list_entry(&tr->events, struct ftrace_event_file, list);
+ file = list_entry(&tr->events, struct trace_event_file, list);
for (l = 0; l <= *pos; ) {
file = t_next(m, file, &l);
if (!file)
@@ -710,13 +710,13 @@ static void *t_start(struct seq_file *m, loff_t *pos)
static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_file *file = v;
+ struct trace_event_file *file = v;
struct trace_array *tr = m->private;
(*pos)++;
list_for_each_entry_continue(file, &tr->events, list) {
- if (file->flags & FTRACE_EVENT_FL_ENABLED)
+ if (file->flags & EVENT_FILE_FL_ENABLED)
return file;
}
@@ -725,13 +725,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
static void *s_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct trace_array *tr = m->private;
loff_t l;
mutex_lock(&event_mutex);
- file = list_entry(&tr->events, struct ftrace_event_file, list);
+ file = list_entry(&tr->events, struct trace_event_file, list);
for (l = 0; l <= *pos; ) {
file = s_next(m, file, &l);
if (!file)
@@ -742,12 +742,12 @@ static void *s_start(struct seq_file *m, loff_t *pos)
static int t_show(struct seq_file *m, void *v)
{
- struct ftrace_event_file *file = v;
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_file *file = v;
+ struct trace_event_call *call = file->event_call;
if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
seq_printf(m, "%s:", call->class->system);
- seq_printf(m, "%s\n", ftrace_event_name(call));
+ seq_printf(m, "%s\n", trace_event_name(call));
return 0;
}
@@ -761,7 +761,7 @@ static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
unsigned long flags;
char buf[4] = "0";
@@ -774,12 +774,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
if (!file)
return -ENODEV;
- if (flags & FTRACE_EVENT_FL_ENABLED &&
- !(flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ if (flags & EVENT_FILE_FL_ENABLED &&
+ !(flags & EVENT_FILE_FL_SOFT_DISABLED))
strcpy(buf, "1");
- if (flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
- flags & FTRACE_EVENT_FL_SOFT_MODE)
+ if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
+ flags & EVENT_FILE_FL_SOFT_MODE)
strcat(buf, "*");
strcat(buf, "\n");
@@ -791,7 +791,7 @@ static ssize_t
event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
unsigned long val;
int ret;
@@ -828,10 +828,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
const char set_to_char[4] = { '?', '0', '1', 'X' };
- struct ftrace_subsystem_dir *dir = filp->private_data;
+ struct trace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
- struct ftrace_event_call *call;
- struct ftrace_event_file *file;
+ struct trace_event_call *call;
+ struct trace_event_file *file;
struct trace_array *tr = dir->tr;
char buf[2];
int set = 0;
@@ -840,7 +840,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!ftrace_event_name(call) || !call->class || !call->class->reg)
+ if (!trace_event_name(call) || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system->name) != 0)
@@ -851,7 +851,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
* or if all events or cleared, or if we have
* a mixture.
*/
- set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED));
+ set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
/*
* If we have a mixture, no need to look further.
@@ -873,7 +873,7 @@ static ssize_t
system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_subsystem_dir *dir = filp->private_data;
+ struct trace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
const char *name = NULL;
unsigned long val;
@@ -917,7 +917,7 @@ enum {
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct ftrace_event_call *call = event_file_data(m->private);
+ struct trace_event_call *call = event_file_data(m->private);
struct list_head *common_head = &ftrace_common_fields;
struct list_head *head = trace_get_fields(call);
struct list_head *node = v;
@@ -949,13 +949,13 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos)
static int f_show(struct seq_file *m, void *v)
{
- struct ftrace_event_call *call = event_file_data(m->private);
+ struct trace_event_call *call = event_file_data(m->private);
struct ftrace_event_field *field;
const char *array_descriptor;
switch ((unsigned long)v) {
case FORMAT_HEADER:
- seq_printf(m, "name: %s\n", ftrace_event_name(call));
+ seq_printf(m, "name: %s\n", trace_event_name(call));
seq_printf(m, "ID: %d\n", call->event.type);
seq_puts(m, "format:\n");
return 0;
@@ -1062,7 +1062,7 @@ static ssize_t
event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct trace_seq *s;
int r = -ENODEV;
@@ -1095,7 +1095,7 @@ static ssize_t
event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
char *buf;
int err = -ENODEV;
@@ -1132,7 +1132,7 @@ static LIST_HEAD(event_subsystems);
static int subsystem_open(struct inode *inode, struct file *filp)
{
struct event_subsystem *system = NULL;
- struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */
+ struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
struct trace_array *tr;
int ret;
@@ -1181,7 +1181,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
static int system_tr_open(struct inode *inode, struct file *filp)
{
- struct ftrace_subsystem_dir *dir;
+ struct trace_subsystem_dir *dir;
struct trace_array *tr = inode->i_private;
int ret;
@@ -1214,7 +1214,7 @@ static int system_tr_open(struct inode *inode, struct file *filp)
static int subsystem_release(struct inode *inode, struct file *file)
{
- struct ftrace_subsystem_dir *dir = file->private_data;
+ struct trace_subsystem_dir *dir = file->private_data;
trace_array_put(dir->tr);
@@ -1235,7 +1235,7 @@ static ssize_t
subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_subsystem_dir *dir = filp->private_data;
+ struct trace_subsystem_dir *dir = filp->private_data;
struct event_subsystem *system = dir->subsystem;
struct trace_seq *s;
int r;
@@ -1262,7 +1262,7 @@ static ssize_t
subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
loff_t *ppos)
{
- struct ftrace_subsystem_dir *dir = filp->private_data;
+ struct trace_subsystem_dir *dir = filp->private_data;
char *buf;
int err;
@@ -1497,9 +1497,9 @@ create_new_subsystem(const char *name)
static struct dentry *
event_subsystem_dir(struct trace_array *tr, const char *name,
- struct ftrace_event_file *file, struct dentry *parent)
+ struct trace_event_file *file, struct dentry *parent)
{
- struct ftrace_subsystem_dir *dir;
+ struct trace_subsystem_dir *dir;
struct event_subsystem *system;
struct dentry *entry;
@@ -1571,9 +1571,9 @@ event_subsystem_dir(struct trace_array *tr, const char *name,
}
static int
-event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
+event_create_dir(struct dentry *parent, struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
struct list_head *head;
struct dentry *d_events;
@@ -1591,7 +1591,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
} else
d_events = parent;
- name = ftrace_event_name(call);
+ name = trace_event_name(call);
file->dir = tracefs_create_dir(name, d_events);
if (!file->dir) {
pr_warn("Could not create tracefs '%s' directory\n", name);
@@ -1634,9 +1634,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
return 0;
}
-static void remove_event_from_tracers(struct ftrace_event_call *call)
+static void remove_event_from_tracers(struct trace_event_call *call)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct trace_array *tr;
do_for_each_event_file_safe(tr, file) {
@@ -1654,10 +1654,10 @@ static void remove_event_from_tracers(struct ftrace_event_call *call)
} while_for_each_event_file();
}
-static void event_remove(struct ftrace_event_call *call)
+static void event_remove(struct trace_event_call *call)
{
struct trace_array *tr;
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
do_for_each_event_file(tr, file) {
if (file->event_call != call)
@@ -1673,17 +1673,17 @@ static void event_remove(struct ftrace_event_call *call)
} while_for_each_event_file();
if (call->event.funcs)
- __unregister_ftrace_event(&call->event);
+ __unregister_trace_event(&call->event);
remove_event_from_tracers(call);
list_del(&call->list);
}
-static int event_init(struct ftrace_event_call *call)
+static int event_init(struct trace_event_call *call)
{
int ret = 0;
const char *name;
- name = ftrace_event_name(call);
+ name = trace_event_name(call);
if (WARN_ON(!name))
return -EINVAL;
@@ -1697,7 +1697,7 @@ static int event_init(struct ftrace_event_call *call)
}
static int
-__register_event(struct ftrace_event_call *call, struct module *mod)
+__register_event(struct trace_event_call *call, struct module *mod)
{
int ret;
@@ -1733,7 +1733,7 @@ static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
return ptr + elen;
}
-static void update_event_printk(struct ftrace_event_call *call,
+static void update_event_printk(struct trace_event_call *call,
struct trace_enum_map *map)
{
char *ptr;
@@ -1811,7 +1811,7 @@ static void update_event_printk(struct ftrace_event_call *call,
void trace_event_enum_update(struct trace_enum_map **map, int len)
{
- struct ftrace_event_call *call, *p;
+ struct trace_event_call *call, *p;
const char *last_system = NULL;
int last_i;
int i;
@@ -1836,11 +1836,11 @@ void trace_event_enum_update(struct trace_enum_map **map, int len)
up_write(&trace_event_sem);
}
-static struct ftrace_event_file *
-trace_create_new_event(struct ftrace_event_call *call,
+static struct trace_event_file *
+trace_create_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
file = kmem_cache_alloc(file_cachep, GFP_TRACE);
if (!file)
@@ -1858,9 +1858,9 @@ trace_create_new_event(struct ftrace_event_call *call,
/* Add an event to a trace directory */
static int
-__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr)
+__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
file = trace_create_new_event(call, tr);
if (!file)
@@ -1875,10 +1875,10 @@ __trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr)
* the filesystem is initialized.
*/
static __init int
-__trace_early_add_new_event(struct ftrace_event_call *call,
+__trace_early_add_new_event(struct trace_event_call *call,
struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
file = trace_create_new_event(call, tr);
if (!file)
@@ -1888,10 +1888,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call,
}
struct ftrace_module_file_ops;
-static void __add_event_to_tracers(struct ftrace_event_call *call);
+static void __add_event_to_tracers(struct trace_event_call *call);
/* Add an additional event_call dynamically */
-int trace_add_event_call(struct ftrace_event_call *call)
+int trace_add_event_call(struct trace_event_call *call)
{
int ret;
mutex_lock(&trace_types_lock);
@@ -1910,7 +1910,7 @@ int trace_add_event_call(struct ftrace_event_call *call)
* Must be called under locking of trace_types_lock, event_mutex and
* trace_event_sem.
*/
-static void __trace_remove_event_call(struct ftrace_event_call *call)
+static void __trace_remove_event_call(struct trace_event_call *call)
{
event_remove(call);
trace_destroy_fields(call);
@@ -1918,10 +1918,10 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
call->filter = NULL;
}
-static int probe_remove_event_call(struct ftrace_event_call *call)
+static int probe_remove_event_call(struct trace_event_call *call)
{
struct trace_array *tr;
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
#ifdef CONFIG_PERF_EVENTS
if (call->perf_refcount)
@@ -1932,10 +1932,10 @@ static int probe_remove_event_call(struct ftrace_event_call *call)
continue;
/*
* We can't rely on ftrace_event_enable_disable(enable => 0)
- * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
+ * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress
* TRACE_REG_UNREGISTER.
*/
- if (file->flags & FTRACE_EVENT_FL_ENABLED)
+ if (file->flags & EVENT_FILE_FL_ENABLED)
return -EBUSY;
/*
* The do_for_each_event_file_safe() is
@@ -1952,7 +1952,7 @@ static int probe_remove_event_call(struct ftrace_event_call *call)
}
/* Remove an event_call */
-int trace_remove_event_call(struct ftrace_event_call *call)
+int trace_remove_event_call(struct trace_event_call *call)
{
int ret;
@@ -1976,7 +1976,7 @@ int trace_remove_event_call(struct ftrace_event_call *call)
static void trace_module_add_events(struct module *mod)
{
- struct ftrace_event_call **call, **start, **end;
+ struct trace_event_call **call, **start, **end;
if (!mod->num_trace_events)
return;
@@ -1999,7 +1999,7 @@ static void trace_module_add_events(struct module *mod)
static void trace_module_remove_events(struct module *mod)
{
- struct ftrace_event_call *call, *p;
+ struct trace_event_call *call, *p;
bool clear_trace = false;
down_write(&trace_event_sem);
@@ -2055,28 +2055,28 @@ static struct notifier_block trace_module_nb = {
static void
__trace_add_event_dirs(struct trace_array *tr)
{
- struct ftrace_event_call *call;
+ struct trace_event_call *call;
int ret;
list_for_each_entry(call, &ftrace_events, list) {
ret = __trace_add_new_event(call, tr);
if (ret < 0)
pr_warn("Could not create directory for event %s\n",
- ftrace_event_name(call));
+ trace_event_name(call));
}
}
-struct ftrace_event_file *
+struct trace_event_file *
find_event_file(struct trace_array *tr, const char *system, const char *event)
{
- struct ftrace_event_file *file;
- struct ftrace_event_call *call;
+ struct trace_event_file *file;
+ struct trace_event_call *call;
const char *name;
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- name = ftrace_event_name(call);
+ name = trace_event_name(call);
if (!name || !call->class || !call->class->reg)
continue;
@@ -2098,7 +2098,7 @@ find_event_file(struct trace_array *tr, const char *system, const char *event)
#define DISABLE_EVENT_STR "disable_event"
struct event_probe_data {
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
unsigned long count;
int ref;
bool enable;
@@ -2114,9 +2114,9 @@ event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
return;
if (data->enable)
- clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
+ clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
else
- set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags);
+ set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
}
static void
@@ -2132,7 +2132,7 @@ event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data
return;
/* Skip if the event is in a state we want to switch to */
- if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
return;
if (data->count != -1)
@@ -2152,7 +2152,7 @@ event_enable_print(struct seq_file *m, unsigned long ip,
seq_printf(m, "%s:%s:%s",
data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
data->file->event_call->class->system,
- ftrace_event_name(data->file->event_call));
+ trace_event_name(data->file->event_call));
if (data->count == -1)
seq_puts(m, ":unlimited\n");
@@ -2226,7 +2226,7 @@ event_enable_func(struct ftrace_hash *hash,
char *glob, char *cmd, char *param, int enabled)
{
struct trace_array *tr = top_trace_array();
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct ftrace_probe_ops *ops;
struct event_probe_data *data;
const char *system;
@@ -2358,7 +2358,7 @@ static inline int register_event_cmds(void) { return 0; }
#endif /* CONFIG_DYNAMIC_FTRACE */
/*
- * The top level array has already had its ftrace_event_file
+ * The top level array has already had its trace_event_file
* descriptors created in order to allow for early events to
* be recorded. This function is called after the tracefs has been
* initialized, and we now have to create the files associated
@@ -2367,7 +2367,7 @@ static inline int register_event_cmds(void) { return 0; }
static __init void
__trace_early_add_event_dirs(struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
int ret;
@@ -2375,7 +2375,7 @@ __trace_early_add_event_dirs(struct trace_array *tr)
ret = event_create_dir(tr->event_dir, file);
if (ret < 0)
pr_warn("Could not create directory for event %s\n",
- ftrace_event_name(file->event_call));
+ trace_event_name(file->event_call));
}
}
@@ -2388,7 +2388,7 @@ __trace_early_add_event_dirs(struct trace_array *tr)
static __init void
__trace_early_add_events(struct trace_array *tr)
{
- struct ftrace_event_call *call;
+ struct trace_event_call *call;
int ret;
list_for_each_entry(call, &ftrace_events, list) {
@@ -2399,7 +2399,7 @@ __trace_early_add_events(struct trace_array *tr)
ret = __trace_early_add_new_event(call, tr);
if (ret < 0)
pr_warn("Could not create early event %s\n",
- ftrace_event_name(call));
+ trace_event_name(call));
}
}
@@ -2407,13 +2407,13 @@ __trace_early_add_events(struct trace_array *tr)
static void
__trace_remove_event_dirs(struct trace_array *tr)
{
- struct ftrace_event_file *file, *next;
+ struct trace_event_file *file, *next;
list_for_each_entry_safe(file, next, &tr->events, list)
remove_event_file_dir(file);
}
-static void __add_event_to_tracers(struct ftrace_event_call *call)
+static void __add_event_to_tracers(struct trace_event_call *call)
{
struct trace_array *tr;
@@ -2421,8 +2421,8 @@ static void __add_event_to_tracers(struct ftrace_event_call *call)
__trace_add_new_event(call, tr);
}
-extern struct ftrace_event_call *__start_ftrace_events[];
-extern struct ftrace_event_call *__stop_ftrace_events[];
+extern struct trace_event_call *__start_ftrace_events[];
+extern struct trace_event_call *__stop_ftrace_events[];
static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
@@ -2557,7 +2557,7 @@ int event_trace_del_tracer(struct trace_array *tr)
static __init int event_trace_memsetup(void)
{
field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
- file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC);
+ file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
return 0;
}
@@ -2593,7 +2593,7 @@ early_enable_events(struct trace_array *tr, bool disable_first)
static __init int event_trace_enable(void)
{
struct trace_array *tr = top_trace_array();
- struct ftrace_event_call **iter, *call;
+ struct trace_event_call **iter, *call;
int ret;
if (!tr)
@@ -2754,9 +2754,9 @@ static __init void event_test_stuff(void)
*/
static __init void event_trace_self_tests(void)
{
- struct ftrace_subsystem_dir *dir;
- struct ftrace_event_file *file;
- struct ftrace_event_call *call;
+ struct trace_subsystem_dir *dir;
+ struct trace_event_file *file;
+ struct trace_event_call *call;
struct event_subsystem *system;
struct trace_array *tr;
int ret;
@@ -2787,13 +2787,13 @@ static __init void event_trace_self_tests(void)
continue;
#endif
- pr_info("Testing event %s: ", ftrace_event_name(call));
+ pr_info("Testing event %s: ", trace_event_name(call));
/*
* If an event is already enabled, someone is using
* it and the self test should not be on.
*/
- if (file->flags & FTRACE_EVENT_FL_ENABLED) {
+ if (file->flags & EVENT_FILE_FL_ENABLED) {
pr_warn("Enabled event during self test!\n");
WARN_ON_ONCE(1);
continue;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 7f2e97ce71a7..d81d6f302b14 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -643,7 +643,7 @@ static void append_filter_err(struct filter_parse_state *ps,
free_page((unsigned long) buf);
}
-static inline struct event_filter *event_filter(struct ftrace_event_file *file)
+static inline struct event_filter *event_filter(struct trace_event_file *file)
{
if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
return file->event_call->filter;
@@ -652,7 +652,7 @@ static inline struct event_filter *event_filter(struct ftrace_event_file *file)
}
/* caller must hold event_mutex */
-void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s)
+void print_event_filter(struct trace_event_file *file, struct trace_seq *s)
{
struct event_filter *filter = event_filter(file);
@@ -780,14 +780,14 @@ static void __free_preds(struct event_filter *filter)
filter->n_preds = 0;
}
-static void filter_disable(struct ftrace_event_file *file)
+static void filter_disable(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
call->flags &= ~TRACE_EVENT_FL_FILTERED;
else
- file->flags &= ~FTRACE_EVENT_FL_FILTERED;
+ file->flags &= ~EVENT_FILE_FL_FILTERED;
}
static void __free_filter(struct event_filter *filter)
@@ -837,9 +837,9 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
return 0;
}
-static inline void __remove_filter(struct ftrace_event_file *file)
+static inline void __remove_filter(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
filter_disable(file);
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
@@ -848,10 +848,10 @@ static inline void __remove_filter(struct ftrace_event_file *file)
remove_filter_string(file->filter);
}
-static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir,
+static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
@@ -860,9 +860,9 @@ static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir,
}
}
-static inline void __free_subsystem_filter(struct ftrace_event_file *file)
+static inline void __free_subsystem_filter(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) {
__free_filter(call->filter);
@@ -873,10 +873,10 @@ static inline void __free_subsystem_filter(struct ftrace_event_file *file)
}
}
-static void filter_free_subsystem_filters(struct ftrace_subsystem_dir *dir,
+static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
if (file->system != dir)
@@ -1056,6 +1056,9 @@ static void parse_init(struct filter_parse_state *ps,
static char infix_next(struct filter_parse_state *ps)
{
+ if (!ps->infix.cnt)
+ return 0;
+
ps->infix.cnt--;
return ps->infix.string[ps->infix.tail++];
@@ -1071,6 +1074,9 @@ static char infix_peek(struct filter_parse_state *ps)
static void infix_advance(struct filter_parse_state *ps)
{
+ if (!ps->infix.cnt)
+ return;
+
ps->infix.cnt--;
ps->infix.tail++;
}
@@ -1336,7 +1342,7 @@ parse_operand:
}
static struct filter_pred *create_pred(struct filter_parse_state *ps,
- struct ftrace_event_call *call,
+ struct trace_event_call *call,
int op, char *operand1, char *operand2)
{
struct ftrace_event_field *field;
@@ -1385,7 +1391,9 @@ static int check_preds(struct filter_parse_state *ps)
if (elt->op != OP_NOT)
cnt--;
n_normal_preds++;
- WARN_ON_ONCE(cnt < 0);
+ /* all ops should have operands */
+ if (cnt < 0)
+ break;
}
if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) {
@@ -1556,7 +1564,7 @@ static int fold_pred_tree(struct event_filter *filter,
filter->preds);
}
-static int replace_preds(struct ftrace_event_call *call,
+static int replace_preds(struct trace_event_call *call,
struct event_filter *filter,
struct filter_parse_state *ps,
bool dry_run)
@@ -1669,20 +1677,20 @@ fail:
return err;
}
-static inline void event_set_filtered_flag(struct ftrace_event_file *file)
+static inline void event_set_filtered_flag(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
call->flags |= TRACE_EVENT_FL_FILTERED;
else
- file->flags |= FTRACE_EVENT_FL_FILTERED;
+ file->flags |= EVENT_FILE_FL_FILTERED;
}
-static inline void event_set_filter(struct ftrace_event_file *file,
+static inline void event_set_filter(struct trace_event_file *file,
struct event_filter *filter)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
rcu_assign_pointer(call->filter, filter);
@@ -1690,9 +1698,9 @@ static inline void event_set_filter(struct ftrace_event_file *file,
rcu_assign_pointer(file->filter, filter);
}
-static inline void event_clear_filter(struct ftrace_event_file *file)
+static inline void event_clear_filter(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
RCU_INIT_POINTER(call->filter, NULL);
@@ -1701,33 +1709,33 @@ static inline void event_clear_filter(struct ftrace_event_file *file)
}
static inline void
-event_set_no_set_filter_flag(struct ftrace_event_file *file)
+event_set_no_set_filter_flag(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
call->flags |= TRACE_EVENT_FL_NO_SET_FILTER;
else
- file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER;
+ file->flags |= EVENT_FILE_FL_NO_SET_FILTER;
}
static inline void
-event_clear_no_set_filter_flag(struct ftrace_event_file *file)
+event_clear_no_set_filter_flag(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER)
call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER;
else
- file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER;
+ file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER;
}
static inline bool
-event_no_set_filter_flag(struct ftrace_event_file *file)
+event_no_set_filter_flag(struct trace_event_file *file)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
- if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER)
+ if (file->flags & EVENT_FILE_FL_NO_SET_FILTER)
return true;
if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) &&
@@ -1742,12 +1750,12 @@ struct filter_list {
struct event_filter *filter;
};
-static int replace_system_preds(struct ftrace_subsystem_dir *dir,
+static int replace_system_preds(struct trace_subsystem_dir *dir,
struct trace_array *tr,
struct filter_parse_state *ps,
char *filter_string)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct filter_list *filter_item;
struct filter_list *tmp;
LIST_HEAD(filter_list);
@@ -1891,8 +1899,8 @@ static void create_filter_finish(struct filter_parse_state *ps)
}
/**
- * create_filter - create a filter for a ftrace_event_call
- * @call: ftrace_event_call to create a filter for
+ * create_filter - create a filter for a trace_event_call
+ * @call: trace_event_call to create a filter for
* @filter_str: filter string
* @set_str: remember @filter_str and enable detailed error in filter
* @filterp: out param for created filter (always updated on return)
@@ -1906,7 +1914,7 @@ static void create_filter_finish(struct filter_parse_state *ps)
* information if @set_str is %true and the caller is responsible for
* freeing it.
*/
-static int create_filter(struct ftrace_event_call *call,
+static int create_filter(struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp)
{
@@ -1926,7 +1934,7 @@ static int create_filter(struct ftrace_event_call *call,
return err;
}
-int create_event_filter(struct ftrace_event_call *call,
+int create_event_filter(struct trace_event_call *call,
char *filter_str, bool set_str,
struct event_filter **filterp)
{
@@ -1942,7 +1950,7 @@ int create_event_filter(struct ftrace_event_call *call,
* Identical to create_filter() except that it creates a subsystem filter
* and always remembers @filter_str.
*/
-static int create_system_filter(struct ftrace_subsystem_dir *dir,
+static int create_system_filter(struct trace_subsystem_dir *dir,
struct trace_array *tr,
char *filter_str, struct event_filter **filterp)
{
@@ -1968,9 +1976,9 @@ static int create_system_filter(struct ftrace_subsystem_dir *dir,
}
/* caller must hold event_mutex */
-int apply_event_filter(struct ftrace_event_file *file, char *filter_string)
+int apply_event_filter(struct trace_event_file *file, char *filter_string)
{
- struct ftrace_event_call *call = file->event_call;
+ struct trace_event_call *call = file->event_call;
struct event_filter *filter;
int err;
@@ -2019,7 +2027,7 @@ int apply_event_filter(struct ftrace_event_file *file, char *filter_string)
return err;
}
-int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
+int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
char *filter_string)
{
struct event_subsystem *system = dir->subsystem;
@@ -2082,7 +2090,7 @@ struct function_filter_data {
static char **
ftrace_function_filter_re(char *buf, int len, int *count)
{
- char *str, *sep, **re;
+ char *str, **re;
str = kstrndup(buf, len, GFP_KERNEL);
if (!str)
@@ -2092,8 +2100,7 @@ ftrace_function_filter_re(char *buf, int len, int *count)
* The argv_split function takes white space
* as a separator, so convert ',' into spaces.
*/
- while ((sep = strchr(str, ',')))
- *sep = ' ';
+ strreplace(str, ',', ' ');
re = argv_split(GFP_KERNEL, str, count);
kfree(str);
@@ -2219,7 +2226,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
{
int err;
struct event_filter *filter;
- struct ftrace_event_call *call;
+ struct trace_event_call *call;
mutex_lock(&event_mutex);
@@ -2275,7 +2282,7 @@ out_unlock:
static struct test_filter_data_t {
char *filter;
- struct ftrace_raw_ftrace_test_filter rec;
+ struct trace_event_raw_ftrace_test_filter rec;
int match;
char *not_visited;
} test_filter_data[] = {
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 8712df9decb4..42a4009fd75a 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -40,7 +40,7 @@ trigger_data_free(struct event_trigger_data *data)
/**
* event_triggers_call - Call triggers associated with a trace event
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
* @rec: The trace entry for the event, NULL for unconditional invocation
*
* For each trigger associated with an event, invoke the trigger
@@ -63,7 +63,7 @@ trigger_data_free(struct event_trigger_data *data)
* any trigger that should be deferred, ETT_NONE if nothing to defer.
*/
enum event_trigger_type
-event_triggers_call(struct ftrace_event_file *file, void *rec)
+event_triggers_call(struct trace_event_file *file, void *rec)
{
struct event_trigger_data *data;
enum event_trigger_type tt = ETT_NONE;
@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
/**
* event_triggers_post_call - Call 'post_triggers' for a trace event
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
* @tt: enum event_trigger_type containing a set bit for each trigger to invoke
*
* For each trigger associated with an event, invoke the trigger
@@ -103,7 +103,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call);
* Called from tracepoint handlers (with rcu_read_lock_sched() held).
*/
void
-event_triggers_post_call(struct ftrace_event_file *file,
+event_triggers_post_call(struct trace_event_file *file,
enum event_trigger_type tt)
{
struct event_trigger_data *data;
@@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(event_triggers_post_call);
static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
{
- struct ftrace_event_file *event_file = event_file_data(m->private);
+ struct trace_event_file *event_file = event_file_data(m->private);
if (t == SHOW_AVAILABLE_TRIGGERS)
return NULL;
@@ -129,7 +129,7 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos)
static void *trigger_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_file *event_file;
+ struct trace_event_file *event_file;
/* ->stop() is called even if ->start() fails */
mutex_lock(&event_mutex);
@@ -201,7 +201,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
return ret;
}
-static int trigger_process_regex(struct ftrace_event_file *file, char *buff)
+static int trigger_process_regex(struct trace_event_file *file, char *buff)
{
char *command, *next = buff;
struct event_command *p;
@@ -227,7 +227,7 @@ static ssize_t event_trigger_regex_write(struct file *file,
const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- struct ftrace_event_file *event_file;
+ struct trace_event_file *event_file;
ssize_t ret;
char *buf;
@@ -430,7 +430,7 @@ event_trigger_free(struct event_trigger_ops *ops,
trigger_data_free(data);
}
-static int trace_event_trigger_enable_disable(struct ftrace_event_file *file,
+static int trace_event_trigger_enable_disable(struct trace_event_file *file,
int trigger_enable)
{
int ret = 0;
@@ -438,12 +438,12 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file,
if (trigger_enable) {
if (atomic_inc_return(&file->tm_ref) > 1)
return ret;
- set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags);
ret = trace_event_enable_disable(file, 1, 1);
} else {
if (atomic_dec_return(&file->tm_ref) > 0)
return ret;
- clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags);
ret = trace_event_enable_disable(file, 0, 1);
}
@@ -466,7 +466,7 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file,
void
clear_event_triggers(struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list) {
struct event_trigger_data *data;
@@ -480,7 +480,7 @@ clear_event_triggers(struct trace_array *tr)
/**
* update_cond_flag - Set or reset the TRIGGER_COND bit
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
*
* If an event has triggers and any of those triggers has a filter or
* a post_trigger, trigger invocation needs to be deferred until after
@@ -488,7 +488,7 @@ clear_event_triggers(struct trace_array *tr)
* its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be
* cleared.
*/
-static void update_cond_flag(struct ftrace_event_file *file)
+static void update_cond_flag(struct trace_event_file *file)
{
struct event_trigger_data *data;
bool set_cond = false;
@@ -501,9 +501,9 @@ static void update_cond_flag(struct ftrace_event_file *file)
}
if (set_cond)
- set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags);
+ set_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags);
else
- clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags);
+ clear_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags);
}
/**
@@ -511,7 +511,7 @@ static void update_cond_flag(struct ftrace_event_file *file)
* @glob: The raw string used to register the trigger
* @ops: The trigger ops associated with the trigger
* @data: Trigger-specific data to associate with the trigger
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
*
* Common implementation for event trigger registration.
*
@@ -522,7 +522,7 @@ static void update_cond_flag(struct ftrace_event_file *file)
*/
static int register_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
struct event_trigger_data *test;
int ret = 0;
@@ -557,7 +557,7 @@ out:
* @glob: The raw string used to register the trigger
* @ops: The trigger ops associated with the trigger
* @test: Trigger-specific data used to find the trigger to remove
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
*
* Common implementation for event trigger unregistration.
*
@@ -566,7 +566,7 @@ out:
*/
static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *test,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
struct event_trigger_data *data;
bool unregistered = false;
@@ -588,7 +588,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
/**
* event_trigger_callback - Generic event_command @func implementation
* @cmd_ops: The command ops, used for trigger registration
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
* @glob: The raw string used to register the trigger
* @cmd: The cmd portion of the string used to register the trigger
* @param: The params portion of the string used to register the trigger
@@ -603,7 +603,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops,
*/
static int
event_trigger_callback(struct event_command *cmd_ops,
- struct ftrace_event_file *file,
+ struct trace_event_file *file,
char *glob, char *cmd, char *param)
{
struct event_trigger_data *trigger_data;
@@ -688,7 +688,7 @@ event_trigger_callback(struct event_command *cmd_ops,
* set_trigger_filter - Generic event_command @set_filter implementation
* @filter_str: The filter string for the trigger, NULL to remove filter
* @trigger_data: Trigger-specific data
- * @file: The ftrace_event_file associated with the event
+ * @file: The trace_event_file associated with the event
*
* Common implementation for event command filter parsing and filter
* instantiation.
@@ -702,7 +702,7 @@ event_trigger_callback(struct event_command *cmd_ops,
*/
static int set_trigger_filter(char *filter_str,
struct event_trigger_data *trigger_data,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
struct event_trigger_data *data = trigger_data;
struct event_filter *filter = NULL, *tmp;
@@ -900,7 +900,7 @@ snapshot_count_trigger(struct event_trigger_data *data)
static int
register_snapshot_trigger(char *glob, struct event_trigger_ops *ops,
struct event_trigger_data *data,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
int ret = register_trigger(glob, ops, data, file);
@@ -968,7 +968,7 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
* Skip 3:
* stacktrace_trigger()
* event_triggers_post_call()
- * ftrace_raw_event_xxx()
+ * trace_event_raw_event_xxx()
*/
#define STACK_SKIP 3
@@ -1053,7 +1053,7 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void)
#define DISABLE_EVENT_STR "disable_event"
struct enable_trigger_data {
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
bool enable;
};
@@ -1063,9 +1063,9 @@ event_enable_trigger(struct event_trigger_data *data)
struct enable_trigger_data *enable_data = data->private_data;
if (enable_data->enable)
- clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
+ clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
else
- set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
+ set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags);
}
static void
@@ -1077,7 +1077,7 @@ event_enable_count_trigger(struct event_trigger_data *data)
return;
/* Skip if the event is in a state we want to switch to */
- if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+ if (enable_data->enable == !(enable_data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
return;
if (data->count != -1)
@@ -1095,7 +1095,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
seq_printf(m, "%s:%s:%s",
enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
enable_data->file->event_call->class->system,
- ftrace_event_name(enable_data->file->event_call));
+ trace_event_name(enable_data->file->event_call));
if (data->count == -1)
seq_puts(m, ":unlimited");
@@ -1159,10 +1159,10 @@ static struct event_trigger_ops event_disable_count_trigger_ops = {
static int
event_enable_trigger_func(struct event_command *cmd_ops,
- struct ftrace_event_file *file,
+ struct trace_event_file *file,
char *glob, char *cmd, char *param)
{
- struct ftrace_event_file *event_enable_file;
+ struct trace_event_file *event_enable_file;
struct enable_trigger_data *enable_data;
struct event_trigger_data *trigger_data;
struct event_trigger_ops *trigger_ops;
@@ -1294,7 +1294,7 @@ event_enable_trigger_func(struct event_command *cmd_ops,
static int event_enable_register_trigger(char *glob,
struct event_trigger_ops *ops,
struct event_trigger_data *data,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
struct enable_trigger_data *enable_data = data->private_data;
struct enable_trigger_data *test_enable_data;
@@ -1331,7 +1331,7 @@ out:
static void event_enable_unregister_trigger(char *glob,
struct event_trigger_ops *ops,
struct event_trigger_data *test,
- struct ftrace_event_file *file)
+ struct trace_event_file *file)
{
struct enable_trigger_data *test_enable_data = test->private_data;
struct enable_trigger_data *enable_data;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 174a6a71146c..adabf7da9113 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -125,7 +125,7 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
static int __init \
-ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
+ftrace_define_fields_##name(struct trace_event_call *event_call) \
{ \
struct struct_name field; \
int ret; \
@@ -163,14 +163,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
regfn) \
\
-struct ftrace_event_class __refdata event_class_ftrace_##call = { \
+struct trace_event_class __refdata event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
.reg = regfn, \
}; \
\
-struct ftrace_event_call __used event_##call = { \
+struct trace_event_call __used event_##call = { \
.class = &event_class_ftrace_##call, \
{ \
.name = #call, \
@@ -179,7 +179,7 @@ struct ftrace_event_call __used event_##call = { \
.print_fmt = print, \
.flags = TRACE_EVENT_FL_IGNORE_ENABLE, \
}; \
-struct ftrace_event_call __used \
+struct trace_event_call __used \
__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
#undef FTRACE_ENTRY
@@ -187,7 +187,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
FTRACE_ENTRY_REG(call, struct_name, etype, \
PARAMS(tstruct), PARAMS(print), filter, NULL)
-int ftrace_event_is_function(struct ftrace_event_call *call)
+int ftrace_event_is_function(struct trace_event_call *call)
{
return call == &event_function;
}
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a51e79688455..8968bf720c12 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -278,7 +278,7 @@ int __trace_graph_entry(struct trace_array *tr,
unsigned long flags,
int pc)
{
- struct ftrace_event_call *call = &event_funcgraph_entry;
+ struct trace_event_call *call = &event_funcgraph_entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ftrace_graph_ent_entry *entry;
@@ -393,7 +393,7 @@ void __trace_graph_return(struct trace_array *tr,
unsigned long flags,
int pc)
{
- struct ftrace_event_call *call = &event_funcgraph_exit;
+ struct trace_event_call *call = &event_funcgraph_exit;
struct ring_buffer_event *event;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ftrace_graph_ret_entry *entry;
@@ -1454,12 +1454,12 @@ static __init int init_graph_trace(void)
{
max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
- if (!register_ftrace_event(&graph_trace_entry_event)) {
+ if (!register_trace_event(&graph_trace_entry_event)) {
pr_warning("Warning: could not register graph trace events\n");
return 1;
}
- if (!register_ftrace_event(&graph_trace_ret_event)) {
+ if (!register_trace_event(&graph_trace_ret_event)) {
pr_warning("Warning: could not register graph trace events\n");
return 1;
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index d0ce590f06e1..b7d0cdd9906c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -348,7 +348,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
struct trace_kprobe *tk;
list_for_each_entry(tk, &probe_list, list)
- if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 &&
+ if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
strcmp(tk->tp.call.class->system, group) == 0)
return tk;
return NULL;
@@ -359,7 +359,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
*/
static int
-enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)
+enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
int ret = 0;
@@ -394,7 +394,7 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)
* if the file is NULL, disable "perf" handler, or disable "trace" handler.
*/
static int
-disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file)
+disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
{
struct event_file_link *link = NULL;
int wait = 0;
@@ -523,7 +523,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
mutex_lock(&probe_lock);
/* Delete old (same name) event if exist */
- old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call),
+ old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
tk->tp.call.class->system);
if (old_tk) {
ret = unregister_trace_kprobe(old_tk);
@@ -572,7 +572,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (ret)
pr_warning("Failed to re-register probe %s on"
"%s: %d\n",
- ftrace_event_name(&tk->tp.call),
+ trace_event_name(&tk->tp.call),
mod->name, ret);
}
}
@@ -829,7 +829,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
seq_printf(m, ":%s/%s", tk->tp.call.class->system,
- ftrace_event_name(&tk->tp.call));
+ trace_event_name(&tk->tp.call));
if (!tk->symbol)
seq_printf(m, " 0x%p", tk->rp.kp.addr);
@@ -888,7 +888,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
struct trace_kprobe *tk = v;
seq_printf(m, " %-44s %15lu %15lu\n",
- ftrace_event_name(&tk->tp.call), tk->nhit,
+ trace_event_name(&tk->tp.call), tk->nhit,
tk->rp.kp.nmissed);
return 0;
@@ -917,18 +917,18 @@ static const struct file_operations kprobe_profile_ops = {
/* Kprobe handler */
static nokprobe_inline void
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
- struct ftrace_event_file *ftrace_file)
+ struct trace_event_file *trace_file)
{
struct kprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
int size, dsize, pc;
unsigned long irq_flags;
- struct ftrace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = &tk->tp.call;
- WARN_ON(call != ftrace_file->event_call);
+ WARN_ON(call != trace_file->event_call);
- if (ftrace_trigger_soft_disabled(ftrace_file))
+ if (trace_trigger_soft_disabled(trace_file))
return;
local_save_flags(irq_flags);
@@ -937,7 +937,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
dsize = __get_data_size(&tk->tp, regs);
size = sizeof(*entry) + tk->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
call->event.type,
size, irq_flags, pc);
if (!event)
@@ -947,7 +947,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
entry->ip = (unsigned long)tk->rp.kp.addr;
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
- event_trigger_unlock_commit_regs(ftrace_file, buffer, event,
+ event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
}
@@ -965,18 +965,18 @@ NOKPROBE_SYMBOL(kprobe_trace_func);
static nokprobe_inline void
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs,
- struct ftrace_event_file *ftrace_file)
+ struct trace_event_file *trace_file)
{
struct kretprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
int size, pc, dsize;
unsigned long irq_flags;
- struct ftrace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = &tk->tp.call;
- WARN_ON(call != ftrace_file->event_call);
+ WARN_ON(call != trace_file->event_call);
- if (ftrace_trigger_soft_disabled(ftrace_file))
+ if (trace_trigger_soft_disabled(trace_file))
return;
local_save_flags(irq_flags);
@@ -985,7 +985,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
dsize = __get_data_size(&tk->tp, regs);
size = sizeof(*entry) + tk->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
call->event.type,
size, irq_flags, pc);
if (!event)
@@ -996,7 +996,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
entry->ret_ip = (unsigned long)ri->ret_addr;
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
- event_trigger_unlock_commit_regs(ftrace_file, buffer, event,
+ event_trigger_unlock_commit_regs(trace_file, buffer, event,
entry, irq_flags, pc, regs);
}
@@ -1025,7 +1025,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
field = (struct kprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
@@ -1056,7 +1056,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
field = (struct kretprobe_trace_entry_head *)iter->ent;
tp = container_of(event, struct trace_probe, call.event);
- trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call));
+ trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
@@ -1081,7 +1081,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
}
-static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
+static int kprobe_event_define_fields(struct trace_event_call *event_call)
{
int ret, i;
struct kprobe_trace_entry_head field;
@@ -1104,7 +1104,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
return 0;
}
-static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
+static int kretprobe_event_define_fields(struct trace_event_call *event_call)
{
int ret, i;
struct kretprobe_trace_entry_head field;
@@ -1134,7 +1134,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
static void
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
{
- struct ftrace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = &tk->tp.call;
struct bpf_prog *prog = call->prog;
struct kprobe_trace_entry_head *entry;
struct hlist_head *head;
@@ -1169,7 +1169,7 @@ static void
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
struct pt_regs *regs)
{
- struct ftrace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = &tk->tp.call;
struct bpf_prog *prog = call->prog;
struct kretprobe_trace_entry_head *entry;
struct hlist_head *head;
@@ -1206,11 +1206,11 @@ NOKPROBE_SYMBOL(kretprobe_perf_func);
* kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
* lockless, but we can't race with this __init function.
*/
-static int kprobe_register(struct ftrace_event_call *event,
+static int kprobe_register(struct trace_event_call *event,
enum trace_reg type, void *data)
{
struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
- struct ftrace_event_file *file = data;
+ struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
@@ -1276,10 +1276,10 @@ static struct trace_event_functions kprobe_funcs = {
static int register_kprobe_event(struct trace_kprobe *tk)
{
- struct ftrace_event_call *call = &tk->tp.call;
+ struct trace_event_call *call = &tk->tp.call;
int ret;
- /* Initialize ftrace_event_call */
+ /* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
@@ -1290,7 +1290,7 @@ static int register_kprobe_event(struct trace_kprobe *tk)
}
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
return -ENOMEM;
- ret = register_ftrace_event(&call->event);
+ ret = register_trace_event(&call->event);
if (!ret) {
kfree(call->print_fmt);
return -ENODEV;
@@ -1301,9 +1301,9 @@ static int register_kprobe_event(struct trace_kprobe *tk)
ret = trace_add_event_call(call);
if (ret) {
pr_info("Failed to register kprobe event: %s\n",
- ftrace_event_name(call));
+ trace_event_name(call));
kfree(call->print_fmt);
- unregister_ftrace_event(&call->event);
+ unregister_trace_event(&call->event);
}
return ret;
}
@@ -1364,10 +1364,10 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
return a1 + a2 + a3 + a4 + a5 + a6;
}
-static struct ftrace_event_file *
+static struct trace_event_file *
find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
{
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
list_for_each_entry(file, &tr->events, list)
if (file->event_call == &tk->tp.call)
@@ -1385,7 +1385,7 @@ static __init int kprobe_trace_self_tests_init(void)
int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
struct trace_kprobe *tk;
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
if (tracing_is_disabled())
return -ENODEV;
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 7a9ba62e9fef..638e110c5bfd 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -298,7 +298,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_rw *rw)
{
- struct ftrace_event_call *call = &event_mmiotrace_rw;
+ struct trace_event_call *call = &event_mmiotrace_rw;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_rw *entry;
@@ -328,7 +328,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
struct trace_array_cpu *data,
struct mmiotrace_map *map)
{
- struct ftrace_event_call *call = &event_mmiotrace_map;
+ struct trace_event_call *call = &event_mmiotrace_map;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct trace_mmiotrace_map *entry;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 25a086bcb700..dfab253727dc 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -60,9 +60,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
}
const char *
-ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
- unsigned long flags,
- const struct trace_print_flags *flag_array)
+trace_print_flags_seq(struct trace_seq *p, const char *delim,
+ unsigned long flags,
+ const struct trace_print_flags *flag_array)
{
unsigned long mask;
const char *str;
@@ -95,11 +95,11 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
return ret;
}
-EXPORT_SYMBOL(ftrace_print_flags_seq);
+EXPORT_SYMBOL(trace_print_flags_seq);
const char *
-ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
- const struct trace_print_flags *symbol_array)
+trace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+ const struct trace_print_flags *symbol_array)
{
int i;
const char *ret = trace_seq_buffer_ptr(p);
@@ -120,11 +120,11 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
return ret;
}
-EXPORT_SYMBOL(ftrace_print_symbols_seq);
+EXPORT_SYMBOL(trace_print_symbols_seq);
#if BITS_PER_LONG == 32
const char *
-ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
+trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
const struct trace_print_flags_u64 *symbol_array)
{
int i;
@@ -146,12 +146,12 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
return ret;
}
-EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
+EXPORT_SYMBOL(trace_print_symbols_seq_u64);
#endif
const char *
-ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
- unsigned int bitmask_size)
+trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+ unsigned int bitmask_size)
{
const char *ret = trace_seq_buffer_ptr(p);
@@ -160,10 +160,10 @@ ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
return ret;
}
-EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+EXPORT_SYMBOL_GPL(trace_print_bitmask_seq);
const char *
-ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
{
int i;
const char *ret = trace_seq_buffer_ptr(p);
@@ -175,11 +175,11 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
return ret;
}
-EXPORT_SYMBOL(ftrace_print_hex_seq);
+EXPORT_SYMBOL(trace_print_hex_seq);
const char *
-ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
- size_t el_size)
+trace_print_array_seq(struct trace_seq *p, const void *buf, int count,
+ size_t el_size)
{
const char *ret = trace_seq_buffer_ptr(p);
const char *prefix = "";
@@ -220,17 +220,17 @@ ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count,
return ret;
}
-EXPORT_SYMBOL(ftrace_print_array_seq);
+EXPORT_SYMBOL(trace_print_array_seq);
-int ftrace_raw_output_prep(struct trace_iterator *iter,
- struct trace_event *trace_event)
+int trace_raw_output_prep(struct trace_iterator *iter,
+ struct trace_event *trace_event)
{
- struct ftrace_event_call *event;
+ struct trace_event_call *event;
struct trace_seq *s = &iter->seq;
struct trace_seq *p = &iter->tmp_seq;
struct trace_entry *entry;
- event = container_of(trace_event, struct ftrace_event_call, event);
+ event = container_of(trace_event, struct trace_event_call, event);
entry = iter->ent;
if (entry->type != event->event.type) {
@@ -239,14 +239,14 @@ int ftrace_raw_output_prep(struct trace_iterator *iter,
}
trace_seq_init(p);
- trace_seq_printf(s, "%s: ", ftrace_event_name(event));
+ trace_seq_printf(s, "%s: ", trace_event_name(event));
return trace_handle_return(s);
}
-EXPORT_SYMBOL(ftrace_raw_output_prep);
+EXPORT_SYMBOL(trace_raw_output_prep);
-static int ftrace_output_raw(struct trace_iterator *iter, char *name,
- char *fmt, va_list ap)
+static int trace_output_raw(struct trace_iterator *iter, char *name,
+ char *fmt, va_list ap)
{
struct trace_seq *s = &iter->seq;
@@ -256,18 +256,18 @@ static int ftrace_output_raw(struct trace_iterator *iter, char *name,
return trace_handle_return(s);
}
-int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
+int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
- ret = ftrace_output_raw(iter, name, fmt, ap);
+ ret = trace_output_raw(iter, name, fmt, ap);
va_end(ap);
return ret;
}
-EXPORT_SYMBOL_GPL(ftrace_output_call);
+EXPORT_SYMBOL_GPL(trace_output_call);
#ifdef CONFIG_KRETPROBES
static inline const char *kretprobed(const char *name)
@@ -675,7 +675,7 @@ static int trace_search_list(struct list_head **list)
}
/* Did we used up all 65 thousand events??? */
- if ((last + 1) > FTRACE_MAX_EVENT)
+ if ((last + 1) > TRACE_EVENT_TYPE_MAX)
return 0;
*list = &e->list;
@@ -693,7 +693,7 @@ void trace_event_read_unlock(void)
}
/**
- * register_ftrace_event - register output for an event type
+ * register_trace_event - register output for an event type
* @event: the event type to register
*
* Event types are stored in a hash and this hash is used to
@@ -707,7 +707,7 @@ void trace_event_read_unlock(void)
*
* Returns the event type number or zero on error.
*/
-int register_ftrace_event(struct trace_event *event)
+int register_trace_event(struct trace_event *event)
{
unsigned key;
int ret = 0;
@@ -725,7 +725,7 @@ int register_ftrace_event(struct trace_event *event)
if (!event->type) {
struct list_head *list = NULL;
- if (next_event_type > FTRACE_MAX_EVENT) {
+ if (next_event_type > TRACE_EVENT_TYPE_MAX) {
event->type = trace_search_list(&list);
if (!event->type)
@@ -771,12 +771,12 @@ int register_ftrace_event(struct trace_event *event)
return ret;
}
-EXPORT_SYMBOL_GPL(register_ftrace_event);
+EXPORT_SYMBOL_GPL(register_trace_event);
/*
* Used by module code with the trace_event_sem held for write.
*/
-int __unregister_ftrace_event(struct trace_event *event)
+int __unregister_trace_event(struct trace_event *event)
{
hlist_del(&event->node);
list_del(&event->list);
@@ -784,18 +784,18 @@ int __unregister_ftrace_event(struct trace_event *event)
}
/**
- * unregister_ftrace_event - remove a no longer used event
+ * unregister_trace_event - remove a no longer used event
* @event: the event to remove
*/
-int unregister_ftrace_event(struct trace_event *event)
+int unregister_trace_event(struct trace_event *event)
{
down_write(&trace_event_sem);
- __unregister_ftrace_event(event);
+ __unregister_trace_event(event);
up_write(&trace_event_sem);
return 0;
}
-EXPORT_SYMBOL_GPL(unregister_ftrace_event);
+EXPORT_SYMBOL_GPL(unregister_trace_event);
/*
* Standard events
@@ -1243,7 +1243,7 @@ __init static int init_events(void)
for (i = 0; events[i]; i++) {
event = events[i];
- ret = register_ftrace_event(event);
+ ret = register_trace_event(event);
if (!ret) {
printk(KERN_WARNING "event %d failed to register\n",
event->type);
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index 8ef2c40efb3c..4cbfe85b99c8 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -32,7 +32,7 @@ extern int
trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
/* used by module unregistering */
-extern int __unregister_ftrace_event(struct trace_event *event);
+extern int __unregister_trace_event(struct trace_event *event);
extern struct rw_semaphore trace_event_sem;
#define SEQ_PUT_FIELD(s, x) \
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index ab283e146b70..b98dee914542 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -272,8 +272,8 @@ struct probe_arg {
struct trace_probe {
unsigned int flags; /* For TP_FLAG_* */
- struct ftrace_event_class class;
- struct ftrace_event_call call;
+ struct trace_event_class class;
+ struct trace_event_call call;
struct list_head files;
ssize_t size; /* trace entry size */
unsigned int nr_args;
@@ -281,7 +281,7 @@ struct trace_probe {
};
struct event_file_link {
- struct ftrace_event_file *file;
+ struct trace_event_file *file;
struct list_head list;
};
@@ -314,7 +314,7 @@ static inline int is_good_name(const char *name)
}
static inline struct event_file_link *
-find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
+find_event_file_link(struct trace_probe *tp, struct trace_event_file *file)
{
struct event_file_link *link;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index d6e1003724e9..9b33dd117f3f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -369,7 +369,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
struct task_struct *next,
unsigned long flags, int pc)
{
- struct ftrace_event_call *call = &event_context_switch;
+ struct trace_event_call *call = &event_context_switch;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
@@ -397,7 +397,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *curr,
unsigned long flags, int pc)
{
- struct ftrace_event_call *call = &event_wakeup;
+ struct trace_event_call *call = &event_wakeup;
struct ring_buffer_event *event;
struct ctx_switch_entry *entry;
struct ring_buffer *buffer = tr->trace_buffer.buffer;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f97f6e3a676c..7d567a4b9fa7 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -13,13 +13,13 @@
static DEFINE_MUTEX(syscall_trace_lock);
-static int syscall_enter_register(struct ftrace_event_call *event,
+static int syscall_enter_register(struct trace_event_call *event,
enum trace_reg type, void *data);
-static int syscall_exit_register(struct ftrace_event_call *event,
+static int syscall_exit_register(struct trace_event_call *event,
enum trace_reg type, void *data);
static struct list_head *
-syscall_get_enter_fields(struct ftrace_event_call *call)
+syscall_get_enter_fields(struct trace_event_call *call)
{
struct syscall_metadata *entry = call->data;
@@ -219,7 +219,7 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
return pos;
}
-static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
+static int __init set_syscall_print_fmt(struct trace_event_call *call)
{
char *print_fmt;
int len;
@@ -244,7 +244,7 @@ static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
return 0;
}
-static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
+static void __init free_syscall_print_fmt(struct trace_event_call *call)
{
struct syscall_metadata *entry = call->data;
@@ -252,7 +252,7 @@ static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
kfree(call->print_fmt);
}
-static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
+static int __init syscall_enter_define_fields(struct trace_event_call *call)
{
struct syscall_trace_enter trace;
struct syscall_metadata *meta = call->data;
@@ -275,7 +275,7 @@ static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
return ret;
}
-static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
+static int __init syscall_exit_define_fields(struct trace_event_call *call)
{
struct syscall_trace_exit trace;
int ret;
@@ -293,7 +293,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
struct trace_array *tr = data;
- struct ftrace_event_file *ftrace_file;
+ struct trace_event_file *trace_file;
struct syscall_trace_enter *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -308,11 +308,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
- ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
- if (!ftrace_file)
+ trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
+ if (!trace_file)
return;
- if (ftrace_trigger_soft_disabled(ftrace_file))
+ if (trace_trigger_soft_disabled(trace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -334,14 +334,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
entry->nr = syscall_nr;
syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
- event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
+ event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
}
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
struct trace_array *tr = data;
- struct ftrace_event_file *ftrace_file;
+ struct trace_event_file *trace_file;
struct syscall_trace_exit *entry;
struct syscall_metadata *sys_data;
struct ring_buffer_event *event;
@@ -355,11 +355,11 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
return;
/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
- ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
- if (!ftrace_file)
+ trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
+ if (!trace_file)
return;
- if (ftrace_trigger_soft_disabled(ftrace_file))
+ if (trace_trigger_soft_disabled(trace_file))
return;
sys_data = syscall_nr_to_meta(syscall_nr);
@@ -380,12 +380,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
entry->nr = syscall_nr;
entry->ret = syscall_get_return_value(current, regs);
- event_trigger_unlock_commit(ftrace_file, buffer, event, entry,
+ event_trigger_unlock_commit(trace_file, buffer, event, entry,
irq_flags, pc);
}
-static int reg_event_syscall_enter(struct ftrace_event_file *file,
- struct ftrace_event_call *call)
+static int reg_event_syscall_enter(struct trace_event_file *file,
+ struct trace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
@@ -405,8 +405,8 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file,
return ret;
}
-static void unreg_event_syscall_enter(struct ftrace_event_file *file,
- struct ftrace_event_call *call)
+static void unreg_event_syscall_enter(struct trace_event_file *file,
+ struct trace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
@@ -422,8 +422,8 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file,
mutex_unlock(&syscall_trace_lock);
}
-static int reg_event_syscall_exit(struct ftrace_event_file *file,
- struct ftrace_event_call *call)
+static int reg_event_syscall_exit(struct trace_event_file *file,
+ struct trace_event_call *call)
{
struct trace_array *tr = file->tr;
int ret = 0;
@@ -443,8 +443,8 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file,
return ret;
}
-static void unreg_event_syscall_exit(struct ftrace_event_file *file,
- struct ftrace_event_call *call)
+static void unreg_event_syscall_exit(struct trace_event_file *file,
+ struct trace_event_call *call)
{
struct trace_array *tr = file->tr;
int num;
@@ -460,7 +460,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file,
mutex_unlock(&syscall_trace_lock);
}
-static int __init init_syscall_trace(struct ftrace_event_call *call)
+static int __init init_syscall_trace(struct trace_event_call *call)
{
int id;
int num;
@@ -493,7 +493,7 @@ struct trace_event_functions exit_syscall_print_funcs = {
.trace = print_syscall_exit,
};
-struct ftrace_event_class __refdata event_class_syscall_enter = {
+struct trace_event_class __refdata event_class_syscall_enter = {
.system = "syscalls",
.reg = syscall_enter_register,
.define_fields = syscall_enter_define_fields,
@@ -501,7 +501,7 @@ struct ftrace_event_class __refdata event_class_syscall_enter = {
.raw_init = init_syscall_trace,
};
-struct ftrace_event_class __refdata event_class_syscall_exit = {
+struct trace_event_class __refdata event_class_syscall_exit = {
.system = "syscalls",
.reg = syscall_exit_register,
.define_fields = syscall_exit_define_fields,
@@ -584,7 +584,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
-static int perf_sysenter_enable(struct ftrace_event_call *call)
+static int perf_sysenter_enable(struct trace_event_call *call)
{
int ret = 0;
int num;
@@ -605,7 +605,7 @@ static int perf_sysenter_enable(struct ftrace_event_call *call)
return ret;
}
-static void perf_sysenter_disable(struct ftrace_event_call *call)
+static void perf_sysenter_disable(struct trace_event_call *call)
{
int num;
@@ -656,7 +656,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
}
-static int perf_sysexit_enable(struct ftrace_event_call *call)
+static int perf_sysexit_enable(struct trace_event_call *call)
{
int ret = 0;
int num;
@@ -677,7 +677,7 @@ static int perf_sysexit_enable(struct ftrace_event_call *call)
return ret;
}
-static void perf_sysexit_disable(struct ftrace_event_call *call)
+static void perf_sysexit_disable(struct trace_event_call *call)
{
int num;
@@ -693,10 +693,10 @@ static void perf_sysexit_disable(struct ftrace_event_call *call)
#endif /* CONFIG_PERF_EVENTS */
-static int syscall_enter_register(struct ftrace_event_call *event,
+static int syscall_enter_register(struct trace_event_call *event,
enum trace_reg type, void *data)
{
- struct ftrace_event_file *file = data;
+ struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
@@ -721,10 +721,10 @@ static int syscall_enter_register(struct ftrace_event_call *event,
return 0;
}
-static int syscall_exit_register(struct ftrace_event_call *event,
+static int syscall_exit_register(struct trace_event_call *event,
enum trace_reg type, void *data)
{
- struct ftrace_event_file *file = data;
+ struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 6dd022c7b5bc..aa1ea7b36fa8 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -293,7 +293,7 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
struct trace_uprobe *tu;
list_for_each_entry(tu, &uprobe_list, list)
- if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 &&
+ if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
strcmp(tu->tp.call.class->system, group) == 0)
return tu;
@@ -323,7 +323,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
mutex_lock(&uprobe_lock);
/* register as an event */
- old_tu = find_probe_event(ftrace_event_name(&tu->tp.call),
+ old_tu = find_probe_event(trace_event_name(&tu->tp.call),
tu->tp.call.class->system);
if (old_tu) {
/* delete old event */
@@ -600,7 +600,7 @@ static int probes_seq_show(struct seq_file *m, void *v)
int i;
seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system,
- ftrace_event_name(&tu->tp.call));
+ trace_event_name(&tu->tp.call));
seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
for (i = 0; i < tu->tp.nr_args; i++)
@@ -651,7 +651,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
struct trace_uprobe *tu = v;
seq_printf(m, " %s %-44s %15lu\n", tu->filename,
- ftrace_event_name(&tu->tp.call), tu->nhit);
+ trace_event_name(&tu->tp.call), tu->nhit);
return 0;
}
@@ -770,26 +770,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
static void __uprobe_trace_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
struct uprobe_cpu_buffer *ucb, int dsize,
- struct ftrace_event_file *ftrace_file)
+ struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
struct ring_buffer_event *event;
struct ring_buffer *buffer;
void *data;
int size, esize;
- struct ftrace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = &tu->tp.call;
- WARN_ON(call != ftrace_file->event_call);
+ WARN_ON(call != trace_file->event_call);
if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE))
return;
- if (ftrace_trigger_soft_disabled(ftrace_file))
+ if (trace_trigger_soft_disabled(trace_file))
return;
esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
size = esize + tu->tp.size + dsize;
- event = trace_event_buffer_lock_reserve(&buffer, ftrace_file,
+ event = trace_event_buffer_lock_reserve(&buffer, trace_file,
call->event.type, size, 0, 0);
if (!event)
return;
@@ -806,7 +806,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
memcpy(data, ucb->buf, tu->tp.size + dsize);
- event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0);
+ event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0);
}
/* uprobe handler */
@@ -853,12 +853,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
if (is_ret_probe(tu)) {
trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
- ftrace_event_name(&tu->tp.call),
+ trace_event_name(&tu->tp.call),
entry->vaddr[1], entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, true);
} else {
trace_seq_printf(s, "%s: (0x%lx)",
- ftrace_event_name(&tu->tp.call),
+ trace_event_name(&tu->tp.call),
entry->vaddr[0]);
data = DATAOF_TRACE_ENTRY(entry, false);
}
@@ -881,7 +881,7 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self,
struct mm_struct *mm);
static int
-probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
+probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
filter_func_t filter)
{
bool enabled = trace_probe_is_enabled(&tu->tp);
@@ -938,7 +938,7 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
}
static void
-probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
+probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
{
if (!trace_probe_is_enabled(&tu->tp))
return;
@@ -967,7 +967,7 @@ probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file)
uprobe_buffer_disable();
}
-static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
+static int uprobe_event_define_fields(struct trace_event_call *event_call)
{
int ret, i, size;
struct uprobe_trace_entry_head field;
@@ -1093,7 +1093,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
struct uprobe_cpu_buffer *ucb, int dsize)
{
- struct ftrace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = &tu->tp.call;
struct uprobe_trace_entry_head *entry;
struct hlist_head *head;
void *data;
@@ -1159,11 +1159,11 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
#endif /* CONFIG_PERF_EVENTS */
static int
-trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,
+trace_uprobe_register(struct trace_event_call *event, enum trace_reg type,
void *data)
{
struct trace_uprobe *tu = event->data;
- struct ftrace_event_file *file = data;
+ struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
@@ -1272,10 +1272,10 @@ static struct trace_event_functions uprobe_funcs = {
static int register_uprobe_event(struct trace_uprobe *tu)
{
- struct ftrace_event_call *call = &tu->tp.call;
+ struct trace_event_call *call = &tu->tp.call;
int ret;
- /* Initialize ftrace_event_call */
+ /* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
@@ -1283,7 +1283,7 @@ static int register_uprobe_event(struct trace_uprobe *tu)
if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
return -ENOMEM;
- ret = register_ftrace_event(&call->event);
+ ret = register_trace_event(&call->event);
if (!ret) {
kfree(call->print_fmt);
return -ENODEV;
@@ -1295,9 +1295,9 @@ static int register_uprobe_event(struct trace_uprobe *tu)
if (ret) {
pr_info("Failed to register uprobe event: %s\n",
- ftrace_event_name(call));
+ trace_event_name(call));
kfree(call->print_fmt);
- unregister_ftrace_event(&call->event);
+ unregister_trace_event(&call->event);
}
return ret;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 586ad91300b0..4c4f06176f74 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -127,6 +127,11 @@ enum {
*
* PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
*
+ * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
+ *
+ * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
+ * sched-RCU for reads.
+ *
* WQ: wq->mutex protected.
*
* WR: wq->mutex protected for writes. Sched-RCU protected for reads.
@@ -247,8 +252,8 @@ struct workqueue_struct {
int nr_drainers; /* WQ: drain in progress */
int saved_max_active; /* WQ: saved pwq max_active */
- struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */
- struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */
+ struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
+ struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
#ifdef CONFIG_SYSFS
struct wq_device *wq_dev; /* I: for sysfs interface */
@@ -268,7 +273,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
- struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */
+ struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
};
static struct kmem_cache *pwq_cache;
@@ -280,12 +285,7 @@ static bool wq_disable_numa;
module_param_named(disable_numa, wq_disable_numa, bool, 0444);
/* see the comment above the definition of WQ_POWER_EFFICIENT */
-#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
-static bool wq_power_efficient = true;
-#else
-static bool wq_power_efficient;
-#endif
-
+static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
module_param_named(power_efficient, wq_power_efficient, bool, 0444);
static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
@@ -299,6 +299,8 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
+static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+
/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
cpu_worker_pools);
@@ -330,8 +332,6 @@ struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
static int worker_thread(void *__worker);
-static void copy_workqueue_attrs(struct workqueue_attrs *to,
- const struct workqueue_attrs *from);
static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
#define CREATE_TRACE_POINTS
@@ -347,6 +347,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
lockdep_is_held(&wq->mutex), \
"sched RCU or wq->mutex should be held")
+#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
+ rcu_lockdep_assert(rcu_read_lock_sched_held() || \
+ lockdep_is_held(&wq->mutex) || \
+ lockdep_is_held(&wq_pool_mutex), \
+ "sched RCU, wq->mutex or wq_pool_mutex should be held")
+
#define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -551,7 +557,8 @@ static int worker_pool_assign_id(struct worker_pool *pool)
* @wq: the target workqueue
* @node: the node ID
*
- * This must be called either with pwq_lock held or sched RCU read locked.
+ * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
+ * read locked.
* If the pwq needs to be used beyond the locking in effect, the caller is
* responsible for guaranteeing that the pwq stays online.
*
@@ -560,7 +567,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
int node)
{
- assert_rcu_or_wq_mutex(wq);
+ assert_rcu_or_wq_mutex_or_pool_mutex(wq);
return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}
@@ -976,7 +983,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
* move_linked_works - move linked works to a list
* @work: start of series of works to be scheduled
* @head: target list to append @work to
- * @nextp: out paramter for nested worklist walking
+ * @nextp: out parameter for nested worklist walking
*
* Schedule linked works starting from @work to @head. Work series to
* be scheduled starts at @work and includes any consecutive work with
@@ -2616,7 +2623,7 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
* Wait until the workqueue becomes empty. While draining is in progress,
* only chain queueing is allowed. IOW, only currently pending or running
* work items on @wq can queue further work items on it. @wq is flushed
- * repeatedly until it becomes empty. The number of flushing is detemined
+ * repeatedly until it becomes empty. The number of flushing is determined
* by the depth of chaining and should be relatively short. Whine if it
* takes too long.
*/
@@ -2947,36 +2954,6 @@ int schedule_on_each_cpu(work_func_t func)
}
/**
- * flush_scheduled_work - ensure that any scheduled work has run to completion.
- *
- * Forces execution of the kernel-global workqueue and blocks until its
- * completion.
- *
- * Think twice before calling this function! It's very easy to get into
- * trouble if you don't take great care. Either of the following situations
- * will lead to deadlock:
- *
- * One of the work items currently on the workqueue needs to acquire
- * a lock held by your code or its caller.
- *
- * Your code is running in the context of a work routine.
- *
- * They will be detected by lockdep when they occur, but the first might not
- * occur very often. It depends on what work items are on the workqueue and
- * what locks they need, which you have no control over.
- *
- * In most situations flushing the entire workqueue is overkill; you merely
- * need to know that a particular work item isn't queued and isn't running.
- * In such cases you should use cancel_delayed_work_sync() or
- * cancel_work_sync() instead.
- */
-void flush_scheduled_work(void)
-{
- flush_workqueue(system_wq);
-}
-EXPORT_SYMBOL(flush_scheduled_work);
-
-/**
* execute_in_process_context - reliably execute the routine with user context
* @fn: the function to execute
* @ew: guaranteed storage for the execute work structure (must
@@ -3081,7 +3058,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
* init_worker_pool - initialize a newly zalloc'd worker_pool
* @pool: worker_pool to initialize
*
- * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs.
+ * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
*
* Return: 0 on success, -errno on failure. Even on failure, all fields
* inside @pool proper are initialized and put_unbound_pool() can be called
@@ -3425,20 +3402,9 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
return pwq;
}
-/* undo alloc_unbound_pwq(), used only in the error path */
-static void free_unbound_pwq(struct pool_workqueue *pwq)
-{
- lockdep_assert_held(&wq_pool_mutex);
-
- if (pwq) {
- put_unbound_pool(pwq->pool);
- kmem_cache_free(pwq_cache, pwq);
- }
-}
-
/**
- * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node
- * @attrs: the wq_attrs of interest
+ * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
+ * @attrs: the wq_attrs of the default pwq of the target workqueue
* @node: the target NUMA node
* @cpu_going_down: if >= 0, the CPU to consider as offline
* @cpumask: outarg, the resulting cpumask
@@ -3488,6 +3454,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
{
struct pool_workqueue *old_pwq;
+ lockdep_assert_held(&wq_pool_mutex);
lockdep_assert_held(&wq->mutex);
/* link_pwq() can handle duplicate calls */
@@ -3498,46 +3465,59 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
return old_pwq;
}
-/**
- * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
- * @wq: the target workqueue
- * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
- *
- * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
- * machines, this function maps a separate pwq to each NUMA node with
- * possibles CPUs in @attrs->cpumask so that work items are affine to the
- * NUMA node it was issued on. Older pwqs are released as in-flight work
- * items finish. Note that a work item which repeatedly requeues itself
- * back-to-back will stay on its current pwq.
- *
- * Performs GFP_KERNEL allocations.
- *
- * Return: 0 on success and -errno on failure.
- */
-int apply_workqueue_attrs(struct workqueue_struct *wq,
- const struct workqueue_attrs *attrs)
+/* context to store the prepared attrs & pwqs before applying */
+struct apply_wqattrs_ctx {
+ struct workqueue_struct *wq; /* target workqueue */
+ struct workqueue_attrs *attrs; /* attrs to apply */
+ struct list_head list; /* queued for batching commit */
+ struct pool_workqueue *dfl_pwq;
+ struct pool_workqueue *pwq_tbl[];
+};
+
+/* free the resources after success or abort */
+static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
+{
+ if (ctx) {
+ int node;
+
+ for_each_node(node)
+ put_pwq_unlocked(ctx->pwq_tbl[node]);
+ put_pwq_unlocked(ctx->dfl_pwq);
+
+ free_workqueue_attrs(ctx->attrs);
+
+ kfree(ctx);
+ }
+}
+
+/* allocate the attrs and pwqs for later installation */
+static struct apply_wqattrs_ctx *
+apply_wqattrs_prepare(struct workqueue_struct *wq,
+ const struct workqueue_attrs *attrs)
{
+ struct apply_wqattrs_ctx *ctx;
struct workqueue_attrs *new_attrs, *tmp_attrs;
- struct pool_workqueue **pwq_tbl, *dfl_pwq;
- int node, ret;
+ int node;
- /* only unbound workqueues can change attributes */
- if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
- return -EINVAL;
+ lockdep_assert_held(&wq_pool_mutex);
- /* creating multiple pwqs breaks ordering guarantee */
- if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
- return -EINVAL;
+ ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
+ GFP_KERNEL);
- pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
- if (!pwq_tbl || !new_attrs || !tmp_attrs)
- goto enomem;
+ if (!ctx || !new_attrs || !tmp_attrs)
+ goto out_free;
- /* make a copy of @attrs and sanitize it */
+ /*
+ * Calculate the attrs of the default pwq.
+ * If the user configured cpumask doesn't overlap with the
+ * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
+ */
copy_workqueue_attrs(new_attrs, attrs);
- cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
+ cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
+ if (unlikely(cpumask_empty(new_attrs->cpumask)))
+ cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
/*
* We may create multiple pwqs with differing cpumasks. Make a
@@ -3547,75 +3527,129 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
copy_workqueue_attrs(tmp_attrs, new_attrs);
/*
- * CPUs should stay stable across pwq creations and installations.
- * Pin CPUs, determine the target cpumask for each node and create
- * pwqs accordingly.
- */
- get_online_cpus();
-
- mutex_lock(&wq_pool_mutex);
-
- /*
* If something goes wrong during CPU up/down, we'll fall back to
* the default pwq covering whole @attrs->cpumask. Always create
* it even if we don't use it immediately.
*/
- dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
- if (!dfl_pwq)
- goto enomem_pwq;
+ ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
+ if (!ctx->dfl_pwq)
+ goto out_free;
for_each_node(node) {
- if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) {
- pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
- if (!pwq_tbl[node])
- goto enomem_pwq;
+ if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
+ ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
+ if (!ctx->pwq_tbl[node])
+ goto out_free;
} else {
- dfl_pwq->refcnt++;
- pwq_tbl[node] = dfl_pwq;
+ ctx->dfl_pwq->refcnt++;
+ ctx->pwq_tbl[node] = ctx->dfl_pwq;
}
}
- mutex_unlock(&wq_pool_mutex);
+ /* save the user configured attrs and sanitize it. */
+ copy_workqueue_attrs(new_attrs, attrs);
+ cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
+ ctx->attrs = new_attrs;
+
+ ctx->wq = wq;
+ free_workqueue_attrs(tmp_attrs);
+ return ctx;
+
+out_free:
+ free_workqueue_attrs(tmp_attrs);
+ free_workqueue_attrs(new_attrs);
+ apply_wqattrs_cleanup(ctx);
+ return NULL;
+}
+
+/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
+static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
+{
+ int node;
/* all pwqs have been created successfully, let's install'em */
- mutex_lock(&wq->mutex);
+ mutex_lock(&ctx->wq->mutex);
- copy_workqueue_attrs(wq->unbound_attrs, new_attrs);
+ copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
/* save the previous pwq and install the new one */
for_each_node(node)
- pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]);
+ ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
+ ctx->pwq_tbl[node]);
/* @dfl_pwq might not have been used, ensure it's linked */
- link_pwq(dfl_pwq);
- swap(wq->dfl_pwq, dfl_pwq);
+ link_pwq(ctx->dfl_pwq);
+ swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
- mutex_unlock(&wq->mutex);
+ mutex_unlock(&ctx->wq->mutex);
+}
- /* put the old pwqs */
- for_each_node(node)
- put_pwq_unlocked(pwq_tbl[node]);
- put_pwq_unlocked(dfl_pwq);
+static void apply_wqattrs_lock(void)
+{
+ /* CPUs should stay stable across pwq creations and installations */
+ get_online_cpus();
+ mutex_lock(&wq_pool_mutex);
+}
+static void apply_wqattrs_unlock(void)
+{
+ mutex_unlock(&wq_pool_mutex);
put_online_cpus();
- ret = 0;
- /* fall through */
-out_free:
- free_workqueue_attrs(tmp_attrs);
- free_workqueue_attrs(new_attrs);
- kfree(pwq_tbl);
+}
+
+static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
+ const struct workqueue_attrs *attrs)
+{
+ struct apply_wqattrs_ctx *ctx;
+ int ret = -ENOMEM;
+
+ /* only unbound workqueues can change attributes */
+ if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
+ return -EINVAL;
+
+ /* creating multiple pwqs breaks ordering guarantee */
+ if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
+ return -EINVAL;
+
+ ctx = apply_wqattrs_prepare(wq, attrs);
+
+ /* the ctx has been prepared successfully, let's commit it */
+ if (ctx) {
+ apply_wqattrs_commit(ctx);
+ ret = 0;
+ }
+
+ apply_wqattrs_cleanup(ctx);
+
return ret;
+}
-enomem_pwq:
- free_unbound_pwq(dfl_pwq);
- for_each_node(node)
- if (pwq_tbl && pwq_tbl[node] != dfl_pwq)
- free_unbound_pwq(pwq_tbl[node]);
- mutex_unlock(&wq_pool_mutex);
- put_online_cpus();
-enomem:
- ret = -ENOMEM;
- goto out_free;
+/**
+ * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
+ * @wq: the target workqueue
+ * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
+ *
+ * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
+ * machines, this function maps a separate pwq to each NUMA node with
+ * possibles CPUs in @attrs->cpumask so that work items are affine to the
+ * NUMA node it was issued on. Older pwqs are released as in-flight work
+ * items finish. Note that a work item which repeatedly requeues itself
+ * back-to-back will stay on its current pwq.
+ *
+ * Performs GFP_KERNEL allocations.
+ *
+ * Return: 0 on success and -errno on failure.
+ */
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+ const struct workqueue_attrs *attrs)
+{
+ int ret;
+
+ apply_wqattrs_lock();
+ ret = apply_workqueue_attrs_locked(wq, attrs);
+ apply_wqattrs_unlock();
+
+ return ret;
}
/**
@@ -3651,7 +3685,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
lockdep_assert_held(&wq_pool_mutex);
- if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND))
+ if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
+ wq->unbound_attrs->no_numa)
return;
/*
@@ -3662,48 +3697,37 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
target_attrs = wq_update_unbound_numa_attrs_buf;
cpumask = target_attrs->cpumask;
- mutex_lock(&wq->mutex);
- if (wq->unbound_attrs->no_numa)
- goto out_unlock;
-
copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
pwq = unbound_pwq_by_node(wq, node);
/*
* Let's determine what needs to be done. If the target cpumask is
- * different from wq's, we need to compare it to @pwq's and create
- * a new one if they don't match. If the target cpumask equals
- * wq's, the default pwq should be used.
+ * different from the default pwq's, we need to compare it to @pwq's
+ * and create a new one if they don't match. If the target cpumask
+ * equals the default pwq's, the default pwq should be used.
*/
- if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
+ if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
- goto out_unlock;
+ return;
} else {
goto use_dfl_pwq;
}
- mutex_unlock(&wq->mutex);
-
/* create a new pwq */
pwq = alloc_unbound_pwq(wq, target_attrs);
if (!pwq) {
pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
wq->name);
- mutex_lock(&wq->mutex);
goto use_dfl_pwq;
}
- /*
- * Install the new pwq. As this function is called only from CPU
- * hotplug callbacks and applying a new attrs is wrapped with
- * get/put_online_cpus(), @wq->unbound_attrs couldn't have changed
- * inbetween.
- */
+ /* Install the new pwq. */
mutex_lock(&wq->mutex);
old_pwq = numa_pwq_tbl_install(wq, node, pwq);
goto out_unlock;
use_dfl_pwq:
+ mutex_lock(&wq->mutex);
spin_lock_irq(&wq->dfl_pwq->pool->lock);
get_pwq(wq->dfl_pwq);
spin_unlock_irq(&wq->dfl_pwq->pool->lock);
@@ -4385,7 +4409,7 @@ static void rebind_workers(struct worker_pool *pool)
/*
* Restore CPU affinity of all workers. As all idle workers should
* be on the run-queue of the associated CPU before any local
- * wake-ups for concurrency management happen, restore CPU affinty
+ * wake-ups for concurrency management happen, restore CPU affinity
* of all workers first and then clear UNBOUND. As we're called
* from CPU_ONLINE, the following shouldn't fail.
*/
@@ -4698,6 +4722,82 @@ out_unlock:
}
#endif /* CONFIG_FREEZER */
+static int workqueue_apply_unbound_cpumask(void)
+{
+ LIST_HEAD(ctxs);
+ int ret = 0;
+ struct workqueue_struct *wq;
+ struct apply_wqattrs_ctx *ctx, *n;
+
+ lockdep_assert_held(&wq_pool_mutex);
+
+ list_for_each_entry(wq, &workqueues, list) {
+ if (!(wq->flags & WQ_UNBOUND))
+ continue;
+ /* creating multiple pwqs breaks ordering guarantee */
+ if (wq->flags & __WQ_ORDERED)
+ continue;
+
+ ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
+ if (!ctx) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ list_add_tail(&ctx->list, &ctxs);
+ }
+
+ list_for_each_entry_safe(ctx, n, &ctxs, list) {
+ if (!ret)
+ apply_wqattrs_commit(ctx);
+ apply_wqattrs_cleanup(ctx);
+ }
+
+ return ret;
+}
+
+/**
+ * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
+ * @cpumask: the cpumask to set
+ *
+ * The low-level workqueues cpumask is a global cpumask that limits
+ * the affinity of all unbound workqueues. This function check the @cpumask
+ * and apply it to all unbound workqueues and updates all pwqs of them.
+ *
+ * Retun: 0 - Success
+ * -EINVAL - Invalid @cpumask
+ * -ENOMEM - Failed to allocate memory for attrs or pwqs.
+ */
+int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
+{
+ int ret = -EINVAL;
+ cpumask_var_t saved_cpumask;
+
+ if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ cpumask_and(cpumask, cpumask, cpu_possible_mask);
+ if (!cpumask_empty(cpumask)) {
+ apply_wqattrs_lock();
+
+ /* save the old wq_unbound_cpumask. */
+ cpumask_copy(saved_cpumask, wq_unbound_cpumask);
+
+ /* update wq_unbound_cpumask at first and apply it to wqs. */
+ cpumask_copy(wq_unbound_cpumask, cpumask);
+ ret = workqueue_apply_unbound_cpumask();
+
+ /* restore the wq_unbound_cpumask when failed. */
+ if (ret < 0)
+ cpumask_copy(wq_unbound_cpumask, saved_cpumask);
+
+ apply_wqattrs_unlock();
+ }
+
+ free_cpumask_var(saved_cpumask);
+ return ret;
+}
+
#ifdef CONFIG_SYSFS
/*
* Workqueues with WQ_SYSFS flag set is visible to userland via
@@ -4802,13 +4902,13 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
{
struct workqueue_attrs *attrs;
+ lockdep_assert_held(&wq_pool_mutex);
+
attrs = alloc_workqueue_attrs(GFP_KERNEL);
if (!attrs)
return NULL;
- mutex_lock(&wq->mutex);
copy_workqueue_attrs(attrs, wq->unbound_attrs);
- mutex_unlock(&wq->mutex);
return attrs;
}
@@ -4817,18 +4917,22 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
- int ret;
+ int ret = -ENOMEM;
+
+ apply_wqattrs_lock();
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
- return -ENOMEM;
+ goto out_unlock;
if (sscanf(buf, "%d", &attrs->nice) == 1 &&
attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
- ret = apply_workqueue_attrs(wq, attrs);
+ ret = apply_workqueue_attrs_locked(wq, attrs);
else
ret = -EINVAL;
+out_unlock:
+ apply_wqattrs_unlock();
free_workqueue_attrs(attrs);
return ret ?: count;
}
@@ -4852,16 +4956,20 @@ static ssize_t wq_cpumask_store(struct device *dev,
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
- int ret;
+ int ret = -ENOMEM;
+
+ apply_wqattrs_lock();
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
- return -ENOMEM;
+ goto out_unlock;
ret = cpumask_parse(buf, attrs->cpumask);
if (!ret)
- ret = apply_workqueue_attrs(wq, attrs);
+ ret = apply_workqueue_attrs_locked(wq, attrs);
+out_unlock:
+ apply_wqattrs_unlock();
free_workqueue_attrs(attrs);
return ret ?: count;
}
@@ -4885,18 +4993,22 @@ static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
{
struct workqueue_struct *wq = dev_to_wq(dev);
struct workqueue_attrs *attrs;
- int v, ret;
+ int v, ret = -ENOMEM;
+
+ apply_wqattrs_lock();
attrs = wq_sysfs_prep_attrs(wq);
if (!attrs)
- return -ENOMEM;
+ goto out_unlock;
ret = -EINVAL;
if (sscanf(buf, "%d", &v) == 1) {
attrs->no_numa = !v;
- ret = apply_workqueue_attrs(wq, attrs);
+ ret = apply_workqueue_attrs_locked(wq, attrs);
}
+out_unlock:
+ apply_wqattrs_unlock();
free_workqueue_attrs(attrs);
return ret ?: count;
}
@@ -4914,9 +5026,49 @@ static struct bus_type wq_subsys = {
.dev_groups = wq_sysfs_groups,
};
+static ssize_t wq_unbound_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int written;
+
+ mutex_lock(&wq_pool_mutex);
+ written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
+ cpumask_pr_args(wq_unbound_cpumask));
+ mutex_unlock(&wq_pool_mutex);
+
+ return written;
+}
+
+static ssize_t wq_unbound_cpumask_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ cpumask_var_t cpumask;
+ int ret;
+
+ if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+ return -ENOMEM;
+
+ ret = cpumask_parse(buf, cpumask);
+ if (!ret)
+ ret = workqueue_set_unbound_cpumask(cpumask);
+
+ free_cpumask_var(cpumask);
+ return ret ? ret : count;
+}
+
+static struct device_attribute wq_sysfs_cpumask_attr =
+ __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
+ wq_unbound_cpumask_store);
+
static int __init wq_sysfs_init(void)
{
- return subsys_virtual_register(&wq_subsys, NULL);
+ int err;
+
+ err = subsys_virtual_register(&wq_subsys, NULL);
+ if (err)
+ return err;
+
+ return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
}
core_initcall(wq_sysfs_init);
@@ -4948,7 +5100,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
int ret;
/*
- * Adjusting max_active or creating new pwqs by applyting
+ * Adjusting max_active or creating new pwqs by applying
* attributes breaks ordering guarantee. Disallow exposing ordered
* workqueues.
*/
@@ -5064,6 +5216,9 @@ static int __init init_workqueues(void)
WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
+ BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
+ cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+
pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);