diff options
Diffstat (limited to 'kernel')
44 files changed, 1833 insertions, 1361 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 1c13e4267de6..f9e6065346db 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1904,7 +1904,7 @@ EXPORT_SYMBOL(audit_log_task_info); /** * audit_log_link_denied - report a link restriction denial - * @operation: specific link opreation + * @operation: specific link operation * @link: the path that triggered the restriction */ void audit_log_link_denied(const char *operation, struct path *link) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9fb9d1cb83ce..09c65640cad6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -599,9 +599,7 @@ static int audit_filter_rules(struct task_struct *tsk, result = match_tree_refs(ctx, rule->tree); break; case AUDIT_LOGINUID: - result = 0; - if (ctx) - result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); + result = audit_uid_comparator(tsk->loginuid, f->op, f->uid); break; case AUDIT_LOGINUID_SET: result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); @@ -1023,7 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == -1) || len > MAX_ARG_STRLEN - 1)) { + if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { WARN_ON(1); send_sig(SIGKILL, current, 0); return -1; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 469dd547770c..9ef9fc8a774b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -46,6 +46,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/rwsem.h> +#include <linux/percpu-rwsem.h> #include <linux/string.h> #include <linux/sort.h> #include <linux/kmod.h> @@ -103,6 +104,8 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); */ static DEFINE_SPINLOCK(release_agent_path_lock); +struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + #define cgroup_assert_mutex_or_rcu_locked() \ rcu_lockdep_assert(rcu_read_lock_held() || \ lockdep_is_held(&cgroup_mutex), \ @@ -156,7 +159,7 @@ static bool cgrp_dfl_root_visible; static bool cgroup_legacy_files_on_dfl; /* some controllers are not supported in the default hierarchy */ -static unsigned int cgrp_dfl_root_inhibit_ss_mask; +static unsigned long cgrp_dfl_root_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -175,18 +178,19 @@ static DEFINE_IDR(cgroup_hierarchy_idr); */ static u64 css_serial_nr_next = 1; -/* This flag indicates whether tasks in the fork and exit paths should - * check for fork/exit handlers to call. This avoids us having to do - * extra work in the fork/exit path if none of the subsystems need to - * be called. +/* + * These bitmask flags indicate whether tasks in the fork and exit paths have + * fork/exit handlers to call. This avoids us having to do extra work in the + * fork/exit path to check which subsystems have fork/exit callbacks. */ -static int need_forkexit_callback __read_mostly; +static unsigned long have_fork_callback __read_mostly; +static unsigned long have_exit_callback __read_mostly; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned int ss_mask); + unsigned long ss_mask); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, bool visible); @@ -261,7 +265,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * - * Similar to cgroup_css() but returns the effctive css, which is defined + * Similar to cgroup_css() but returns the effective css, which is defined * as the matching css of the nearest ancestor including self which has @ss * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. @@ -409,6 +413,24 @@ static int notify_on_release(const struct cgroup *cgrp) for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) +/** + * for_each_subsys_which - filter for_each_subsys with a bitmask + * @ss: the iteration cursor + * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end + * @ss_maskp: a pointer to the bitmask + * + * The block will only run for cases where the ssid-th bit (1 << ssid) of + * mask is set to 1. + */ +#define for_each_subsys_which(ss, ssid, ss_maskp) \ + if (!CGROUP_SUBSYS_COUNT) /* to avoid spurious gcc warning */ \ + (ssid) = 0; \ + else \ + for_each_set_bit(ssid, ss_maskp, CGROUP_SUBSYS_COUNT) \ + if (((ss) = cgroup_subsys[ssid]) && false) \ + break; \ + else + /* iterate across the hierarchies */ #define for_each_root(root) \ list_for_each_entry((root), &cgroup_roots, root_list) @@ -882,7 +904,7 @@ static void cgroup_exit_root_id(struct cgroup_root *root) static void cgroup_free_root(struct cgroup_root *root) { if (root) { - /* hierarhcy ID shoulid already have been released */ + /* hierarchy ID should already have been released */ WARN_ON_ONCE(root->hierarchy_id); idr_destroy(&root->cgroup_idr); @@ -998,7 +1020,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * update of a tasks cgroup pointer by cgroup_attach_task() */ -static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask); +static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask); static struct kernfs_syscall_ops cgroup_kf_syscall_ops; static const struct file_operations proc_cgroupstats_operations; @@ -1068,11 +1090,11 @@ static void cgroup_put(struct cgroup *cgrp) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp, - unsigned int subtree_control) +static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, + unsigned long subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); - unsigned int cur_ss_mask = subtree_control; + unsigned long cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; @@ -1082,11 +1104,10 @@ static unsigned int cgroup_calc_child_subsys_mask(struct cgroup *cgrp, return cur_ss_mask; while (true) { - unsigned int new_ss_mask = cur_ss_mask; + unsigned long new_ss_mask = cur_ss_mask; - for_each_subsys(ss, ssid) - if (cur_ss_mask & (1 << ssid)) - new_ss_mask |= ss->depends_on; + for_each_subsys_which(ss, ssid, &cur_ss_mask) + new_ss_mask |= ss->depends_on; /* * Mask out subsystems which aren't available. This can @@ -1200,7 +1221,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) * @cgrp: target cgroup * @subsys_mask: mask of the subsystem ids whose files should be removed */ -static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask) +static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask) { struct cgroup_subsys *ss; int i; @@ -1215,18 +1236,16 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask) } } -static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) +static int rebind_subsystems(struct cgroup_root *dst_root, + unsigned long ss_mask) { struct cgroup_subsys *ss; - unsigned int tmp_ss_mask; + unsigned long tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); - for_each_subsys(ss, ssid) { - if (!(ss_mask & (1 << ssid))) - continue; - + for_each_subsys_which(ss, ssid, &ss_mask) { /* if @ss has non-root csses attached to it, can't move */ if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss))) return -EBUSY; @@ -1253,7 +1272,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) * Just warn about it and continue. */ if (cgrp_dfl_root_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", + pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); } @@ -1263,18 +1282,14 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask) * Nothing can fail from this point on. Remove files for the * removed subsystems and rebind each subsystem. */ - for_each_subsys(ss, ssid) - if (ss_mask & (1 << ssid)) - cgroup_clear_dir(&ss->root->cgrp, 1 << ssid); + for_each_subsys_which(ss, ssid, &ss_mask) + cgroup_clear_dir(&ss->root->cgrp, 1 << ssid); - for_each_subsys(ss, ssid) { + for_each_subsys_which(ss, ssid, &ss_mask) { struct cgroup_root *src_root; struct cgroup_subsys_state *css; struct css_set *cset; - if (!(ss_mask & (1 << ssid))) - continue; - src_root = ss->root; css = cgroup_css(&src_root->cgrp, ss); @@ -1338,7 +1353,7 @@ static int cgroup_show_options(struct seq_file *seq, } struct cgroup_sb_opts { - unsigned int subsys_mask; + unsigned long subsys_mask; unsigned int flags; char *release_agent; bool cpuset_clone_children; @@ -1351,7 +1366,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { char *token, *o = data; bool all_ss = false, one_ss = false; - unsigned int mask = -1U; + unsigned long mask = -1UL; struct cgroup_subsys *ss; int nr_opts = 0; int i; @@ -1495,7 +1510,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct cgroup_sb_opts opts; - unsigned int added_mask, removed_mask; + unsigned long added_mask, removed_mask; if (root == &cgrp_dfl_root) { pr_err("remount is not allowed\n"); @@ -1641,7 +1656,7 @@ static void init_cgroup_root(struct cgroup_root *root, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) +static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -2052,9 +2067,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, lockdep_assert_held(&css_set_rwsem); /* - * We are synchronized through threadgroup_lock() against PF_EXITING - * setting such that we can't race against cgroup_exit() changing the - * css_set to init_css_set and dropping the old one. + * We are synchronized through cgroup_threadgroup_rwsem against + * PF_EXITING setting such that we can't race against cgroup_exit() + * changing the css_set to init_css_set and dropping the old one. */ WARN_ON_ONCE(tsk->flags & PF_EXITING); old_cset = task_css_set(tsk); @@ -2111,10 +2126,11 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) * @src_cset and add it to @preloaded_csets, which should later be cleaned * up by cgroup_migrate_finish(). * - * This function may be called without holding threadgroup_lock even if the - * target is a process. Threads may be created and destroyed but as long - * as cgroup_mutex is not dropped, no new css_set can be put into play and - * the preloaded css_sets are guaranteed to cover all migrations. + * This function may be called without holding cgroup_threadgroup_rwsem + * even if the target is a process. Threads may be created and destroyed + * but as long as cgroup_mutex is not dropped, no new css_set can be put + * into play and the preloaded css_sets are guaranteed to cover all + * migrations. */ static void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, @@ -2217,7 +2233,7 @@ err: * @threadgroup: whether @leader points to the whole process or a single task * * Migrate a process or task denoted by @leader to @cgrp. If migrating a - * process, the caller must be holding threadgroup_lock of @leader. The + * process, the caller must be holding cgroup_threadgroup_rwsem. The * caller is also responsible for invoking cgroup_migrate_add_src() and * cgroup_migrate_prepare_dst() on the targets before invoking this * function and following up with cgroup_migrate_finish(). @@ -2345,7 +2361,7 @@ out_release_tset: * @leader: the task or the leader of the threadgroup to be attached * @threadgroup: attach the whole threadgroup? * - * Call holding cgroup_mutex and threadgroup_lock of @leader. + * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. */ static int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup) @@ -2376,6 +2392,47 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, return ret; } +static int cgroup_procs_write_permission(struct task_struct *task, + struct cgroup *dst_cgrp, + struct kernfs_open_file *of) +{ + const struct cred *cred = current_cred(); + const struct cred *tcred = get_task_cred(task); + int ret = 0; + + /* + * even if we're attaching all tasks in the thread group, we only + * need to check permissions on one of them. + */ + if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && + !uid_eq(cred->euid, tcred->uid) && + !uid_eq(cred->euid, tcred->suid)) + ret = -EACCES; + + if (!ret && cgroup_on_dfl(dst_cgrp)) { + struct super_block *sb = of->file->f_path.dentry->d_sb; + struct cgroup *cgrp; + struct inode *inode; + + down_read(&css_set_rwsem); + cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); + up_read(&css_set_rwsem); + + while (!cgroup_is_descendant(dst_cgrp, cgrp)) + cgrp = cgroup_parent(cgrp); + + ret = -ENOMEM; + inode = kernfs_get_inode(sb, cgrp->procs_kn); + if (inode) { + ret = inode_permission(inode, MAY_WRITE); + iput(inode); + } + } + + put_cred(tcred); + return ret; +} + /* * Find the task_struct of the task to attach by vpid and pass it along to the * function to attach either it or all tasks in its threadgroup. Will lock @@ -2385,7 +2442,6 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off, bool threadgroup) { struct task_struct *tsk; - const struct cred *cred = current_cred(), *tcred; struct cgroup *cgrp; pid_t pid; int ret; @@ -2397,29 +2453,17 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, if (!cgrp) return -ENODEV; -retry_find_task: + percpu_down_write(&cgroup_threadgroup_rwsem); rcu_read_lock(); if (pid) { tsk = find_task_by_vpid(pid); if (!tsk) { - rcu_read_unlock(); ret = -ESRCH; - goto out_unlock_cgroup; + goto out_unlock_rcu; } - /* - * even if we're attaching all tasks in the thread group, we - * only need to check permissions on one of them. - */ - tcred = __task_cred(tsk); - if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && - !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) { - rcu_read_unlock(); - ret = -EACCES; - goto out_unlock_cgroup; - } - } else + } else { tsk = current; + } if (threadgroup) tsk = tsk->group_leader; @@ -2431,35 +2475,23 @@ retry_find_task: */ if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { ret = -EINVAL; - rcu_read_unlock(); - goto out_unlock_cgroup; + goto out_unlock_rcu; } get_task_struct(tsk); rcu_read_unlock(); - threadgroup_lock(tsk); - if (threadgroup) { - if (!thread_group_leader(tsk)) { - /* - * a race with de_thread from another thread's exec() - * may strip us of our leadership, if this happens, - * there is no choice but to throw this task away and - * try again; this is - * "double-double-toil-and-trouble-check locking". - */ - threadgroup_unlock(tsk); - put_task_struct(tsk); - goto retry_find_task; - } - } - - ret = cgroup_attach_task(cgrp, tsk, threadgroup); - - threadgroup_unlock(tsk); + ret = cgroup_procs_write_permission(tsk, cgrp, of); + if (!ret) + ret = cgroup_attach_task(cgrp, tsk, threadgroup); put_task_struct(tsk); -out_unlock_cgroup: + goto out_unlock_threadgroup; + +out_unlock_rcu: + rcu_read_unlock(); +out_unlock_threadgroup: + percpu_up_write(&cgroup_threadgroup_rwsem); cgroup_kn_unlock(of->kn); return ret ?: nbytes; } @@ -2542,19 +2574,17 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) return 0; } -static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask) +static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask) { struct cgroup_subsys *ss; bool printed = false; int ssid; - for_each_subsys(ss, ssid) { - if (ss_mask & (1 << ssid)) { - if (printed) - seq_putc(seq, ' '); - seq_printf(seq, "%s", ss->name); - printed = true; - } + for_each_subsys_which(ss, ssid, &ss_mask) { + if (printed) + seq_putc(seq, ' '); + seq_printf(seq, "%s", ss->name); + printed = true; } if (printed) seq_putc(seq, '\n'); @@ -2606,6 +2636,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); + percpu_down_write(&cgroup_threadgroup_rwsem); + /* look up all csses currently attached to @cgrp's subtree */ down_read(&css_set_rwsem); css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { @@ -2661,17 +2693,8 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) goto out_finish; last_task = task; - threadgroup_lock(task); - /* raced against de_thread() from another thread? */ - if (!thread_group_leader(task)) { - threadgroup_unlock(task); - put_task_struct(task); - continue; - } - ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); - threadgroup_unlock(task); put_task_struct(task); if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) @@ -2681,6 +2704,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) out_finish: cgroup_migrate_finish(&preloaded_csets); + percpu_up_write(&cgroup_threadgroup_rwsem); return ret; } @@ -2689,8 +2713,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - unsigned int enable = 0, disable = 0; - unsigned int css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; + unsigned long enable = 0, disable = 0; + unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -2702,11 +2726,12 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, */ buf = strstrip(buf); while ((tok = strsep(&buf, " "))) { + unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask; + if (tok[0] == '\0') continue; - for_each_subsys(ss, ssid) { - if (ss->disabled || strcmp(tok + 1, ss->name) || - ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask)) + for_each_subsys_which(ss, ssid, &tmp_ss_mask) { + if (ss->disabled || strcmp(tok + 1, ss->name)) continue; if (*tok == '+') { @@ -2793,10 +2818,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * still around. In such cases, wait till it's gone using * offline_waitq. */ - for_each_subsys(ss, ssid) { - if (!(css_enable & (1 << ssid))) - continue; - + for_each_subsys_which(ss, ssid, &css_enable) { cgroup_for_each_live_child(child, cgrp) { DEFINE_WAIT(wait); @@ -3087,7 +3109,9 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) return ret; } - if (cft->seq_show == cgroup_populated_show) + if (cft->write == cgroup_procs_write) + cgrp->procs_kn = kn; + else if (cft->seq_show == cgroup_populated_show) cgrp->populated_kn = kn; return 0; } @@ -4322,7 +4346,7 @@ static struct cftype cgroup_legacy_base_files[] = { * * On failure, no file is added. */ -static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask) +static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask) { struct cgroup_subsys *ss; int i, ret = 0; @@ -4931,7 +4955,8 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) * init_css_set is in the subsystem's root cgroup. */ init_css_set.subsys[ss->id] = css; - need_forkexit_callback |= ss->fork || ss->exit; + have_fork_callback |= (bool)ss->fork << ss->id; + have_exit_callback |= (bool)ss->exit << ss->id; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't @@ -4989,6 +5014,7 @@ int __init cgroup_init(void) unsigned long key; int ssid, err; + BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); @@ -5241,11 +5267,8 @@ void cgroup_post_fork(struct task_struct *child) * css_set; otherwise, @child might change state between ->fork() * and addition to css_set. */ - if (need_forkexit_callback) { - for_each_subsys(ss, i) - if (ss->fork) - ss->fork(child); - } + for_each_subsys_which(ss, i, &have_fork_callback) + ss->fork(child); } /** @@ -5289,16 +5312,12 @@ void cgroup_exit(struct task_struct *tsk) cset = task_css_set(tsk); RCU_INIT_POINTER(tsk->cgroups, &init_css_set); - if (need_forkexit_callback) { - /* see cgroup_post_fork() for details */ - for_each_subsys(ss, i) { - if (ss->exit) { - struct cgroup_subsys_state *old_css = cset->subsys[i]; - struct cgroup_subsys_state *css = task_css(tsk, i); + /* see cgroup_post_fork() for details */ + for_each_subsys_which(ss, i, &have_exit_callback) { + struct cgroup_subsys_state *old_css = cset->subsys[i]; + struct cgroup_subsys_state *css = task_css(tsk, i); - ss->exit(css, old_css, tsk); - } - } + ss->exit(css, old_css, tsk); } if (put_cset) diff --git a/kernel/configs/xen.config b/kernel/configs/xen.config new file mode 100644 index 000000000000..ff756221f112 --- /dev/null +++ b/kernel/configs/xen.config @@ -0,0 +1,48 @@ +# global stuff - these enable us to allow some +# of the not so generic stuff below for xen +CONFIG_PARAVIRT=y +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_BLOCK=y +CONFIG_WATCHDOG=y +CONFIG_TARGET_CORE=y +CONFIG_SCSI=y +CONFIG_FB=y +CONFIG_INPUT_MISC=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_TTY=y +# Technically not required but otherwise produces +# pretty useless systems starting from allnoconfig +# You want TCP/IP and ELF binaries right? +CONFIG_INET=y +CONFIG_BINFMT_ELF=y +# generic config +CONFIG_XEN=y +CONFIG_XEN_DOM0=y +# backend drivers +CONFIG_XEN_BACKEND=y +CONFIG_XEN_BLKDEV_BACKEND=m +CONFIG_XEN_NETDEV_BACKEND=m +CONFIG_HVC_XEN=y +CONFIG_XEN_WDT=m +CONFIG_XEN_SCSI_BACKEND=m +# frontend drivers +CONFIG_XEN_FBDEV_FRONTEND=m +CONFIG_HVC_XEN_FRONTEND=y +CONFIG_INPUT_XEN_KBDDEV_FRONTEND=m +CONFIG_XEN_SCSI_FRONTEND=m +# others +CONFIG_XEN_BALLOON=y +CONFIG_XEN_SCRUB_PAGES=y +CONFIG_XEN_DEV_EVTCHN=m +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m +CONFIG_XENFS=m +CONFIG_XEN_COMPAT_XENFS=y +CONFIG_XEN_SYS_HYPERVISOR=y +CONFIG_XEN_XENBUS_FRONTEND=y +CONFIG_XEN_GNTDEV=m +CONFIG_XEN_GRANT_DEV_ALLOC=m +CONFIG_SWIOTLB_XEN=y +CONFIG_XEN_PRIVCMD=m diff --git a/kernel/events/core.c b/kernel/events/core.c index 8e13f3e54ec3..d1f37ddd1960 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,7 +36,7 @@ #include <linux/kernel_stat.h> #include <linux/cgroup.h> #include <linux/perf_event.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/hw_breakpoint.h> #include <linux/mm_types.h> #include <linux/module.h> @@ -1502,11 +1502,17 @@ static int __init perf_workqueue_init(void) core_initcall(perf_workqueue_init); +static inline int pmu_filter_match(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + return pmu->filter_match ? pmu->filter_match(event) : 1; +} + static inline int event_filter_match(struct perf_event *event) { return (event->cpu == -1 || event->cpu == smp_processor_id()) - && perf_cgroup_match(event); + && perf_cgroup_match(event) && pmu_filter_match(event); } static void diff --git a/kernel/exit.c b/kernel/exit.c index 185752a729f6..031325e9acf9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -711,10 +711,10 @@ void do_exit(long code) current->comm, task_pid_nr(current), preempt_count()); - acct_update_integrals(tsk); /* sync mm's RSS info before statistics gathering */ if (tsk->mm) sync_mm_rss(tsk->mm); + acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { hrtimer_cancel(&tsk->signal->real_timer); diff --git a/kernel/fork.c b/kernel/fork.c index 0bb88b555550..1bfefc6f96a4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1141,10 +1141,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) tty_audit_fork(sig); sched_autogroup_fork(sig); -#ifdef CONFIG_CGROUPS - init_rwsem(&sig->group_rwsem); -#endif - sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; @@ -1238,7 +1234,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, - int trace) + int trace, + unsigned long tls) { int retval; struct task_struct *p; @@ -1447,7 +1444,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, retval = copy_io(clone_flags, p); if (retval) goto bad_fork_cleanup_namespaces; - retval = copy_thread(clone_flags, stack_start, stack_size, p); + retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls); if (retval) goto bad_fork_cleanup_io; @@ -1659,7 +1656,7 @@ static inline void init_idle_pids(struct pid_link *links) struct task_struct *fork_idle(int cpu) { struct task_struct *task; - task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0); + task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0); if (!IS_ERR(task)) { init_idle_pids(task->pids); init_idle(task, cpu); @@ -1674,11 +1671,12 @@ struct task_struct *fork_idle(int cpu) * It copies the process, and if successful kick-starts * it and waits for it to finish using the VM if required. */ -long do_fork(unsigned long clone_flags, +long _do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, - int __user *child_tidptr) + int __user *child_tidptr, + unsigned long tls) { struct task_struct *p; int trace = 0; @@ -1703,7 +1701,7 @@ long do_fork(unsigned long clone_flags, } p = copy_process(clone_flags, stack_start, stack_size, - child_tidptr, NULL, trace); + child_tidptr, NULL, trace, tls); /* * Do this prior waking up the new thread - the thread pointer * might get invalid after that point, if the thread exits quickly. @@ -1744,20 +1742,34 @@ long do_fork(unsigned long clone_flags, return nr; } +#ifndef CONFIG_HAVE_COPY_THREAD_TLS +/* For compatibility with architectures that call do_fork directly rather than + * using the syscall entry points below. */ +long do_fork(unsigned long clone_flags, + unsigned long stack_start, + unsigned long stack_size, + int __user *parent_tidptr, + int __user *child_tidptr) +{ + return _do_fork(clone_flags, stack_start, stack_size, + parent_tidptr, child_tidptr, 0); +} +#endif + /* * Create a kernel thread. */ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) { - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, - (unsigned long)arg, NULL, NULL); + return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, + (unsigned long)arg, NULL, NULL, 0); } #ifdef __ARCH_WANT_SYS_FORK SYSCALL_DEFINE0(fork) { #ifdef CONFIG_MMU - return do_fork(SIGCHLD, 0, 0, NULL, NULL); + return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0); #else /* can not support in nommu mode */ return -EINVAL; @@ -1768,8 +1780,8 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, - 0, NULL, NULL); + return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + 0, NULL, NULL, 0); } #endif @@ -1777,27 +1789,27 @@ SYSCALL_DEFINE0(vfork) #ifdef CONFIG_CLONE_BACKWARDS SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, - int, tls_val, + unsigned long, tls, int __user *, child_tidptr) #elif defined(CONFIG_CLONE_BACKWARDS2) SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #elif defined(CONFIG_CLONE_BACKWARDS3) SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, int, stack_size, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, int __user *, child_tidptr, - int, tls_val) + unsigned long, tls) #endif { - return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr); + return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls); } #endif diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 9019f15deab2..52ebaca1b9fc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -302,7 +302,7 @@ static int jump_label_add_module(struct module *mod) continue; key = iterk; - if (__module_address(iter->key) == mod) { + if (within_module(iter->key, mod)) { /* * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. */ @@ -339,7 +339,7 @@ static void jump_label_del_module(struct module *mod) key = (struct static_key *)(unsigned long)iter->key; - if (__module_address(iter->key) == mod) + if (within_module(iter->key, mod)) continue; prev = &key->next; @@ -443,14 +443,16 @@ static void jump_label_update(struct static_key *key, int enable) { struct jump_entry *stop = __stop___jump_table; struct jump_entry *entry = jump_label_get_entries(key); - #ifdef CONFIG_MODULES - struct module *mod = __module_address((unsigned long)key); + struct module *mod; __jump_label_mod_update(key, enable); + preempt_disable(); + mod = __module_address((unsigned long)key); if (mod) stop = mod->jump_entries + mod->num_jump_entries; + preempt_enable(); #endif /* if there are no users, entry can be NULL */ if (entry) diff --git a/kernel/module.c b/kernel/module.c index cfc9e843a924..3e0e19763d24 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -18,7 +18,7 @@ */ #include <linux/export.h> #include <linux/moduleloader.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/init.h> #include <linux/kallsyms.h> #include <linux/file.h> @@ -101,48 +101,201 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); -#ifdef CONFIG_KGDB_KDB -struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ -#endif /* CONFIG_KGDB_KDB */ -#ifdef CONFIG_MODULE_SIG -#ifdef CONFIG_MODULE_SIG_FORCE -static bool sig_enforce = true; -#else -static bool sig_enforce = false; +#ifdef CONFIG_MODULES_TREE_LOOKUP + +/* + * Use a latched RB-tree for __module_address(); this allows us to use + * RCU-sched lookups of the address from any context. + * + * Because modules have two address ranges: init and core, we need two + * latch_tree_nodes entries. Therefore we need the back-pointer from + * mod_tree_node. + * + * Because init ranges are short lived we mark them unlikely and have placed + * them outside the critical cacheline in struct module. + * + * This is conditional on PERF_EVENTS || TRACING because those can really hit + * __module_address() hard by doing a lot of stack unwinding; potentially from + * NMI context. + */ -static int param_set_bool_enable_only(const char *val, - const struct kernel_param *kp) +static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n) { - int err; - bool test; - struct kernel_param dummy_kp = *kp; + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; - dummy_kp.arg = &test; + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->module_init; - err = param_set_bool(val, &dummy_kp); - if (err) - return err; + return (unsigned long)mod->module_core; +} + +static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n) +{ + struct mod_tree_node *mtn = container_of(n, struct mod_tree_node, node); + struct module *mod = mtn->mod; + + if (unlikely(mtn == &mod->mtn_init)) + return (unsigned long)mod->init_size; - /* Don't let them unset it once it's set! */ - if (!test && sig_enforce) - return -EROFS; + return (unsigned long)mod->core_size; +} + +static __always_inline bool +mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) +{ + return __mod_tree_val(a) < __mod_tree_val(b); +} + +static __always_inline int +mod_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long start, end; + + start = __mod_tree_val(n); + if (val < start) + return -1; + + end = start + __mod_tree_size(n); + if (val >= end) + return 1; - if (test) - sig_enforce = true; return 0; } -static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_OPS_FL_NOARG, - .set = param_set_bool_enable_only, - .get = param_get_bool, +static const struct latch_tree_ops mod_tree_ops = { + .less = mod_tree_less, + .comp = mod_tree_comp, }; -#define param_check_bool_enable_only param_check_bool +static struct mod_tree_root { + struct latch_tree_root root; + unsigned long addr_min; + unsigned long addr_max; +} mod_tree __cacheline_aligned = { + .addr_min = -1UL, +}; + +#define module_addr_min mod_tree.addr_min +#define module_addr_max mod_tree.addr_max + +static noinline void __mod_tree_insert(struct mod_tree_node *node) +{ + latch_tree_insert(&node->node, &mod_tree.root, &mod_tree_ops); +} + +static void __mod_tree_remove(struct mod_tree_node *node) +{ + latch_tree_erase(&node->node, &mod_tree.root, &mod_tree_ops); +} + +/* + * These modifications: insert, remove_init and remove; are serialized by the + * module_mutex. + */ +static void mod_tree_insert(struct module *mod) +{ + mod->mtn_core.mod = mod; + mod->mtn_init.mod = mod; + + __mod_tree_insert(&mod->mtn_core); + if (mod->init_size) + __mod_tree_insert(&mod->mtn_init); +} + +static void mod_tree_remove_init(struct module *mod) +{ + if (mod->init_size) + __mod_tree_remove(&mod->mtn_init); +} + +static void mod_tree_remove(struct module *mod) +{ + __mod_tree_remove(&mod->mtn_core); + mod_tree_remove_init(mod); +} + +static struct module *mod_find(unsigned long addr) +{ + struct latch_tree_node *ltn; + + ltn = latch_tree_find((void *)addr, &mod_tree.root, &mod_tree_ops); + if (!ltn) + return NULL; + + return container_of(ltn, struct mod_tree_node, node)->mod; +} + +#else /* MODULES_TREE_LOOKUP */ + +static unsigned long module_addr_min = -1UL, module_addr_max = 0; + +static void mod_tree_insert(struct module *mod) { } +static void mod_tree_remove_init(struct module *mod) { } +static void mod_tree_remove(struct module *mod) { } + +static struct module *mod_find(unsigned long addr) +{ + struct module *mod; + + list_for_each_entry_rcu(mod, &modules, list) { + if (within_module(addr, mod)) + return mod; + } + + return NULL; +} + +#endif /* MODULES_TREE_LOOKUP */ + +/* + * Bounds of module text, for speeding up __module_address. + * Protected by module_mutex. + */ +static void __mod_update_bounds(void *base, unsigned int size) +{ + unsigned long min = (unsigned long)base; + unsigned long max = min + size; + + if (min < module_addr_min) + module_addr_min = min; + if (max > module_addr_max) + module_addr_max = max; +} + +static void mod_update_bounds(struct module *mod) +{ + __mod_update_bounds(mod->module_core, mod->core_size); + if (mod->init_size) + __mod_update_bounds(mod->module_init, mod->init_size); +} + +#ifdef CONFIG_KGDB_KDB +struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */ +#endif /* CONFIG_KGDB_KDB */ + +static void module_assert_mutex(void) +{ + lockdep_assert_held(&module_mutex); +} + +static void module_assert_mutex_or_preempt(void) +{ +#ifdef CONFIG_LOCKDEP + if (unlikely(!debug_locks)) + return; + + WARN_ON(!rcu_read_lock_sched_held() && + !lockdep_is_held(&module_mutex)); +#endif +} + +static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); +#ifndef CONFIG_MODULE_SIG_FORCE module_param(sig_enforce, bool_enable_only, 0644); #endif /* !CONFIG_MODULE_SIG_FORCE */ -#endif /* CONFIG_MODULE_SIG */ /* Block module loading/unloading? */ int modules_disabled = 0; @@ -153,10 +306,6 @@ static DECLARE_WAIT_QUEUE_HEAD(module_wq); static BLOCKING_NOTIFIER_HEAD(module_notify_list); -/* Bounds of module allocation, for speeding __module_address. - * Protected by module_mutex. */ -static unsigned long module_addr_min = -1UL, module_addr_max = 0; - int register_module_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&module_notify_list, nb); @@ -318,6 +467,8 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr, #endif }; + module_assert_mutex_or_preempt(); + if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data)) return true; @@ -457,6 +608,8 @@ static struct module *find_module_all(const char *name, size_t len, { struct module *mod; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (!even_unformed && mod->state == MODULE_STATE_UNFORMED) continue; @@ -1169,11 +1322,17 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs, { const unsigned long *crc; - /* Since this should be found in kernel (which can't be removed), - * no locking is necessary. */ + /* + * Since this should be found in kernel (which can't be removed), no + * locking is necessary -- use preempt_disable() to placate lockdep. + */ + preempt_disable(); if (!find_symbol(VMLINUX_SYMBOL_STR(module_layout), NULL, - &crc, true, false)) + &crc, true, false)) { + preempt_enable(); BUG(); + } + preempt_enable(); return check_version(sechdrs, versindex, VMLINUX_SYMBOL_STR(module_layout), mod, crc, NULL); @@ -1661,6 +1820,10 @@ static void mod_sysfs_fini(struct module *mod) mod_kobject_put(mod); } +static void init_param_lock(struct module *mod) +{ + mutex_init(&mod->param_lock); +} #else /* !CONFIG_SYSFS */ static int mod_sysfs_setup(struct module *mod, @@ -1683,6 +1846,9 @@ static void del_usage_links(struct module *mod) { } +static void init_param_lock(struct module *mod) +{ +} #endif /* CONFIG_SYSFS */ static void mod_sysfs_teardown(struct module *mod) @@ -1852,10 +2018,11 @@ static void free_module(struct module *mod) mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); - /* Wait for RCU synchronizing before releasing mod->list and buglist. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ + synchronize_sched(); mutex_unlock(&module_mutex); /* This may be NULL, but that's OK */ @@ -2384,22 +2551,6 @@ void * __weak module_alloc(unsigned long size) return vmalloc_exec(size); } -static void *module_alloc_update_bounds(unsigned long size) -{ - void *ret = module_alloc(size); - - if (ret) { - mutex_lock(&module_mutex); - /* Update module bounds. */ - if ((unsigned long)ret < module_addr_min) - module_addr_min = (unsigned long)ret; - if ((unsigned long)ret + size > module_addr_max) - module_addr_max = (unsigned long)ret + size; - mutex_unlock(&module_mutex); - } - return ret; -} - #ifdef CONFIG_DEBUG_KMEMLEAK static void kmemleak_load_module(const struct module *mod, const struct load_info *info) @@ -2805,7 +2956,7 @@ static int move_module(struct module *mod, struct load_info *info) void *ptr; /* Do the allocs. */ - ptr = module_alloc_update_bounds(mod->core_size); + ptr = module_alloc(mod->core_size); /* * The pointer to this block is stored in the module structure * which is inside the block. Just mark it as not being a @@ -2819,7 +2970,7 @@ static int move_module(struct module *mod, struct load_info *info) mod->module_core = ptr; if (mod->init_size) { - ptr = module_alloc_update_bounds(mod->init_size); + ptr = module_alloc(mod->init_size); /* * The pointer to this block is stored in the module structure * which is inside the block. This block doesn't need to be @@ -3107,7 +3258,7 @@ static noinline int do_init_module(struct module *mod) * * http://thread.gmane.org/gmane.linux.kernel/1420814 */ - if (current->flags & PF_USED_ASYNC) + if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) async_synchronize_full(); mutex_lock(&module_mutex); @@ -3119,6 +3270,7 @@ static noinline int do_init_module(struct module *mod) mod->symtab = mod->core_symtab; mod->strtab = mod->core_strtab; #endif + mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); module_arch_freeing_init(mod); mod->module_init = NULL; @@ -3127,11 +3279,11 @@ static noinline int do_init_module(struct module *mod) mod->init_text_size = 0; /* * We want to free module_init, but be aware that kallsyms may be - * walking this with preempt disabled. In all the failure paths, - * we call synchronize_rcu/synchronize_sched, but we don't want - * to slow down the success path, so use actual RCU here. + * walking this with preempt disabled. In all the failure paths, we + * call synchronize_sched(), but we don't want to slow down the success + * path, so use actual RCU here. */ - call_rcu(&freeinit->rcu, do_free_init); + call_rcu_sched(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3188,7 +3340,9 @@ again: err = -EEXIST; goto out; } + mod_update_bounds(mod); list_add_rcu(&mod->list, &modules); + mod_tree_insert(mod); err = 0; out: @@ -3237,10 +3391,19 @@ out: return err; } -static int unknown_module_param_cb(char *param, char *val, const char *modname) +static int unknown_module_param_cb(char *param, char *val, const char *modname, + void *arg) { + struct module *mod = arg; + int ret; + + if (strcmp(param, "async_probe") == 0) { + mod->async_probe_requested = true; + return 0; + } + /* Check for magic 'dyndbg' arg */ - int ret = ddebug_dyndbg_module_param_cb(param, val, modname); + ret = ddebug_dyndbg_module_param_cb(param, val, modname); if (ret != 0) pr_warn("%s: unknown parameter '%s' ignored\n", modname, param); return 0; @@ -3295,6 +3458,8 @@ static int load_module(struct load_info *info, const char __user *uargs, if (err) goto unlink_mod; + init_param_lock(mod); + /* Now we've got everything in the final locations, we can * find optional sections. */ err = find_module_sections(mod, info); @@ -3342,7 +3507,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Module is ready to execute: parsing args may do that. */ after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, - -32768, 32767, unknown_module_param_cb); + -32768, 32767, NULL, + unknown_module_param_cb); if (IS_ERR(after_dashes)) { err = PTR_ERR(after_dashes); goto bug_cleanup; @@ -3392,8 +3558,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); wake_up_all(&module_wq); - /* Wait for RCU synchronizing before releasing mod->list. */ - synchronize_rcu(); + /* Wait for RCU-sched synchronizing before releasing mod->list. */ + synchronize_sched(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ @@ -3517,19 +3683,15 @@ const char *module_address_lookup(unsigned long addr, char **modname, char *namebuf) { - struct module *mod; const char *ret = NULL; + struct module *mod; preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) { - if (modname) - *modname = mod->name; - ret = get_ksymbol(mod, addr, size, offset); - break; - } + mod = __module_address(addr); + if (mod) { + if (modname) + *modname = mod->name; + ret = get_ksymbol(mod, addr, size, offset); } /* Make a copy in here where it's safe */ if (ret) { @@ -3537,6 +3699,7 @@ const char *module_address_lookup(unsigned long addr, ret = namebuf; } preempt_enable(); + return ret; } @@ -3660,6 +3823,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned int i; int ret; + module_assert_mutex(); + list_for_each_entry(mod, &modules, list) { if (mod->state == MODULE_STATE_UNFORMED) continue; @@ -3834,13 +3999,15 @@ struct module *__module_address(unsigned long addr) if (addr < module_addr_min || addr > module_addr_max) return NULL; - list_for_each_entry_rcu(mod, &modules, list) { + module_assert_mutex_or_preempt(); + + mod = mod_find(addr); + if (mod) { + BUG_ON(!within_module(addr, mod)); if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) - return mod; + mod = NULL; } - return NULL; + return mod; } EXPORT_SYMBOL_GPL(__module_address); diff --git a/kernel/params.c b/kernel/params.c index a22d6a759b1a..b6554aa71094 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -25,15 +25,34 @@ #include <linux/slab.h> #include <linux/ctype.h> -/* Protects all parameters, and incidentally kmalloced_param list. */ +#ifdef CONFIG_SYSFS +/* Protects all built-in parameters, modules use their own param_lock */ static DEFINE_MUTEX(param_lock); +/* Use the module's mutex, or if built-in use the built-in mutex */ +#ifdef CONFIG_MODULES +#define KPARAM_MUTEX(mod) ((mod) ? &(mod)->param_lock : ¶m_lock) +#else +#define KPARAM_MUTEX(mod) (¶m_lock) +#endif + +static inline void check_kparam_locked(struct module *mod) +{ + BUG_ON(!mutex_is_locked(KPARAM_MUTEX(mod))); +} +#else +static inline void check_kparam_locked(struct module *mod) +{ +} +#endif /* !CONFIG_SYSFS */ + /* This just allows us to keep track of which parameters are kmalloced. */ struct kmalloced_param { struct list_head list; char val[]; }; static LIST_HEAD(kmalloced_params); +static DEFINE_SPINLOCK(kmalloced_params_lock); static void *kmalloc_parameter(unsigned int size) { @@ -43,7 +62,10 @@ static void *kmalloc_parameter(unsigned int size) if (!p) return NULL; + spin_lock(&kmalloced_params_lock); list_add(&p->list, &kmalloced_params); + spin_unlock(&kmalloced_params_lock); + return p->val; } @@ -52,6 +74,7 @@ static void maybe_kfree_parameter(void *param) { struct kmalloced_param *p; + spin_lock(&kmalloced_params_lock); list_for_each_entry(p, &kmalloced_params, list) { if (p->val == param) { list_del(&p->list); @@ -59,6 +82,7 @@ static void maybe_kfree_parameter(void *param) break; } } + spin_unlock(&kmalloced_params_lock); } static char dash2underscore(char c) @@ -100,8 +124,9 @@ static int parse_one(char *param, unsigned num_params, s16 min_level, s16 max_level, + void *arg, int (*handle_unknown)(char *param, char *val, - const char *doing)) + const char *doing, void *arg)) { unsigned int i; int err; @@ -118,17 +143,17 @@ static int parse_one(char *param, return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); - mutex_lock(¶m_lock); + kernel_param_lock(params[i].mod); param_check_unsafe(¶ms[i]); err = params[i].ops->set(val, ¶ms[i]); - mutex_unlock(¶m_lock); + kernel_param_unlock(params[i].mod); return err; } } if (handle_unknown) { pr_debug("doing %s: %s='%s'\n", doing, param, val); - return handle_unknown(param, val, doing); + return handle_unknown(param, val, doing, arg); } pr_debug("Unknown argument '%s'\n", param); @@ -194,7 +219,9 @@ char *parse_args(const char *doing, unsigned num, s16 min_level, s16 max_level, - int (*unknown)(char *param, char *val, const char *doing)) + void *arg, + int (*unknown)(char *param, char *val, + const char *doing, void *arg)) { char *param, *val; @@ -214,7 +241,7 @@ char *parse_args(const char *doing, return args; irq_was_disabled = irqs_disabled(); ret = parse_one(param, val, doing, params, num, - min_level, max_level, unknown); + min_level, max_level, arg, unknown); if (irq_was_disabled && !irqs_disabled()) pr_warn("%s: option '%s' enabled irq's!\n", doing, param); @@ -251,7 +278,7 @@ char *parse_args(const char *doing, return scnprintf(buffer, PAGE_SIZE, format, \ *((type *)kp->arg)); \ } \ - struct kernel_param_ops param_ops_##name = { \ + const struct kernel_param_ops param_ops_##name = { \ .set = param_set_##name, \ .get = param_get_##name, \ }; \ @@ -303,7 +330,7 @@ static void param_free_charp(void *arg) maybe_kfree_parameter(*((char **)arg)); } -struct kernel_param_ops param_ops_charp = { +const struct kernel_param_ops param_ops_charp = { .set = param_set_charp, .get = param_get_charp, .free = param_free_charp, @@ -328,13 +355,44 @@ int param_get_bool(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_bool); -struct kernel_param_ops param_ops_bool = { +const struct kernel_param_ops param_ops_bool = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; EXPORT_SYMBOL(param_ops_bool); +int param_set_bool_enable_only(const char *val, const struct kernel_param *kp) +{ + int err = 0; + bool new_value; + bool orig_value = *(bool *)kp->arg; + struct kernel_param dummy_kp = *kp; + + dummy_kp.arg = &new_value; + + err = param_set_bool(val, &dummy_kp); + if (err) + return err; + + /* Don't let them unset it once it's set! */ + if (!new_value && orig_value) + return -EROFS; + + if (new_value) + err = param_set_bool(val, kp); + + return err; +} +EXPORT_SYMBOL_GPL(param_set_bool_enable_only); + +const struct kernel_param_ops param_ops_bool_enable_only = { + .flags = KERNEL_PARAM_OPS_FL_NOARG, + .set = param_set_bool_enable_only, + .get = param_get_bool, +}; +EXPORT_SYMBOL_GPL(param_ops_bool_enable_only); + /* This one must be bool. */ int param_set_invbool(const char *val, const struct kernel_param *kp) { @@ -356,7 +414,7 @@ int param_get_invbool(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_invbool); -struct kernel_param_ops param_ops_invbool = { +const struct kernel_param_ops param_ops_invbool = { .set = param_set_invbool, .get = param_get_invbool, }; @@ -364,12 +422,11 @@ EXPORT_SYMBOL(param_ops_invbool); int param_set_bint(const char *val, const struct kernel_param *kp) { - struct kernel_param boolkp; + /* Match bool exactly, by re-using it. */ + struct kernel_param boolkp = *kp; bool v; int ret; - /* Match bool exactly, by re-using it. */ - boolkp = *kp; boolkp.arg = &v; ret = param_set_bool(val, &boolkp); @@ -379,7 +436,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp) } EXPORT_SYMBOL(param_set_bint); -struct kernel_param_ops param_ops_bint = { +const struct kernel_param_ops param_ops_bint = { .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, @@ -387,7 +444,8 @@ struct kernel_param_ops param_ops_bint = { EXPORT_SYMBOL(param_ops_bint); /* We break the rule and mangle the string. */ -static int param_array(const char *name, +static int param_array(struct module *mod, + const char *name, const char *val, unsigned int min, unsigned int max, void *elem, int elemsize, @@ -418,7 +476,7 @@ static int param_array(const char *name, /* nul-terminate and parse */ save = val[len]; ((char *)val)[len] = '\0'; - BUG_ON(!mutex_is_locked(¶m_lock)); + check_kparam_locked(mod); ret = set(val, &kp); if (ret != 0) @@ -440,7 +498,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp) const struct kparam_array *arr = kp->arr; unsigned int temp_num; - return param_array(kp->name, val, 1, arr->max, arr->elem, + return param_array(kp->mod, kp->name, val, 1, arr->max, arr->elem, arr->elemsize, arr->ops->set, kp->level, arr->num ?: &temp_num); } @@ -449,14 +507,13 @@ static int param_array_get(char *buffer, const struct kernel_param *kp) { int i, off, ret; const struct kparam_array *arr = kp->arr; - struct kernel_param p; + struct kernel_param p = *kp; - p = *kp; for (i = off = 0; i < (arr->num ? *arr->num : arr->max); i++) { if (i) buffer[off++] = ','; p.arg = arr->elem + arr->elemsize * i; - BUG_ON(!mutex_is_locked(¶m_lock)); + check_kparam_locked(p.mod); ret = arr->ops->get(buffer + off, &p); if (ret < 0) return ret; @@ -476,7 +533,7 @@ static void param_array_free(void *arg) arr->ops->free(arr->elem + arr->elemsize * i); } -struct kernel_param_ops param_array_ops = { +const struct kernel_param_ops param_array_ops = { .set = param_array_set, .get = param_array_get, .free = param_array_free, @@ -504,7 +561,7 @@ int param_get_string(char *buffer, const struct kernel_param *kp) } EXPORT_SYMBOL(param_get_string); -struct kernel_param_ops param_ops_string = { +const struct kernel_param_ops param_ops_string = { .set = param_set_copystring, .get = param_get_string, }; @@ -539,9 +596,9 @@ static ssize_t param_attr_show(struct module_attribute *mattr, if (!attribute->param->ops->get) return -EPERM; - mutex_lock(¶m_lock); + kernel_param_lock(mk->mod); count = attribute->param->ops->get(buf, attribute->param); - mutex_unlock(¶m_lock); + kernel_param_unlock(mk->mod); if (count > 0) { strcat(buf, "\n"); ++count; @@ -551,7 +608,7 @@ static ssize_t param_attr_show(struct module_attribute *mattr, /* sysfs always hands a nul-terminated string in buf. We rely on that. */ static ssize_t param_attr_store(struct module_attribute *mattr, - struct module_kobject *km, + struct module_kobject *mk, const char *buf, size_t len) { int err; @@ -560,10 +617,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr, if (!attribute->param->ops->set) return -EPERM; - mutex_lock(¶m_lock); + kernel_param_lock(mk->mod); param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); - mutex_unlock(¶m_lock); + kernel_param_unlock(mk->mod); if (!err) return len; return err; @@ -577,17 +634,18 @@ static ssize_t param_attr_store(struct module_attribute *mattr, #endif #ifdef CONFIG_SYSFS -void __kernel_param_lock(void) +void kernel_param_lock(struct module *mod) { - mutex_lock(¶m_lock); + mutex_lock(KPARAM_MUTEX(mod)); } -EXPORT_SYMBOL(__kernel_param_lock); -void __kernel_param_unlock(void) +void kernel_param_unlock(struct module *mod) { - mutex_unlock(¶m_lock); + mutex_unlock(KPARAM_MUTEX(mod)); } -EXPORT_SYMBOL(__kernel_param_unlock); + +EXPORT_SYMBOL(kernel_param_lock); +EXPORT_SYMBOL(kernel_param_unlock); /* * add_sysfs_param - add a parameter to sysfs @@ -853,6 +911,7 @@ static void __init version_sysfs_builtin(void) mk = locate_module_kobject(vattr->module_name); if (mk) { err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); + WARN_ON_ONCE(err); kobject_uevent(&mk->kobj, KOBJ_ADD); kobject_put(&mk->kobj); } diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 7e01f78f0417..9e302315e33d 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -187,7 +187,7 @@ config DPM_WATCHDOG config DPM_WATCHDOG_TIMEOUT int "Watchdog timeout in seconds" range 1 120 - default 12 + default 60 depends on DPM_WATCHDOG config PM_TRACE diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 29472bff11ef..cb880a14cc39 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -7,8 +7,7 @@ obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_SUSPEND) += suspend.o obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o -obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ - block_io.o +obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o obj-$(CONFIG_PM_AUTOSLEEP) += autosleep.o obj-$(CONFIG_PM_WAKELOCKS) += wakelock.o diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c deleted file mode 100644 index 9a58bc258810..000000000000 --- a/kernel/power/block_io.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * This file provides functions for block I/O operations on swap/file. - * - * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> - * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> - * - * This file is released under the GPLv2. - */ - -#include <linux/bio.h> -#include <linux/kernel.h> -#include <linux/pagemap.h> -#include <linux/swap.h> - -#include "power.h" - -/** - * submit - submit BIO request. - * @rw: READ or WRITE. - * @off physical offset of page. - * @page: page we're reading or writing. - * @bio_chain: list of pending biod (for async reading) - * - * Straight from the textbook - allocate and initialize the bio. - * If we're reading, make sure the page is marked as dirty. - * Then submit it and, if @bio_chain == NULL, wait. - */ -static int submit(int rw, struct block_device *bdev, sector_t sector, - struct page *page, struct bio **bio_chain) -{ - const int bio_rw = rw | REQ_SYNC; - struct bio *bio; - - bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); - bio->bi_iter.bi_sector = sector; - bio->bi_bdev = bdev; - bio->bi_end_io = end_swap_bio_read; - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", - (unsigned long long)sector); - bio_put(bio); - return -EFAULT; - } - - lock_page(page); - bio_get(bio); - - if (bio_chain == NULL) { - submit_bio(bio_rw, bio); - wait_on_page_locked(page); - if (rw == READ) - bio_set_pages_dirty(bio); - bio_put(bio); - } else { - if (rw == READ) - get_page(page); /* These pages are freed later */ - bio->bi_private = *bio_chain; - *bio_chain = bio; - submit_bio(bio_rw, bio); - } - return 0; -} - -int hib_bio_read_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(READ, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_bio_write_page(pgoff_t page_off, void *addr, struct bio **bio_chain) -{ - return submit(WRITE, hib_resume_bdev, page_off * (PAGE_SIZE >> 9), - virt_to_page(addr), bio_chain); -} - -int hib_wait_on_bio_chain(struct bio **bio_chain) -{ - struct bio *bio; - struct bio *next_bio; - int ret = 0; - - if (bio_chain == NULL) - return 0; - - bio = *bio_chain; - if (bio == NULL) - return 0; - while (bio) { - struct page *page; - - next_bio = bio->bi_private; - page = bio->bi_io_vec[0].bv_page; - wait_on_page_locked(page); - if (!PageUptodate(page) || PageError(page)) - ret = -EIO; - put_page(page); - bio_put(bio); - bio = next_bio; - } - *bio_chain = NULL; - return ret; -} diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 2329daae5255..690f78f210f2 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -552,7 +552,7 @@ int hibernation_platform_enter(void) error = disable_nonboot_cpus(); if (error) - goto Platform_finish; + goto Enable_cpus; local_irq_disable(); syscore_suspend(); @@ -568,6 +568,8 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); local_irq_enable(); + + Enable_cpus: enable_nonboot_cpus(); Platform_finish: diff --git a/kernel/power/power.h b/kernel/power/power.h index ce9b8328a689..caadb566e82b 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -163,15 +163,6 @@ extern void swsusp_close(fmode_t); extern int swsusp_unmark(void); #endif -/* kernel/power/block_io.c */ -extern struct block_device *hib_resume_bdev; - -extern int hib_bio_read_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_bio_write_page(pgoff_t page_off, void *addr, - struct bio **bio_chain); -extern int hib_wait_on_bio_chain(struct bio **bio_chain); - struct timeval; /* kernel/power/swsusp.c */ extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 570aff817543..2f30ca91e4fa 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -212,7 +212,84 @@ int swsusp_swap_in_use(void) */ static unsigned short root_swap = 0xffff; -struct block_device *hib_resume_bdev; +static struct block_device *hib_resume_bdev; + +struct hib_bio_batch { + atomic_t count; + wait_queue_head_t wait; + int error; +}; + +static void hib_init_batch(struct hib_bio_batch *hb) +{ + atomic_set(&hb->count, 0); + init_waitqueue_head(&hb->wait); + hb->error = 0; +} + +static void hib_end_io(struct bio *bio, int error) +{ + struct hib_bio_batch *hb = bio->bi_private; + const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + struct page *page = bio->bi_io_vec[0].bv_page; + + if (!uptodate || error) { + printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", + imajor(bio->bi_bdev->bd_inode), + iminor(bio->bi_bdev->bd_inode), + (unsigned long long)bio->bi_iter.bi_sector); + + if (!error) + error = -EIO; + } + + if (bio_data_dir(bio) == WRITE) + put_page(page); + + if (error && !hb->error) + hb->error = error; + if (atomic_dec_and_test(&hb->count)) + wake_up(&hb->wait); + + bio_put(bio); +} + +static int hib_submit_io(int rw, pgoff_t page_off, void *addr, + struct hib_bio_batch *hb) +{ + struct page *page = virt_to_page(addr); + struct bio *bio; + int error = 0; + + bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); + bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); + bio->bi_bdev = hib_resume_bdev; + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + printk(KERN_ERR "PM: Adding page to bio failed at %llu\n", + (unsigned long long)bio->bi_iter.bi_sector); + bio_put(bio); + return -EFAULT; + } + + if (hb) { + bio->bi_end_io = hib_end_io; + bio->bi_private = hb; + atomic_inc(&hb->count); + submit_bio(rw, bio); + } else { + error = submit_bio_wait(rw, bio); + bio_put(bio); + } + + return error; +} + +static int hib_wait_io(struct hib_bio_batch *hb) +{ + wait_event(hb->wait, atomic_read(&hb->count) == 0); + return hb->error; +} /* * Saving part @@ -222,7 +299,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); @@ -231,7 +308,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Swap header not found!\n"); @@ -271,10 +348,10 @@ static int swsusp_swap_check(void) * write_page - Write one page to given swap location. * @buf: Address we're writing. * @offset: Offset of the swap page we're writing to. - * @bio_chain: Link the next write BIO here + * @hb: bio completion batch */ -static int write_page(void *buf, sector_t offset, struct bio **bio_chain) +static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) { void *src; int ret; @@ -282,13 +359,13 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) if (!offset) return -ENOSPC; - if (bio_chain) { + if (hb) { src = (void *)__get_free_page(__GFP_WAIT | __GFP_NOWARN | __GFP_NORETRY); if (src) { copy_page(src, buf); } else { - ret = hib_wait_on_bio_chain(bio_chain); /* Free pages */ + ret = hib_wait_io(hb); /* Free pages */ if (ret) return ret; src = (void *)__get_free_page(__GFP_WAIT | @@ -298,14 +375,14 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) copy_page(src, buf); } else { WARN_ON_ONCE(1); - bio_chain = NULL; /* Go synchronous */ + hb = NULL; /* Go synchronous */ src = buf; } } } else { src = buf; } - return hib_bio_write_page(offset, src, bio_chain); + return hib_submit_io(WRITE_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -348,7 +425,7 @@ err_close: } static int swap_write_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { int error = 0; sector_t offset; @@ -356,7 +433,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!handle->cur) return -EINVAL; offset = alloc_swapdev_block(root_swap); - error = write_page(buf, offset, bio_chain); + error = write_page(buf, offset, hb); if (error) return error; handle->cur->entries[handle->k++] = offset; @@ -365,15 +442,15 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, if (!offset) return -ENOSPC; handle->cur->next_swap = offset; - error = write_page(handle->cur, handle->cur_swap, bio_chain); + error = write_page(handle->cur, handle->cur_swap, hb); if (error) goto out; clear_page(handle->cur); handle->cur_swap = offset; handle->k = 0; - if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { - error = hib_wait_on_bio_chain(bio_chain); + if (hb && low_free_pages() <= handle->reqd_free_pages) { + error = hib_wait_io(hb); if (error) goto out; /* @@ -445,23 +522,24 @@ static int save_image(struct swap_map_handle *handle, int ret; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", nr_to_write); m = nr_to_write / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); while (1) { ret = snapshot_read_next(snapshot); if (ret <= 0) break; - ret = swap_write_page(handle, data_of(*snapshot), &bio); + ret = swap_write_page(handle, data_of(*snapshot), &hb); if (ret) break; if (!(nr_pages % m)) @@ -469,7 +547,7 @@ static int save_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -580,7 +658,7 @@ static int save_image_lzo(struct swap_map_handle *handle, int ret = 0; int nr_pages; int err2; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; size_t off; @@ -589,6 +667,8 @@ static int save_image_lzo(struct swap_map_handle *handle, struct cmp_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for compression to limit memory * footprint. @@ -674,7 +754,6 @@ static int save_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for (;;) { for (thr = 0; thr < nr_threads; thr++) { @@ -748,7 +827,7 @@ static int save_image_lzo(struct swap_map_handle *handle, off += PAGE_SIZE) { memcpy(page, data[thr].cmp + off, PAGE_SIZE); - ret = swap_write_page(handle, page, &bio); + ret = swap_write_page(handle, page, &hb); if (ret) goto out_finish; } @@ -759,7 +838,7 @@ static int save_image_lzo(struct swap_map_handle *handle, } out_finish: - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -906,7 +985,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_bio_read_page(offset, tmp->map, NULL); + error = hib_submit_io(READ_SYNC, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -919,7 +998,7 @@ static int get_swap_reader(struct swap_map_handle *handle, } static int swap_read_page(struct swap_map_handle *handle, void *buf, - struct bio **bio_chain) + struct hib_bio_batch *hb) { sector_t offset; int error; @@ -930,7 +1009,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_bio_read_page(offset, buf, bio_chain); + error = hib_submit_io(READ_SYNC, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -968,27 +1047,28 @@ static int load_image(struct swap_map_handle *handle, int ret = 0; ktime_t start; ktime_t stop; - struct bio *bio; + struct hib_bio_batch hb; int err2; unsigned nr_pages; + hib_init_batch(&hb); + printk(KERN_INFO "PM: Loading image data pages (%u pages)...\n", nr_to_read); m = nr_to_read / 10; if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); for ( ; ; ) { ret = snapshot_write_next(snapshot); if (ret <= 0) break; - ret = swap_read_page(handle, data_of(*snapshot), &bio); + ret = swap_read_page(handle, data_of(*snapshot), &hb); if (ret) break; if (snapshot->sync_read) - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) break; if (!(nr_pages % m)) @@ -996,7 +1076,7 @@ static int load_image(struct swap_map_handle *handle, nr_pages / m * 10); nr_pages++; } - err2 = hib_wait_on_bio_chain(&bio); + err2 = hib_wait_io(&hb); stop = ktime_get(); if (!ret) ret = err2; @@ -1067,7 +1147,7 @@ static int load_image_lzo(struct swap_map_handle *handle, unsigned int m; int ret = 0; int eof = 0; - struct bio *bio; + struct hib_bio_batch hb; ktime_t start; ktime_t stop; unsigned nr_pages; @@ -1080,6 +1160,8 @@ static int load_image_lzo(struct swap_map_handle *handle, struct dec_data *data = NULL; struct crc_data *crc = NULL; + hib_init_batch(&hb); + /* * We'll limit the number of threads for decompression to limit memory * footprint. @@ -1190,7 +1272,6 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!m) m = 1; nr_pages = 0; - bio = NULL; start = ktime_get(); ret = snapshot_write_next(snapshot); @@ -1199,7 +1280,7 @@ static int load_image_lzo(struct swap_map_handle *handle, for(;;) { for (i = 0; !eof && i < want; i++) { - ret = swap_read_page(handle, page[ring], &bio); + ret = swap_read_page(handle, page[ring], &hb); if (ret) { /* * On real read error, finish. On end of data, @@ -1226,7 +1307,7 @@ static int load_image_lzo(struct swap_map_handle *handle, if (!asked) break; - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1281,7 +1362,7 @@ static int load_image_lzo(struct swap_map_handle *handle, * Wait for more data while we are decompressing. */ if (have < LZO_CMP_PAGES && asked) { - ret = hib_wait_on_bio_chain(&bio); + ret = hib_wait_io(&hb); if (ret) goto out_finish; have += asked; @@ -1430,7 +1511,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_bio_read_page(swsusp_resume_block, + error = hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (error) goto put; @@ -1438,7 +1519,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { error = -EINVAL; @@ -1482,10 +1563,10 @@ int swsusp_unmark(void) { int error; - hib_bio_read_page(swsusp_resume_block, swsusp_header, NULL); + hib_submit_io(READ_SYNC, swsusp_resume_block, swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_bio_write_page(swsusp_resume_block, + error = hib_submit_io(WRITE_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { printk(KERN_ERR "PM: Cannot find swsusp signature!\n"); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c099b082cd02..de553849f3ac 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -85,6 +85,18 @@ static struct lockdep_map console_lock_dep_map = { #endif /* + * Number of registered extended console drivers. + * + * If extended consoles are present, in-kernel cont reassembly is disabled + * and each fragment is stored as a separate log entry with proper + * continuation flag so that every emitted message has full metadata. This + * doesn't change the result for regular consoles or /proc/kmsg. For + * /dev/kmsg, as long as the reader concatenates messages according to + * consecutive continuation flags, the end result should be the same too. + */ +static int nr_ext_console_drivers; + +/* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. */ @@ -195,7 +207,7 @@ static int console_may_schedule; * need to be changed in the future, when the requirements change. * * /dev/kmsg exports the structured data in the following line format: - * "level,sequnum,timestamp;<message text>\n" + * "<level>,<sequnum>,<timestamp>,<contflag>;<message text>\n" * * The optional key/value pairs are attached as continuation lines starting * with a space character and terminated by a newline. All possible @@ -477,18 +489,18 @@ static int syslog_action_restricted(int type) type != SYSLOG_ACTION_SIZE_BUFFER; } -int check_syslog_permissions(int type, bool from_file) +int check_syslog_permissions(int type, int source) { /* * If this is from /proc/kmsg and we've already opened it, then we've * already done the capabilities checks at open time. */ - if (from_file && type != SYSLOG_ACTION_OPEN) - return 0; + if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN) + goto ok; if (syslog_action_restricted(type)) { if (capable(CAP_SYSLOG)) - return 0; + goto ok; /* * For historical reasons, accept CAP_SYS_ADMIN too, with * a warning. @@ -498,13 +510,94 @@ int check_syslog_permissions(int type, bool from_file) "CAP_SYS_ADMIN but no CAP_SYSLOG " "(deprecated).\n", current->comm, task_pid_nr(current)); - return 0; + goto ok; } return -EPERM; } +ok: return security_syslog(type); } +static void append_char(char **pp, char *e, char c) +{ + if (*pp < e) + *(*pp)++ = c; +} + +static ssize_t msg_print_ext_header(char *buf, size_t size, + struct printk_log *msg, u64 seq, + enum log_flags prev_flags) +{ + u64 ts_usec = msg->ts_nsec; + char cont = '-'; + + do_div(ts_usec, 1000); + + /* + * If we couldn't merge continuation line fragments during the print, + * export the stored flags to allow an optional external merge of the + * records. Merging the records isn't always neccessarily correct, like + * when we hit a race during printing. In most cases though, it produces + * better readable output. 'c' in the record flags mark the first + * fragment of a line, '+' the following. + */ + if (msg->flags & LOG_CONT && !(prev_flags & LOG_CONT)) + cont = 'c'; + else if ((msg->flags & LOG_CONT) || + ((prev_flags & LOG_CONT) && !(msg->flags & LOG_PREFIX))) + cont = '+'; + + return scnprintf(buf, size, "%u,%llu,%llu,%c;", + (msg->facility << 3) | msg->level, seq, ts_usec, cont); +} + +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *dict, size_t dict_len, + char *text, size_t text_len) +{ + char *p = buf, *e = buf + size; + size_t i; + + /* escape non-printable characters */ + for (i = 0; i < text_len; i++) { + unsigned char c = text[i]; + + if (c < ' ' || c >= 127 || c == '\\') + p += scnprintf(p, e - p, "\\x%02x", c); + else + append_char(&p, e, c); + } + append_char(&p, e, '\n'); + + if (dict_len) { + bool line = true; + + for (i = 0; i < dict_len; i++) { + unsigned char c = dict[i]; + + if (line) { + append_char(&p, e, ' '); + line = false; + } + + if (c == '\0') { + append_char(&p, e, '\n'); + line = true; + continue; + } + + if (c < ' ' || c >= 127 || c == '\\') { + p += scnprintf(p, e - p, "\\x%02x", c); + continue; + } + + append_char(&p, e, c); + } + append_char(&p, e, '\n'); + } + + return p - buf; +} /* /dev/kmsg - userspace message inject/listen interface */ struct devkmsg_user { @@ -512,7 +605,7 @@ struct devkmsg_user { u32 idx; enum log_flags prev; struct mutex lock; - char buf[8192]; + char buf[CONSOLE_EXT_LOG_MAX]; }; static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) @@ -570,9 +663,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, { struct devkmsg_user *user = file->private_data; struct printk_log *msg; - u64 ts_usec; - size_t i; - char cont = '-'; size_t len; ssize_t ret; @@ -608,66 +698,13 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, } msg = log_from_idx(user->idx); - ts_usec = msg->ts_nsec; - do_div(ts_usec, 1000); + len = msg_print_ext_header(user->buf, sizeof(user->buf), + msg, user->seq, user->prev); + len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, + log_dict(msg), msg->dict_len, + log_text(msg), msg->text_len); - /* - * If we couldn't merge continuation line fragments during the print, - * export the stored flags to allow an optional external merge of the - * records. Merging the records isn't always neccessarily correct, like - * when we hit a race during printing. In most cases though, it produces - * better readable output. 'c' in the record flags mark the first - * fragment of a line, '+' the following. - */ - if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT)) - cont = 'c'; - else if ((msg->flags & LOG_CONT) || - ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX))) - cont = '+'; - - len = sprintf(user->buf, "%u,%llu,%llu,%c;", - (msg->facility << 3) | msg->level, - user->seq, ts_usec, cont); user->prev = msg->flags; - - /* escape non-printable characters */ - for (i = 0; i < msg->text_len; i++) { - unsigned char c = log_text(msg)[i]; - - if (c < ' ' || c >= 127 || c == '\\') - len += sprintf(user->buf + len, "\\x%02x", c); - else - user->buf[len++] = c; - } - user->buf[len++] = '\n'; - - if (msg->dict_len) { - bool line = true; - - for (i = 0; i < msg->dict_len; i++) { - unsigned char c = log_dict(msg)[i]; - - if (line) { - user->buf[len++] = ' '; - line = false; - } - - if (c == '\0') { - user->buf[len++] = '\n'; - line = true; - continue; - } - - if (c < ' ' || c >= 127 || c == '\\') { - len += sprintf(user->buf + len, "\\x%02x", c); - continue; - } - - user->buf[len++] = c; - } - user->buf[len++] = '\n'; - } - user->idx = log_next(user->idx); user->seq++; raw_spin_unlock_irq(&logbuf_lock); @@ -1253,20 +1290,16 @@ static int syslog_print_all(char __user *buf, int size, bool clear) return len; } -int do_syslog(int type, char __user *buf, int len, bool from_file) +int do_syslog(int type, char __user *buf, int len, int source) { bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; - error = check_syslog_permissions(type, from_file); + error = check_syslog_permissions(type, source); if (error) goto out; - error = security_syslog(type); - if (error) - return error; - switch (type) { case SYSLOG_ACTION_CLOSE: /* Close log */ break; @@ -1346,7 +1379,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) syslog_prev = 0; syslog_partial = 0; } - if (from_file) { + if (source == SYSLOG_FROM_PROC) { /* * Short-cut for poll(/"proc/kmsg") which simply checks * for pending data, not the size; return the count of @@ -1393,7 +1426,9 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) * log_buf[start] to log_buf[end - 1]. * The console_lock must be held. */ -static void call_console_drivers(int level, const char *text, size_t len) +static void call_console_drivers(int level, + const char *ext_text, size_t ext_len, + const char *text, size_t len) { struct console *con; @@ -1414,7 +1449,10 @@ static void call_console_drivers(int level, const char *text, size_t len) if (!cpu_online(smp_processor_id()) && !(con->flags & CON_ANYTIME)) continue; - con->write(con, text, len); + if (con->flags & CON_EXTENDED) + con->write(con, ext_text, ext_len); + else + con->write(con, text, len); } } @@ -1557,8 +1595,12 @@ static bool cont_add(int facility, int level, const char *text, size_t len) if (cont.len && cont.flushed) return false; - if (cont.len + len > sizeof(cont.buf)) { - /* the line gets too long, split it up in separate records */ + /* + * If ext consoles are present, flush and skip in-kernel + * continuation. See nr_ext_console_drivers definition. Also, if + * the line gets too long, split it up in separate records. + */ + if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) { cont_flush(LOG_CONT); return false; } @@ -1893,9 +1935,19 @@ static struct cont { u8 level; bool flushed:1; } cont; +static char *log_text(const struct printk_log *msg) { return NULL; } +static char *log_dict(const struct printk_log *msg) { return NULL; } static struct printk_log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } -static void call_console_drivers(int level, const char *text, size_t len) {} +static ssize_t msg_print_ext_header(char *buf, size_t size, + struct printk_log *msg, u64 seq, + enum log_flags prev_flags) { return 0; } +static ssize_t msg_print_ext_body(char *buf, size_t size, + char *dict, size_t dict_len, + char *text, size_t text_len) { return 0; } +static void call_console_drivers(int level, + const char *ext_text, size_t ext_len, + const char *text, size_t len) {} static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } @@ -2148,7 +2200,7 @@ static void console_cont_flush(char *text, size_t size) len = cont_print_text(text, size); raw_spin_unlock(&logbuf_lock); stop_critical_timings(); - call_console_drivers(cont.level, text, len); + call_console_drivers(cont.level, NULL, 0, text, len); start_critical_timings(); local_irq_restore(flags); return; @@ -2172,6 +2224,7 @@ out: */ void console_unlock(void) { + static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[LOG_LINE_MAX + PREFIX_MAX]; static u64 seen_seq; unsigned long flags; @@ -2190,6 +2243,7 @@ void console_unlock(void) again: for (;;) { struct printk_log *msg; + size_t ext_len = 0; size_t len; int level; @@ -2235,13 +2289,22 @@ skip: level = msg->level; len += msg_print_text(msg, console_prev, false, text + len, sizeof(text) - len); + if (nr_ext_console_drivers) { + ext_len = msg_print_ext_header(ext_text, + sizeof(ext_text), + msg, console_seq, console_prev); + ext_len += msg_print_ext_body(ext_text + ext_len, + sizeof(ext_text) - ext_len, + log_dict(msg), msg->dict_len, + log_text(msg), msg->text_len); + } console_idx = log_next(console_idx); console_seq++; console_prev = msg->flags; raw_spin_unlock(&logbuf_lock); stop_critical_timings(); /* don't trace print latency */ - call_console_drivers(level, text, len); + call_console_drivers(level, ext_text, ext_len, text, len); start_critical_timings(); local_irq_restore(flags); } @@ -2497,6 +2560,11 @@ void register_console(struct console *newcon) newcon->next = console_drivers->next; console_drivers->next = newcon; } + + if (newcon->flags & CON_EXTENDED) + if (!nr_ext_console_drivers++) + pr_info("printk: continuation disabled due to ext consoles, expect more fragments in /dev/kmsg\n"); + if (newcon->flags & CON_PRINTBUFFER) { /* * console_unlock(); will print out the buffered messages @@ -2569,6 +2637,9 @@ int unregister_console(struct console *console) } } + if (!res && (console->flags & CON_EXTENDED)) + nr_ext_console_drivers--; + /* * If this isn't the last console and it has CON_CONSDEV set, we * need to set it on the next preferred console. diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 591af0cb7b9f..c291bd65d2cb 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -35,7 +35,7 @@ #include <linux/time.h> #include <linux/cpu.h> #include <linux/prefetch.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include "rcu.h" diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index add042926a66..65137bc28b2b 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -54,7 +54,7 @@ #include <linux/delay.h> #include <linux/stop_machine.h> #include <linux/random.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/suspend.h> #include "tree.h" diff --git a/kernel/signal.c b/kernel/signal.c index f19833b5db3c..836df8dac6cc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -414,21 +414,16 @@ void flush_sigqueue(struct sigpending *queue) } /* - * Flush all pending signals for a task. + * Flush all pending signals for this kthread. */ -void __flush_signals(struct task_struct *t) -{ - clear_tsk_thread_flag(t, TIF_SIGPENDING); - flush_sigqueue(&t->pending); - flush_sigqueue(&t->signal->shared_pending); -} - void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); - __flush_signals(t); + clear_tsk_thread_flag(t, TIF_SIGPENDING); + flush_sigqueue(&t->pending); + flush_sigqueue(&t->signal->shared_pending); spin_unlock_irqrestore(&t->sighand->siglock, flags); } diff --git a/kernel/sys.c b/kernel/sys.c index 8571296b7ddb..259fda25eb6b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1722,7 +1722,6 @@ exit_err: goto exit; } -#ifdef CONFIG_CHECKPOINT_RESTORE /* * WARNING: we don't require any capability here so be very careful * in what is allowed for modification from userspace. @@ -1818,6 +1817,7 @@ out: return error; } +#ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) { struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; @@ -1902,10 +1902,41 @@ out: } #endif /* CONFIG_CHECKPOINT_RESTORE */ +static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, + unsigned long len) +{ + /* + * This doesn't move the auxiliary vector itself since it's pinned to + * mm_struct, but it permits filling the vector with new values. It's + * up to the caller to provide sane values here, otherwise userspace + * tools which use this vector might be unhappy. + */ + unsigned long user_auxv[AT_VECTOR_SIZE]; + + if (len > sizeof(user_auxv)) + return -EINVAL; + + if (copy_from_user(user_auxv, (const void __user *)addr, len)) + return -EFAULT; + + /* Make sure the last entry is always AT_NULL */ + user_auxv[AT_VECTOR_SIZE - 2] = 0; + user_auxv[AT_VECTOR_SIZE - 1] = 0; + + BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); + + task_lock(current); + memcpy(mm->saved_auxv, user_auxv, len); + task_unlock(current); + + return 0; +} + static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { struct mm_struct *mm = current->mm; + struct prctl_mm_map prctl_map; struct vm_area_struct *vma; int error; @@ -1925,6 +1956,9 @@ static int prctl_set_mm(int opt, unsigned long addr, if (opt == PR_SET_MM_EXE_FILE) return prctl_set_mm_exe_file(mm, (unsigned int)addr); + if (opt == PR_SET_MM_AUXV) + return prctl_set_auxv(mm, addr, arg4); + if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; @@ -1933,42 +1967,64 @@ static int prctl_set_mm(int opt, unsigned long addr, down_read(&mm->mmap_sem); vma = find_vma(mm, addr); + prctl_map.start_code = mm->start_code; + prctl_map.end_code = mm->end_code; + prctl_map.start_data = mm->start_data; + prctl_map.end_data = mm->end_data; + prctl_map.start_brk = mm->start_brk; + prctl_map.brk = mm->brk; + prctl_map.start_stack = mm->start_stack; + prctl_map.arg_start = mm->arg_start; + prctl_map.arg_end = mm->arg_end; + prctl_map.env_start = mm->env_start; + prctl_map.env_end = mm->env_end; + prctl_map.auxv = NULL; + prctl_map.auxv_size = 0; + prctl_map.exe_fd = -1; + switch (opt) { case PR_SET_MM_START_CODE: - mm->start_code = addr; + prctl_map.start_code = addr; break; case PR_SET_MM_END_CODE: - mm->end_code = addr; + prctl_map.end_code = addr; break; case PR_SET_MM_START_DATA: - mm->start_data = addr; + prctl_map.start_data = addr; break; case PR_SET_MM_END_DATA: - mm->end_data = addr; + prctl_map.end_data = addr; + break; + case PR_SET_MM_START_STACK: + prctl_map.start_stack = addr; break; - case PR_SET_MM_START_BRK: - if (addr <= mm->end_data) - goto out; - - if (check_data_rlimit(rlimit(RLIMIT_DATA), mm->brk, addr, - mm->end_data, mm->start_data)) - goto out; - - mm->start_brk = addr; + prctl_map.start_brk = addr; break; - case PR_SET_MM_BRK: - if (addr <= mm->end_data) - goto out; - - if (check_data_rlimit(rlimit(RLIMIT_DATA), addr, mm->start_brk, - mm->end_data, mm->start_data)) - goto out; - - mm->brk = addr; + prctl_map.brk = addr; break; + case PR_SET_MM_ARG_START: + prctl_map.arg_start = addr; + break; + case PR_SET_MM_ARG_END: + prctl_map.arg_end = addr; + break; + case PR_SET_MM_ENV_START: + prctl_map.env_start = addr; + break; + case PR_SET_MM_ENV_END: + prctl_map.env_end = addr; + break; + default: + goto out; + } + + error = validate_prctl_map(&prctl_map); + if (error) + goto out; + switch (opt) { /* * If command line arguments and environment * are placed somewhere else on stack, we can @@ -1985,52 +2041,20 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EFAULT; goto out; } - if (opt == PR_SET_MM_START_STACK) - mm->start_stack = addr; - else if (opt == PR_SET_MM_ARG_START) - mm->arg_start = addr; - else if (opt == PR_SET_MM_ARG_END) - mm->arg_end = addr; - else if (opt == PR_SET_MM_ENV_START) - mm->env_start = addr; - else if (opt == PR_SET_MM_ENV_END) - mm->env_end = addr; - break; - - /* - * This doesn't move auxiliary vector itself - * since it's pinned to mm_struct, but allow - * to fill vector with new values. It's up - * to a caller to provide sane values here - * otherwise user space tools which use this - * vector might be unhappy. - */ - case PR_SET_MM_AUXV: { - unsigned long user_auxv[AT_VECTOR_SIZE]; - - if (arg4 > sizeof(user_auxv)) - goto out; - up_read(&mm->mmap_sem); - - if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) - return -EFAULT; - - /* Make sure the last entry is always AT_NULL */ - user_auxv[AT_VECTOR_SIZE - 2] = 0; - user_auxv[AT_VECTOR_SIZE - 1] = 0; - - BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); - - task_lock(current); - memcpy(mm->saved_auxv, user_auxv, arg4); - task_unlock(current); - - return 0; - } - default: - goto out; } + mm->start_code = prctl_map.start_code; + mm->end_code = prctl_map.end_code; + mm->start_data = prctl_map.start_data; + mm->end_data = prctl_map.end_data; + mm->start_brk = prctl_map.start_brk; + mm->brk = prctl_map.brk; + mm->start_stack = prctl_map.start_stack; + mm->arg_start = prctl_map.arg_start; + mm->arg_end = prctl_map.arg_end; + mm->env_start = prctl_map.env_start; + mm->env_end = prctl_map.env_end; + error = 0; out: up_read(&mm->mmap_sem); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 30b7a409bf1e..bca3667a2de1 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -319,32 +319,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * - * So we handle this differently than the other timekeeping accessor - * functions which retry when the sequence count has changed. The - * update side does: - * - * smp_wmb(); <- Ensure that the last base[1] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[0], tkr); - * smp_wmb(); <- Ensure that the base[0] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[1], tkr); - * - * The reader side does: - * - * do { - * seq = tkf->seq; - * smp_rmb(); - * idx = seq & 0x01; - * now = now(tkf->base[idx]); - * smp_rmb(); - * } while (seq != tkf->seq) - * - * As long as we update base[0] readers are forced off to - * base[1]. Once base[0] is updated readers are redirected to base[0] - * and the base[1] update takes place. + * Employ the latch technique; see @raw_write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a @@ -407,7 +382,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) u64 now; do { - seq = raw_read_seqcount(&tkf->seq); + seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); } while (read_seqcount_retry(&tkf->seq, seq)); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 483cecfa5c17..b3e6b39b6cf9 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -439,7 +439,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; - int ret, i; + int ret; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -451,9 +451,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ - for (i = 0; i < strlen(buts->name); i++) - if (buts->name[i] == '/') - buts->name[i] = '_'; + strreplace(buts->name, '/', '_'); bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) @@ -1450,14 +1448,14 @@ static struct trace_event trace_blk_event = { static int __init init_blk_tracer(void) { - if (!register_ftrace_event(&trace_blk_event)) { + if (!register_trace_event(&trace_blk_event)) { pr_warning("Warning: could not register block events\n"); return 1; } if (register_tracer(&blk_tracer) != 0) { pr_warning("Warning: could not register the block tracer\n"); - unregister_ftrace_event(&trace_blk_event); + unregister_trace_event(&trace_blk_event); return 1; } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0315d43176d8..6260717c18e3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com> */ -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/ring_buffer.h> #include <linux/trace_clock.h> #include <linux/trace_seq.h> @@ -115,63 +115,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s) * */ -/* - * A fast way to enable or disable all ring buffers is to - * call tracing_on or tracing_off. Turning off the ring buffers - * prevents all ring buffers from being recorded to. - * Turning this switch on, makes it OK to write to the - * ring buffer, if the ring buffer is enabled itself. - * - * There's three layers that must be on in order to write - * to the ring buffer. - * - * 1) This global flag must be set. - * 2) The ring buffer must be enabled for recording. - * 3) The per cpu buffer must be enabled for recording. - * - * In case of an anomaly, this global flag has a bit set that - * will permantly disable all ring buffers. - */ - -/* - * Global flag to disable all recording to ring buffers - * This has two bits: ON, DISABLED - * - * ON DISABLED - * ---- ---------- - * 0 0 : ring buffers are off - * 1 0 : ring buffers are on - * X 1 : ring buffers are permanently disabled - */ - -enum { - RB_BUFFERS_ON_BIT = 0, - RB_BUFFERS_DISABLED_BIT = 1, -}; - -enum { - RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, - RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, -}; - -static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; - /* Used for individual buffers (after the counter) */ #define RB_BUFFER_OFF (1 << 20) #define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) -/** - * tracing_off_permanent - permanently disable ring buffers - * - * This function, once called, will disable all ring buffers - * permanently. - */ -void tracing_off_permanent(void) -{ - set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); -} - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) @@ -452,6 +400,23 @@ struct rb_irq_work { }; /* + * Used for which event context the event is in. + * NMI = 0 + * IRQ = 1 + * SOFTIRQ = 2 + * NORMAL = 3 + * + * See trace_recursive_lock() comment below for more details. + */ +enum { + RB_CTX_NMI, + RB_CTX_IRQ, + RB_CTX_SOFTIRQ, + RB_CTX_NORMAL, + RB_CTX_MAX +}; + +/* * head_page == tail_page && head == tail then buffer is empty. */ struct ring_buffer_per_cpu { @@ -462,6 +427,7 @@ struct ring_buffer_per_cpu { arch_spinlock_t lock; struct lock_class_key lock_key; unsigned int nr_pages; + unsigned int current_context; struct list_head *pages; struct buffer_page *head_page; /* read from head */ struct buffer_page *tail_page; /* write to tail */ @@ -2224,7 +2190,7 @@ static unsigned rb_calculate_event_length(unsigned length) /* zero length can cause confusions */ if (!length) - length = 1; + length++; if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) length += sizeof(event.array[0]); @@ -2636,8 +2602,6 @@ rb_reserve_next_event(struct ring_buffer *buffer, return NULL; } -#ifdef CONFIG_TRACING - /* * The lock and unlock are done within a preempt disable section. * The current_context per_cpu variable can only be modified @@ -2675,44 +2639,38 @@ rb_reserve_next_event(struct ring_buffer *buffer, * just so happens that it is the same bit corresponding to * the current context. */ -static DEFINE_PER_CPU(unsigned int, current_context); -static __always_inline int trace_recursive_lock(void) +static __always_inline int +trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { - unsigned int val = __this_cpu_read(current_context); + unsigned int val = cpu_buffer->current_context; int bit; if (in_interrupt()) { if (in_nmi()) - bit = 0; + bit = RB_CTX_NMI; else if (in_irq()) - bit = 1; + bit = RB_CTX_IRQ; else - bit = 2; + bit = RB_CTX_SOFTIRQ; } else - bit = 3; + bit = RB_CTX_NORMAL; if (unlikely(val & (1 << bit))) return 1; val |= (1 << bit); - __this_cpu_write(current_context, val); + cpu_buffer->current_context = val; return 0; } -static __always_inline void trace_recursive_unlock(void) +static __always_inline void +trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) { - __this_cpu_and(current_context, __this_cpu_read(current_context) - 1); + cpu_buffer->current_context &= cpu_buffer->current_context - 1; } -#else - -#define trace_recursive_lock() (0) -#define trace_recursive_unlock() do { } while (0) - -#endif - /** * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from @@ -2735,41 +2693,37 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) struct ring_buffer_event *event; int cpu; - if (ring_buffer_flags != RB_BUFFERS_ON) - return NULL; - /* If we are tracing schedule, we don't want to recurse */ preempt_disable_notrace(); - if (atomic_read(&buffer->record_disabled)) - goto out_nocheck; - - if (trace_recursive_lock()) - goto out_nocheck; + if (unlikely(atomic_read(&buffer->record_disabled))) + goto out; cpu = raw_smp_processor_id(); - if (!cpumask_test_cpu(cpu, buffer->cpumask)) + if (unlikely(!cpumask_test_cpu(cpu, buffer->cpumask))) goto out; cpu_buffer = buffer->buffers[cpu]; - if (atomic_read(&cpu_buffer->record_disabled)) + if (unlikely(atomic_read(&cpu_buffer->record_disabled))) goto out; - if (length > BUF_MAX_DATA_SIZE) + if (unlikely(length > BUF_MAX_DATA_SIZE)) + goto out; + + if (unlikely(trace_recursive_lock(cpu_buffer))) goto out; event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) - goto out; + goto out_unlock; return event; + out_unlock: + trace_recursive_unlock(cpu_buffer); out: - trace_recursive_unlock(); - - out_nocheck: preempt_enable_notrace(); return NULL; } @@ -2859,7 +2813,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, rb_wakeups(buffer, cpu_buffer); - trace_recursive_unlock(); + trace_recursive_unlock(cpu_buffer); preempt_enable_notrace(); @@ -2970,7 +2924,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, out: rb_end_commit(cpu_buffer); - trace_recursive_unlock(); + trace_recursive_unlock(cpu_buffer); preempt_enable_notrace(); @@ -3000,9 +2954,6 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu; - if (ring_buffer_flags != RB_BUFFERS_ON) - return -EBUSY; - preempt_disable_notrace(); if (atomic_read(&buffer->record_disabled)) @@ -3021,9 +2972,12 @@ int ring_buffer_write(struct ring_buffer *buffer, if (length > BUF_MAX_DATA_SIZE) goto out; + if (unlikely(trace_recursive_lock(cpu_buffer))) + goto out; + event = rb_reserve_next_event(buffer, cpu_buffer, length); if (!event) - goto out; + goto out_unlock; body = rb_event_data(event); @@ -3034,6 +2988,10 @@ int ring_buffer_write(struct ring_buffer *buffer, rb_wakeups(buffer, cpu_buffer); ret = 0; + + out_unlock: + trace_recursive_unlock(cpu_buffer); + out: preempt_enable_notrace(); @@ -3860,19 +3818,36 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) } EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); -static inline int rb_ok_to_lock(void) +static inline bool rb_reader_lock(struct ring_buffer_per_cpu *cpu_buffer) { + if (likely(!in_nmi())) { + raw_spin_lock(&cpu_buffer->reader_lock); + return true; + } + /* * If an NMI die dumps out the content of the ring buffer - * do not grab locks. We also permanently disable the ring - * buffer too. A one time deal is all you get from reading - * the ring buffer from an NMI. + * trylock must be used to prevent a deadlock if the NMI + * preempted a task that holds the ring buffer locks. If + * we get the lock then all is fine, if not, then continue + * to do the read, but this can corrupt the ring buffer, + * so it must be permanently disabled from future writes. + * Reading from NMI is a oneshot deal. */ - if (likely(!in_nmi())) - return 1; + if (raw_spin_trylock(&cpu_buffer->reader_lock)) + return true; - tracing_off_permanent(); - return 0; + /* Continue without locking, but disable the ring buffer */ + atomic_inc(&cpu_buffer->record_disabled); + return false; +} + +static inline void +rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) +{ + if (likely(locked)) + raw_spin_unlock(&cpu_buffer->reader_lock); + return; } /** @@ -3892,21 +3867,18 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; struct ring_buffer_event *event; unsigned long flags; - int dolock; + bool dolock; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return NULL; - dolock = rb_ok_to_lock(); again: local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event && event->type_len == RINGBUF_TYPE_PADDING) rb_advance_reader(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (event && event->type_len == RINGBUF_TYPE_PADDING) @@ -3959,9 +3931,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event = NULL; unsigned long flags; - int dolock; - - dolock = rb_ok_to_lock(); + bool dolock; again: /* might be called in atomic */ @@ -3972,8 +3942,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); event = rb_buffer_peek(cpu_buffer, ts, lost_events); if (event) { @@ -3981,8 +3950,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts, rb_advance_reader(cpu_buffer); } - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); out: @@ -4263,21 +4231,17 @@ int ring_buffer_empty(struct ring_buffer *buffer) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; - int dolock; + bool dolock; int cpu; int ret; - dolock = rb_ok_to_lock(); - /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); if (!ret) @@ -4297,21 +4261,17 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; - int dolock; + bool dolock; int ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 1; - dolock = rb_ok_to_lock(); - cpu_buffer = buffer->buffers[cpu]; local_irq_save(flags); - if (dolock) - raw_spin_lock(&cpu_buffer->reader_lock); + dolock = rb_reader_lock(cpu_buffer); ret = rb_per_cpu_empty(cpu_buffer); - if (dolock) - raw_spin_unlock(&cpu_buffer->reader_lock); + rb_reader_unlock(cpu_buffer, dolock); local_irq_restore(flags); return ret; @@ -4349,9 +4309,6 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, ret = -EAGAIN; - if (ring_buffer_flags != RB_BUFFERS_ON) - goto out; - if (atomic_read(&buffer_a->record_disabled)) goto out; diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 1b28df2d9104..a1503a027ee2 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -32,11 +32,11 @@ static struct task_struct *producer; static struct task_struct *consumer; static unsigned long read; -static int disable_reader; +static unsigned int disable_reader; module_param(disable_reader, uint, 0644); MODULE_PARM_DESC(disable_reader, "only run producer"); -static int write_iteration = 50; +static unsigned int write_iteration = 50; module_param(write_iteration, uint, 0644); MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); @@ -46,16 +46,16 @@ static int consumer_nice = MAX_NICE; static int producer_fifo = -1; static int consumer_fifo = -1; -module_param(producer_nice, uint, 0644); +module_param(producer_nice, int, 0644); MODULE_PARM_DESC(producer_nice, "nice prio for producer"); -module_param(consumer_nice, uint, 0644); +module_param(consumer_nice, int, 0644); MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); -module_param(producer_fifo, uint, 0644); +module_param(producer_fifo, int, 0644); MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); -module_param(consumer_fifo, uint, 0644); +module_param(consumer_fifo, int, 0644); MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); static int read_events; @@ -263,6 +263,8 @@ static void ring_buffer_producer(void) if (cnt % wakeup_interval) cond_resched(); #endif + if (kthread_should_stop()) + kill_test = 1; } while (ktime_before(end_time, timeout) && !kill_test); trace_printk("End ring buffer hammer\n"); @@ -285,7 +287,7 @@ static void ring_buffer_producer(void) entries = ring_buffer_entries(buffer); overruns = ring_buffer_overruns(buffer); - if (kill_test) + if (kill_test && !kthread_should_stop()) trace_printk("ERROR!\n"); if (!disable_reader) { @@ -379,7 +381,7 @@ static int ring_buffer_consumer_thread(void *arg) } __set_current_state(TASK_RUNNING); - if (kill_test) + if (!kthread_should_stop()) wait_to_die(); return 0; @@ -399,13 +401,16 @@ static int ring_buffer_producer_thread(void *arg) } ring_buffer_producer(); + if (kill_test) + goto out_kill; trace_printk("Sleeping for 10 secs\n"); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ * SLEEP_TIME); } - if (kill_test) +out_kill: + if (!kthread_should_stop()) wait_to_die(); return 0; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 05330494a0df..abcbf7ff8743 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -297,11 +297,11 @@ void trace_array_put(struct trace_array *this_tr) mutex_unlock(&trace_types_lock); } -int filter_check_discard(struct ftrace_event_file *file, void *rec, +int filter_check_discard(struct trace_event_file *file, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) && + if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && !filter_match_preds(file->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; @@ -311,7 +311,7 @@ int filter_check_discard(struct ftrace_event_file *file, void *rec, } EXPORT_SYMBOL_GPL(filter_check_discard); -int call_filter_check_discard(struct ftrace_event_call *call, void *rec, +int call_filter_check_discard(struct trace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { @@ -876,6 +876,7 @@ static struct { { trace_clock_jiffies, "uptime", 0 }, { trace_clock, "perf", 1 }, { ktime_get_mono_fast_ns, "mono", 1 }, + { ktime_get_raw_fast_ns, "mono_raw", 1 }, ARCH_TRACE_CLOCKS }; @@ -1693,13 +1694,13 @@ static struct ring_buffer *temp_buffer; struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, - struct ftrace_event_file *ftrace_file, + struct trace_event_file *trace_file, int type, unsigned long len, unsigned long flags, int pc) { struct ring_buffer_event *entry; - *current_rb = ftrace_file->tr->trace_buffer.buffer; + *current_rb = trace_file->tr->trace_buffer.buffer; entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); /* @@ -1708,7 +1709,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, * to store the trace event for the tigger to use. It's recusive * safe and will not be recorded anywhere. */ - if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { + if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { *current_rb = temp_buffer; entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); @@ -1760,7 +1761,7 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_function; + struct trace_event_call *call = &event_function; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -1795,7 +1796,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags, int skip, int pc, struct pt_regs *regs) { - struct ftrace_event_call *call = &event_kernel_stack; + struct trace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -1923,7 +1924,7 @@ static DEFINE_PER_CPU(int, user_stack_count); void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_user_stack; + struct trace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -2129,7 +2130,7 @@ static void trace_printk_start_stop_comm(int enabled) */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - struct ftrace_event_call *call = &event_bprint; + struct trace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_array *tr = &global_trace; @@ -2187,7 +2188,7 @@ static int __trace_array_vprintk(struct ring_buffer *buffer, unsigned long ip, const char *fmt, va_list args) { - struct ftrace_event_call *call = &event_print; + struct trace_event_call *call = &event_print; struct ring_buffer_event *event; int len = 0, size, pc; struct print_entry *entry; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d2612016de94..f060716b02ae 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -12,7 +12,7 @@ #include <linux/ftrace.h> #include <linux/hw_breakpoint.h> #include <linux/trace_seq.h> -#include <linux/ftrace_event.h> +#include <linux/trace_events.h> #include <linux/compiler.h> #include <linux/trace_seq.h> @@ -211,8 +211,8 @@ struct trace_array { #ifdef CONFIG_FTRACE_SYSCALLS int sys_refcount_enter; int sys_refcount_exit; - struct ftrace_event_file __rcu *enter_syscall_files[NR_syscalls]; - struct ftrace_event_file __rcu *exit_syscall_files[NR_syscalls]; + struct trace_event_file __rcu *enter_syscall_files[NR_syscalls]; + struct trace_event_file __rcu *exit_syscall_files[NR_syscalls]; #endif int stop_count; int clock_id; @@ -858,7 +858,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops); #define ftrace_destroy_filter_files(ops) do { } while (0) #endif /* CONFIG_FUNCTION_TRACER && CONFIG_DYNAMIC_FTRACE */ -int ftrace_event_is_function(struct ftrace_event_call *call); +int ftrace_event_is_function(struct trace_event_call *call); /* * struct trace_parser - servers for reading the user input separated by spaces @@ -992,7 +992,7 @@ struct event_subsystem { int ref_count; }; -struct ftrace_subsystem_dir { +struct trace_subsystem_dir { struct list_head list; struct event_subsystem *subsystem; struct trace_array *tr; @@ -1052,30 +1052,30 @@ struct filter_pred { extern enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not); -extern void print_event_filter(struct ftrace_event_file *file, +extern void print_event_filter(struct trace_event_file *file, struct trace_seq *s); -extern int apply_event_filter(struct ftrace_event_file *file, +extern int apply_event_filter(struct trace_event_file *file, char *filter_string); -extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, +extern int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string); extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); extern int filter_assign_type(const char *type); -extern int create_event_filter(struct ftrace_event_call *call, +extern int create_event_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp); extern void free_event_filter(struct event_filter *filter); struct ftrace_event_field * -trace_find_event_field(struct ftrace_event_call *call, char *name); +trace_find_event_field(struct trace_event_call *call, char *name); extern void trace_event_enable_cmd_record(bool enable); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); -extern struct ftrace_event_file *find_event_file(struct trace_array *tr, - const char *system, - const char *event); +extern struct trace_event_file *find_event_file(struct trace_array *tr, + const char *system, + const char *event); static inline void *event_file_data(struct file *filp) { @@ -1180,7 +1180,7 @@ struct event_trigger_ops { * commands need to do this if they themselves log to the trace * buffer (see the @post_trigger() member below). @trigger_type * values are defined by adding new values to the trigger_type - * enum in include/linux/ftrace_event.h. + * enum in include/linux/trace_events.h. * * @post_trigger: A flag that says whether or not this command needs * to have its action delayed until after the current event has @@ -1242,23 +1242,23 @@ struct event_command { enum event_trigger_type trigger_type; bool post_trigger; int (*func)(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *params); int (*reg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); void (*unreg)(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); int (*set_filter)(char *filter_str, struct event_trigger_data *data, - struct ftrace_event_file *file); + struct trace_event_file *file); struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; -extern int trace_event_enable_disable(struct ftrace_event_file *file, +extern int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable); extern int tracing_alloc_snapshot(void); @@ -1286,7 +1286,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ - extern struct ftrace_event_call \ + extern struct trace_event_call \ __aligned(4) event_##call; #undef FTRACE_ENTRY_DUP #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \ @@ -1295,7 +1295,7 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); #include "trace_entries.h" #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) -int perf_ftrace_event_register(struct ftrace_event_call *call, +int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL @@ -1312,7 +1312,7 @@ void trace_event_init(void); void trace_event_enum_update(struct trace_enum_map **map, int len); #else static inline void __init trace_event_init(void) { } -static inlin void trace_event_enum_update(struct trace_enum_map **map, int len) { } +static inline void trace_event_enum_update(struct trace_enum_map **map, int len) { } #endif extern struct trace_iterator *tracepoint_print_iter; diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 57cbf1efdd44..a87b43f49eb4 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -29,7 +29,7 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { - struct ftrace_event_call *call = &event_branch; + struct trace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct trace_array_cpu *data; struct ring_buffer_event *event; @@ -191,7 +191,7 @@ __init static int init_branch_tracer(void) { int ret; - ret = register_ftrace_event(&trace_branch_event); + ret = register_trace_event(&trace_branch_event); if (!ret) { printk(KERN_WARNING "Warning: could not register " "branch events\n"); diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 57b67b1f24d1..0f06532a755b 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -56,6 +56,7 @@ u64 notrace trace_clock(void) { return local_clock(); } +EXPORT_SYMBOL_GPL(trace_clock); /* * trace_jiffy_clock(): Simply use jiffies as a clock counter. @@ -68,6 +69,7 @@ u64 notrace trace_clock_jiffies(void) { return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); } +EXPORT_SYMBOL_GPL(trace_clock_jiffies); /* * trace_clock_global(): special globally coherent trace clock @@ -123,6 +125,7 @@ u64 notrace trace_clock_global(void) return now; } +EXPORT_SYMBOL_GPL(trace_clock_global); static atomic64_t trace_counter; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 6fa484de2ba1..abfc903e741e 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -21,7 +21,7 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) /* Count the events in use (per event id, not per instance) */ static int total_ref_count; -static int perf_trace_event_perm(struct ftrace_event_call *tp_event, +static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { if (tp_event->perf_perm) { @@ -83,7 +83,7 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, return 0; } -static int perf_trace_event_reg(struct ftrace_event_call *tp_event, +static int perf_trace_event_reg(struct trace_event_call *tp_event, struct perf_event *p_event) { struct hlist_head __percpu *list; @@ -143,7 +143,7 @@ fail: static void perf_trace_event_unreg(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; int i; if (--tp_event->perf_refcount > 0) @@ -172,17 +172,17 @@ out: static int perf_trace_event_open(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event); } static void perf_trace_event_close(struct perf_event *p_event) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event); } -static int perf_trace_event_init(struct ftrace_event_call *tp_event, +static int perf_trace_event_init(struct trace_event_call *tp_event, struct perf_event *p_event) { int ret; @@ -206,7 +206,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, int perf_trace_init(struct perf_event *p_event) { - struct ftrace_event_call *tp_event; + struct trace_event_call *tp_event; u64 event_id = p_event->attr.config; int ret = -EINVAL; @@ -236,7 +236,7 @@ void perf_trace_destroy(struct perf_event *p_event) int perf_trace_add(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; struct hlist_head __percpu *pcpu_list; struct hlist_head *list; @@ -255,7 +255,7 @@ int perf_trace_add(struct perf_event *p_event, int flags) void perf_trace_del(struct perf_event *p_event, int flags) { - struct ftrace_event_call *tp_event = p_event->tp_event; + struct trace_event_call *tp_event = p_event->tp_event; hlist_del_rcu(&p_event->hlist_entry); tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); } @@ -357,7 +357,7 @@ static void perf_ftrace_function_disable(struct perf_event *event) ftrace_function_local_disable(&event->ftrace_ops); } -int perf_ftrace_event_register(struct ftrace_event_call *call, +int perf_ftrace_event_register(struct trace_event_call *call, enum trace_reg type, void *data) { switch (type) { diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index c4de47fc5cca..404a372ad85a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -61,14 +61,14 @@ static int system_refcount_dec(struct event_subsystem *system) #define do_for_each_event_file_safe(tr, file) \ list_for_each_entry(tr, &ftrace_trace_arrays, list) { \ - struct ftrace_event_file *___n; \ + struct trace_event_file *___n; \ list_for_each_entry_safe(file, ___n, &tr->events, list) #define while_for_each_event_file() \ } static struct list_head * -trace_get_fields(struct ftrace_event_call *event_call) +trace_get_fields(struct trace_event_call *event_call) { if (!event_call->class->get_fields) return &event_call->class->fields; @@ -89,7 +89,7 @@ __find_event_field(struct list_head *head, char *name) } struct ftrace_event_field * -trace_find_event_field(struct ftrace_event_call *call, char *name) +trace_find_event_field(struct trace_event_call *call, char *name) { struct ftrace_event_field *field; struct list_head *head; @@ -129,7 +129,7 @@ static int __trace_define_field(struct list_head *head, const char *type, return 0; } -int trace_define_field(struct ftrace_event_call *call, const char *type, +int trace_define_field(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type) { @@ -166,7 +166,7 @@ static int trace_define_common_fields(void) return ret; } -static void trace_destroy_fields(struct ftrace_event_call *call) +static void trace_destroy_fields(struct trace_event_call *call) { struct ftrace_event_field *field, *next; struct list_head *head; @@ -178,11 +178,11 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -int trace_event_raw_init(struct ftrace_event_call *call) +int trace_event_raw_init(struct trace_event_call *call) { int id; - id = register_ftrace_event(&call->event); + id = register_trace_event(&call->event); if (!id) return -ENODEV; @@ -190,18 +190,18 @@ int trace_event_raw_init(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_event_raw_init); -void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, - struct ftrace_event_file *ftrace_file, - unsigned long len) +void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, + struct trace_event_file *trace_file, + unsigned long len) { - struct ftrace_event_call *event_call = ftrace_file->event_call; + struct trace_event_call *event_call = trace_file->event_call; local_save_flags(fbuffer->flags); fbuffer->pc = preempt_count(); - fbuffer->ftrace_file = ftrace_file; + fbuffer->trace_file = trace_file; fbuffer->event = - trace_event_buffer_lock_reserve(&fbuffer->buffer, ftrace_file, + trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file, event_call->event.type, len, fbuffer->flags, fbuffer->pc); if (!fbuffer->event) @@ -210,13 +210,13 @@ void *ftrace_event_buffer_reserve(struct ftrace_event_buffer *fbuffer, fbuffer->entry = ring_buffer_event_data(fbuffer->event); return fbuffer->entry; } -EXPORT_SYMBOL_GPL(ftrace_event_buffer_reserve); +EXPORT_SYMBOL_GPL(trace_event_buffer_reserve); static DEFINE_SPINLOCK(tracepoint_iter_lock); -static void output_printk(struct ftrace_event_buffer *fbuffer) +static void output_printk(struct trace_event_buffer *fbuffer) { - struct ftrace_event_call *event_call; + struct trace_event_call *event_call; struct trace_event *event; unsigned long flags; struct trace_iterator *iter = tracepoint_print_iter; @@ -224,12 +224,12 @@ static void output_printk(struct ftrace_event_buffer *fbuffer) if (!iter) return; - event_call = fbuffer->ftrace_file->event_call; + event_call = fbuffer->trace_file->event_call; if (!event_call || !event_call->event.funcs || !event_call->event.funcs->trace) return; - event = &fbuffer->ftrace_file->event_call->event; + event = &fbuffer->trace_file->event_call->event; spin_lock_irqsave(&tracepoint_iter_lock, flags); trace_seq_init(&iter->seq); @@ -241,21 +241,21 @@ static void output_printk(struct ftrace_event_buffer *fbuffer) spin_unlock_irqrestore(&tracepoint_iter_lock, flags); } -void ftrace_event_buffer_commit(struct ftrace_event_buffer *fbuffer) +void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { if (tracepoint_printk) output_printk(fbuffer); - event_trigger_unlock_commit(fbuffer->ftrace_file, fbuffer->buffer, + event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, fbuffer->event, fbuffer->entry, fbuffer->flags, fbuffer->pc); } -EXPORT_SYMBOL_GPL(ftrace_event_buffer_commit); +EXPORT_SYMBOL_GPL(trace_event_buffer_commit); -int ftrace_event_reg(struct ftrace_event_call *call, - enum trace_reg type, void *data) +int trace_event_reg(struct trace_event_call *call, + enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT)); switch (type) { @@ -288,34 +288,34 @@ int ftrace_event_reg(struct ftrace_event_call *call, } return 0; } -EXPORT_SYMBOL_GPL(ftrace_event_reg); +EXPORT_SYMBOL_GPL(trace_event_reg); void trace_event_enable_cmd_record(bool enable) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; mutex_lock(&event_mutex); do_for_each_event_file(tr, file) { - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) + if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; if (enable) { tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } else { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); mutex_unlock(&event_mutex); } -static int __ftrace_event_enable_disable(struct ftrace_event_file *file, +static int __ftrace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; int ret = 0; int disable; @@ -337,24 +337,24 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, if (soft_disable) { if (atomic_dec_return(&file->sm_ref) > 0) break; - disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; - clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED; + clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } else - disable = !(file->flags & FTRACE_EVENT_FL_SOFT_MODE); + disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE); - if (disable && (file->flags & FTRACE_EVENT_FL_ENABLED)) { - clear_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); - if (file->flags & FTRACE_EVENT_FL_RECORDED_CMD) { + if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) { + clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); + if (file->flags & EVENT_FILE_FL_RECORDED_CMD) { tracing_stop_cmdline_record(); - clear_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } call->class->reg(call, TRACE_REG_UNREGISTER, file); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ - if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + if (file->flags & EVENT_FILE_FL_SOFT_MODE) + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); break; case 1: /* @@ -366,31 +366,31 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, * it still seems to be disabled. */ if (!soft_disable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); else { if (atomic_inc_return(&file->sm_ref) > 1) break; - set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags); } - if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { + if (!(file->flags & EVENT_FILE_FL_ENABLED)) { /* Keep the event disabled, when going to SOFT_MODE. */ if (soft_disable) - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags); if (trace_flags & TRACE_ITER_RECORD_CMD) { tracing_start_cmdline_record(); - set_bit(FTRACE_EVENT_FL_RECORDED_CMD_BIT, &file->flags); + set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } ret = call->class->reg(call, TRACE_REG_REGISTER, file); if (ret) { tracing_stop_cmdline_record(); pr_info("event trace: Could not enable event " - "%s\n", ftrace_event_name(call)); + "%s\n", trace_event_name(call)); break; } - set_bit(FTRACE_EVENT_FL_ENABLED_BIT, &file->flags); + set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags); /* WAS_ENABLED gets set but never cleared. */ call->flags |= TRACE_EVENT_FL_WAS_ENABLED; @@ -401,13 +401,13 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, return ret; } -int trace_event_enable_disable(struct ftrace_event_file *file, +int trace_event_enable_disable(struct trace_event_file *file, int enable, int soft_disable) { return __ftrace_event_enable_disable(file, enable, soft_disable); } -static int ftrace_event_enable_disable(struct ftrace_event_file *file, +static int ftrace_event_enable_disable(struct trace_event_file *file, int enable) { return __ftrace_event_enable_disable(file, enable, 0); @@ -415,7 +415,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_file *file, static void ftrace_clear_events(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { @@ -449,14 +449,14 @@ static void __get_system(struct event_subsystem *system) system_refcount_inc(system); } -static void __get_system_dir(struct ftrace_subsystem_dir *dir) +static void __get_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); dir->ref_count++; __get_system(dir->subsystem); } -static void __put_system_dir(struct ftrace_subsystem_dir *dir) +static void __put_system_dir(struct trace_subsystem_dir *dir) { WARN_ON_ONCE(dir->ref_count == 0); /* If the subsystem is about to be freed, the dir must be too */ @@ -467,14 +467,14 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir) kfree(dir); } -static void put_system(struct ftrace_subsystem_dir *dir) +static void put_system(struct trace_subsystem_dir *dir) { mutex_lock(&event_mutex); __put_system_dir(dir); mutex_unlock(&event_mutex); } -static void remove_subsystem(struct ftrace_subsystem_dir *dir) +static void remove_subsystem(struct trace_subsystem_dir *dir) { if (!dir) return; @@ -486,7 +486,7 @@ static void remove_subsystem(struct ftrace_subsystem_dir *dir) } } -static void remove_event_file_dir(struct ftrace_event_file *file) +static void remove_event_file_dir(struct trace_event_file *file) { struct dentry *dir = file->dir; struct dentry *child; @@ -515,15 +515,15 @@ static int __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match, const char *sub, const char *event, int set) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_event_file *file; + struct trace_event_call *call; const char *name; int ret = -EINVAL; list_for_each_entry(file, &tr->events, list) { call = file->event_call; - name = ftrace_event_name(call); + name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; @@ -671,8 +671,8 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call; + struct trace_event_file *file = v; + struct trace_event_call *call; struct trace_array *tr = m->private; (*pos)++; @@ -692,13 +692,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) static void *t_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - file = list_entry(&tr->events, struct ftrace_event_file, list); + file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = t_next(m, file, &l); if (!file) @@ -710,13 +710,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_file *file = v; + struct trace_event_file *file = v; struct trace_array *tr = m->private; (*pos)++; list_for_each_entry_continue(file, &tr->events, list) { - if (file->flags & FTRACE_EVENT_FL_ENABLED) + if (file->flags & EVENT_FILE_FL_ENABLED) return file; } @@ -725,13 +725,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos) static void *s_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr = m->private; loff_t l; mutex_lock(&event_mutex); - file = list_entry(&tr->events, struct ftrace_event_file, list); + file = list_entry(&tr->events, struct trace_event_file, list); for (l = 0; l <= *pos; ) { file = s_next(m, file, &l); if (!file) @@ -742,12 +742,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) static int t_show(struct seq_file *m, void *v) { - struct ftrace_event_file *file = v; - struct ftrace_event_call *call = file->event_call; + struct trace_event_file *file = v; + struct trace_event_call *call = file->event_call; if (strcmp(call->class->system, TRACE_SYSTEM) != 0) seq_printf(m, "%s:", call->class->system); - seq_printf(m, "%s\n", ftrace_event_name(call)); + seq_printf(m, "%s\n", trace_event_name(call)); return 0; } @@ -761,7 +761,7 @@ static ssize_t event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long flags; char buf[4] = "0"; @@ -774,12 +774,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (!file) return -ENODEV; - if (flags & FTRACE_EVENT_FL_ENABLED && - !(flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (flags & EVENT_FILE_FL_ENABLED && + !(flags & EVENT_FILE_FL_SOFT_DISABLED)) strcpy(buf, "1"); - if (flags & FTRACE_EVENT_FL_SOFT_DISABLED || - flags & FTRACE_EVENT_FL_SOFT_MODE) + if (flags & EVENT_FILE_FL_SOFT_DISABLED || + flags & EVENT_FILE_FL_SOFT_MODE) strcat(buf, "*"); strcat(buf, "\n"); @@ -791,7 +791,7 @@ static ssize_t event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long val; int ret; @@ -828,10 +828,10 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { const char set_to_char[4] = { '?', '0', '1', 'X' }; - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; - struct ftrace_event_call *call; - struct ftrace_event_file *file; + struct trace_event_call *call; + struct trace_event_file *file; struct trace_array *tr = dir->tr; char buf[2]; int set = 0; @@ -840,7 +840,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, mutex_lock(&event_mutex); list_for_each_entry(file, &tr->events, list) { call = file->event_call; - if (!ftrace_event_name(call) || !call->class || !call->class->reg) + if (!trace_event_name(call) || !call->class || !call->class->reg) continue; if (system && strcmp(call->class->system, system->name) != 0) @@ -851,7 +851,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt, * or if all events or cleared, or if we have * a mixture. */ - set |= (1 << !!(file->flags & FTRACE_EVENT_FL_ENABLED)); + set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED)); /* * If we have a mixture, no need to look further. @@ -873,7 +873,7 @@ static ssize_t system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; const char *name = NULL; unsigned long val; @@ -917,7 +917,7 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = event_file_data(m->private); + struct trace_event_call *call = event_file_data(m->private); struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -949,13 +949,13 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct ftrace_event_call *call = event_file_data(m->private); + struct trace_event_call *call = event_file_data(m->private); struct ftrace_event_field *field; const char *array_descriptor; switch ((unsigned long)v) { case FORMAT_HEADER: - seq_printf(m, "name: %s\n", ftrace_event_name(call)); + seq_printf(m, "name: %s\n", trace_event_name(call)); seq_printf(m, "ID: %d\n", call->event.type); seq_puts(m, "format:\n"); return 0; @@ -1062,7 +1062,7 @@ static ssize_t event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_seq *s; int r = -ENODEV; @@ -1095,7 +1095,7 @@ static ssize_t event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *file; + struct trace_event_file *file; char *buf; int err = -ENODEV; @@ -1132,7 +1132,7 @@ static LIST_HEAD(event_subsystems); static int subsystem_open(struct inode *inode, struct file *filp) { struct event_subsystem *system = NULL; - struct ftrace_subsystem_dir *dir = NULL; /* Initialize for gcc */ + struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */ struct trace_array *tr; int ret; @@ -1181,7 +1181,7 @@ static int subsystem_open(struct inode *inode, struct file *filp) static int system_tr_open(struct inode *inode, struct file *filp) { - struct ftrace_subsystem_dir *dir; + struct trace_subsystem_dir *dir; struct trace_array *tr = inode->i_private; int ret; @@ -1214,7 +1214,7 @@ static int system_tr_open(struct inode *inode, struct file *filp) static int subsystem_release(struct inode *inode, struct file *file) { - struct ftrace_subsystem_dir *dir = file->private_data; + struct trace_subsystem_dir *dir = file->private_data; trace_array_put(dir->tr); @@ -1235,7 +1235,7 @@ static ssize_t subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; struct event_subsystem *system = dir->subsystem; struct trace_seq *s; int r; @@ -1262,7 +1262,7 @@ static ssize_t subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_subsystem_dir *dir = filp->private_data; + struct trace_subsystem_dir *dir = filp->private_data; char *buf; int err; @@ -1497,9 +1497,9 @@ create_new_subsystem(const char *name) static struct dentry * event_subsystem_dir(struct trace_array *tr, const char *name, - struct ftrace_event_file *file, struct dentry *parent) + struct trace_event_file *file, struct dentry *parent) { - struct ftrace_subsystem_dir *dir; + struct trace_subsystem_dir *dir; struct event_subsystem *system; struct dentry *entry; @@ -1571,9 +1571,9 @@ event_subsystem_dir(struct trace_array *tr, const char *name, } static int -event_create_dir(struct dentry *parent, struct ftrace_event_file *file) +event_create_dir(struct dentry *parent, struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; struct trace_array *tr = file->tr; struct list_head *head; struct dentry *d_events; @@ -1591,7 +1591,7 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) } else d_events = parent; - name = ftrace_event_name(call); + name = trace_event_name(call); file->dir = tracefs_create_dir(name, d_events); if (!file->dir) { pr_warn("Could not create tracefs '%s' directory\n", name); @@ -1634,9 +1634,9 @@ event_create_dir(struct dentry *parent, struct ftrace_event_file *file) return 0; } -static void remove_event_from_tracers(struct ftrace_event_call *call) +static void remove_event_from_tracers(struct trace_event_call *call) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct trace_array *tr; do_for_each_event_file_safe(tr, file) { @@ -1654,10 +1654,10 @@ static void remove_event_from_tracers(struct ftrace_event_call *call) } while_for_each_event_file(); } -static void event_remove(struct ftrace_event_call *call) +static void event_remove(struct trace_event_call *call) { struct trace_array *tr; - struct ftrace_event_file *file; + struct trace_event_file *file; do_for_each_event_file(tr, file) { if (file->event_call != call) @@ -1673,17 +1673,17 @@ static void event_remove(struct ftrace_event_call *call) } while_for_each_event_file(); if (call->event.funcs) - __unregister_ftrace_event(&call->event); + __unregister_trace_event(&call->event); remove_event_from_tracers(call); list_del(&call->list); } -static int event_init(struct ftrace_event_call *call) +static int event_init(struct trace_event_call *call) { int ret = 0; const char *name; - name = ftrace_event_name(call); + name = trace_event_name(call); if (WARN_ON(!name)) return -EINVAL; @@ -1697,7 +1697,7 @@ static int event_init(struct ftrace_event_call *call) } static int -__register_event(struct ftrace_event_call *call, struct module *mod) +__register_event(struct trace_event_call *call, struct module *mod) { int ret; @@ -1733,7 +1733,7 @@ static char *enum_replace(char *ptr, struct trace_enum_map *map, int len) return ptr + elen; } -static void update_event_printk(struct ftrace_event_call *call, +static void update_event_printk(struct trace_event_call *call, struct trace_enum_map *map) { char *ptr; @@ -1811,7 +1811,7 @@ static void update_event_printk(struct ftrace_event_call *call, void trace_event_enum_update(struct trace_enum_map **map, int len) { - struct ftrace_event_call *call, *p; + struct trace_event_call *call, *p; const char *last_system = NULL; int last_i; int i; @@ -1836,11 +1836,11 @@ void trace_event_enum_update(struct trace_enum_map **map, int len) up_write(&trace_event_sem); } -static struct ftrace_event_file * -trace_create_new_event(struct ftrace_event_call *call, +static struct trace_event_file * +trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) @@ -1858,9 +1858,9 @@ trace_create_new_event(struct ftrace_event_call *call, /* Add an event to a trace directory */ static int -__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr) +__trace_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) @@ -1875,10 +1875,10 @@ __trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr) * the filesystem is initialized. */ static __init int -__trace_early_add_new_event(struct ftrace_event_call *call, +__trace_early_add_new_event(struct trace_event_call *call, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; file = trace_create_new_event(call, tr); if (!file) @@ -1888,10 +1888,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call, } struct ftrace_module_file_ops; -static void __add_event_to_tracers(struct ftrace_event_call *call); +static void __add_event_to_tracers(struct trace_event_call *call); /* Add an additional event_call dynamically */ -int trace_add_event_call(struct ftrace_event_call *call) +int trace_add_event_call(struct trace_event_call *call) { int ret; mutex_lock(&trace_types_lock); @@ -1910,7 +1910,7 @@ int trace_add_event_call(struct ftrace_event_call *call) * Must be called under locking of trace_types_lock, event_mutex and * trace_event_sem. */ -static void __trace_remove_event_call(struct ftrace_event_call *call) +static void __trace_remove_event_call(struct trace_event_call *call) { event_remove(call); trace_destroy_fields(call); @@ -1918,10 +1918,10 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) call->filter = NULL; } -static int probe_remove_event_call(struct ftrace_event_call *call) +static int probe_remove_event_call(struct trace_event_call *call) { struct trace_array *tr; - struct ftrace_event_file *file; + struct trace_event_file *file; #ifdef CONFIG_PERF_EVENTS if (call->perf_refcount) @@ -1932,10 +1932,10 @@ static int probe_remove_event_call(struct ftrace_event_call *call) continue; /* * We can't rely on ftrace_event_enable_disable(enable => 0) - * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress + * we are going to do, EVENT_FILE_FL_SOFT_MODE can suppress * TRACE_REG_UNREGISTER. */ - if (file->flags & FTRACE_EVENT_FL_ENABLED) + if (file->flags & EVENT_FILE_FL_ENABLED) return -EBUSY; /* * The do_for_each_event_file_safe() is @@ -1952,7 +1952,7 @@ static int probe_remove_event_call(struct ftrace_event_call *call) } /* Remove an event_call */ -int trace_remove_event_call(struct ftrace_event_call *call) +int trace_remove_event_call(struct trace_event_call *call) { int ret; @@ -1976,7 +1976,7 @@ int trace_remove_event_call(struct ftrace_event_call *call) static void trace_module_add_events(struct module *mod) { - struct ftrace_event_call **call, **start, **end; + struct trace_event_call **call, **start, **end; if (!mod->num_trace_events) return; @@ -1999,7 +1999,7 @@ static void trace_module_add_events(struct module *mod) static void trace_module_remove_events(struct module *mod) { - struct ftrace_event_call *call, *p; + struct trace_event_call *call, *p; bool clear_trace = false; down_write(&trace_event_sem); @@ -2055,28 +2055,28 @@ static struct notifier_block trace_module_nb = { static void __trace_add_event_dirs(struct trace_array *tr) { - struct ftrace_event_call *call; + struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { ret = __trace_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create directory for event %s\n", - ftrace_event_name(call)); + trace_event_name(call)); } } -struct ftrace_event_file * +struct trace_event_file * find_event_file(struct trace_array *tr, const char *system, const char *event) { - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_event_file *file; + struct trace_event_call *call; const char *name; list_for_each_entry(file, &tr->events, list) { call = file->event_call; - name = ftrace_event_name(call); + name = trace_event_name(call); if (!name || !call->class || !call->class->reg) continue; @@ -2098,7 +2098,7 @@ find_event_file(struct trace_array *tr, const char *system, const char *event) #define DISABLE_EVENT_STR "disable_event" struct event_probe_data { - struct ftrace_event_file *file; + struct trace_event_file *file; unsigned long count; int ref; bool enable; @@ -2114,9 +2114,9 @@ event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data) return; if (data->enable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); else - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &data->file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags); } static void @@ -2132,7 +2132,7 @@ event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data return; /* Skip if the event is in a state we want to switch to */ - if (data->enable == !(data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (data->count != -1) @@ -2152,7 +2152,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, seq_printf(m, "%s:%s:%s", data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, data->file->event_call->class->system, - ftrace_event_name(data->file->event_call)); + trace_event_name(data->file->event_call)); if (data->count == -1) seq_puts(m, ":unlimited\n"); @@ -2226,7 +2226,7 @@ event_enable_func(struct ftrace_hash *hash, char *glob, char *cmd, char *param, int enabled) { struct trace_array *tr = top_trace_array(); - struct ftrace_event_file *file; + struct trace_event_file *file; struct ftrace_probe_ops *ops; struct event_probe_data *data; const char *system; @@ -2358,7 +2358,7 @@ static inline int register_event_cmds(void) { return 0; } #endif /* CONFIG_DYNAMIC_FTRACE */ /* - * The top level array has already had its ftrace_event_file + * The top level array has already had its trace_event_file * descriptors created in order to allow for early events to * be recorded. This function is called after the tracefs has been * initialized, and we now have to create the files associated @@ -2367,7 +2367,7 @@ static inline int register_event_cmds(void) { return 0; } static __init void __trace_early_add_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; int ret; @@ -2375,7 +2375,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) ret = event_create_dir(tr->event_dir, file); if (ret < 0) pr_warn("Could not create directory for event %s\n", - ftrace_event_name(file->event_call)); + trace_event_name(file->event_call)); } } @@ -2388,7 +2388,7 @@ __trace_early_add_event_dirs(struct trace_array *tr) static __init void __trace_early_add_events(struct trace_array *tr) { - struct ftrace_event_call *call; + struct trace_event_call *call; int ret; list_for_each_entry(call, &ftrace_events, list) { @@ -2399,7 +2399,7 @@ __trace_early_add_events(struct trace_array *tr) ret = __trace_early_add_new_event(call, tr); if (ret < 0) pr_warn("Could not create early event %s\n", - ftrace_event_name(call)); + trace_event_name(call)); } } @@ -2407,13 +2407,13 @@ __trace_early_add_events(struct trace_array *tr) static void __trace_remove_event_dirs(struct trace_array *tr) { - struct ftrace_event_file *file, *next; + struct trace_event_file *file, *next; list_for_each_entry_safe(file, next, &tr->events, list) remove_event_file_dir(file); } -static void __add_event_to_tracers(struct ftrace_event_call *call) +static void __add_event_to_tracers(struct trace_event_call *call) { struct trace_array *tr; @@ -2421,8 +2421,8 @@ static void __add_event_to_tracers(struct ftrace_event_call *call) __trace_add_new_event(call, tr); } -extern struct ftrace_event_call *__start_ftrace_events[]; -extern struct ftrace_event_call *__stop_ftrace_events[]; +extern struct trace_event_call *__start_ftrace_events[]; +extern struct trace_event_call *__stop_ftrace_events[]; static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; @@ -2557,7 +2557,7 @@ int event_trace_del_tracer(struct trace_array *tr) static __init int event_trace_memsetup(void) { field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC); - file_cachep = KMEM_CACHE(ftrace_event_file, SLAB_PANIC); + file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC); return 0; } @@ -2593,7 +2593,7 @@ early_enable_events(struct trace_array *tr, bool disable_first) static __init int event_trace_enable(void) { struct trace_array *tr = top_trace_array(); - struct ftrace_event_call **iter, *call; + struct trace_event_call **iter, *call; int ret; if (!tr) @@ -2754,9 +2754,9 @@ static __init void event_test_stuff(void) */ static __init void event_trace_self_tests(void) { - struct ftrace_subsystem_dir *dir; - struct ftrace_event_file *file; - struct ftrace_event_call *call; + struct trace_subsystem_dir *dir; + struct trace_event_file *file; + struct trace_event_call *call; struct event_subsystem *system; struct trace_array *tr; int ret; @@ -2787,13 +2787,13 @@ static __init void event_trace_self_tests(void) continue; #endif - pr_info("Testing event %s: ", ftrace_event_name(call)); + pr_info("Testing event %s: ", trace_event_name(call)); /* * If an event is already enabled, someone is using * it and the self test should not be on. */ - if (file->flags & FTRACE_EVENT_FL_ENABLED) { + if (file->flags & EVENT_FILE_FL_ENABLED) { pr_warn("Enabled event during self test!\n"); WARN_ON_ONCE(1); continue; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7f2e97ce71a7..d81d6f302b14 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -643,7 +643,7 @@ static void append_filter_err(struct filter_parse_state *ps, free_page((unsigned long) buf); } -static inline struct event_filter *event_filter(struct ftrace_event_file *file) +static inline struct event_filter *event_filter(struct trace_event_file *file) { if (file->event_call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) return file->event_call->filter; @@ -652,7 +652,7 @@ static inline struct event_filter *event_filter(struct ftrace_event_file *file) } /* caller must hold event_mutex */ -void print_event_filter(struct ftrace_event_file *file, struct trace_seq *s) +void print_event_filter(struct trace_event_file *file, struct trace_seq *s) { struct event_filter *filter = event_filter(file); @@ -780,14 +780,14 @@ static void __free_preds(struct event_filter *filter) filter->n_preds = 0; } -static void filter_disable(struct ftrace_event_file *file) +static void filter_disable(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags &= ~TRACE_EVENT_FL_FILTERED; else - file->flags &= ~FTRACE_EVENT_FL_FILTERED; + file->flags &= ~EVENT_FILE_FL_FILTERED; } static void __free_filter(struct event_filter *filter) @@ -837,9 +837,9 @@ static int __alloc_preds(struct event_filter *filter, int n_preds) return 0; } -static inline void __remove_filter(struct ftrace_event_file *file) +static inline void __remove_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; filter_disable(file); if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) @@ -848,10 +848,10 @@ static inline void __remove_filter(struct ftrace_event_file *file) remove_filter_string(file->filter); } -static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir, +static void filter_free_subsystem_preds(struct trace_subsystem_dir *dir, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { if (file->system != dir) @@ -860,9 +860,9 @@ static void filter_free_subsystem_preds(struct ftrace_subsystem_dir *dir, } } -static inline void __free_subsystem_filter(struct ftrace_event_file *file) +static inline void __free_subsystem_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) { __free_filter(call->filter); @@ -873,10 +873,10 @@ static inline void __free_subsystem_filter(struct ftrace_event_file *file) } } -static void filter_free_subsystem_filters(struct ftrace_subsystem_dir *dir, +static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { if (file->system != dir) @@ -1056,6 +1056,9 @@ static void parse_init(struct filter_parse_state *ps, static char infix_next(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return 0; + ps->infix.cnt--; return ps->infix.string[ps->infix.tail++]; @@ -1071,6 +1074,9 @@ static char infix_peek(struct filter_parse_state *ps) static void infix_advance(struct filter_parse_state *ps) { + if (!ps->infix.cnt) + return; + ps->infix.cnt--; ps->infix.tail++; } @@ -1336,7 +1342,7 @@ parse_operand: } static struct filter_pred *create_pred(struct filter_parse_state *ps, - struct ftrace_event_call *call, + struct trace_event_call *call, int op, char *operand1, char *operand2) { struct ftrace_event_field *field; @@ -1385,7 +1391,9 @@ static int check_preds(struct filter_parse_state *ps) if (elt->op != OP_NOT) cnt--; n_normal_preds++; - WARN_ON_ONCE(cnt < 0); + /* all ops should have operands */ + if (cnt < 0) + break; } if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { @@ -1556,7 +1564,7 @@ static int fold_pred_tree(struct event_filter *filter, filter->preds); } -static int replace_preds(struct ftrace_event_call *call, +static int replace_preds(struct trace_event_call *call, struct event_filter *filter, struct filter_parse_state *ps, bool dry_run) @@ -1669,20 +1677,20 @@ fail: return err; } -static inline void event_set_filtered_flag(struct ftrace_event_file *file) +static inline void event_set_filtered_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags |= TRACE_EVENT_FL_FILTERED; else - file->flags |= FTRACE_EVENT_FL_FILTERED; + file->flags |= EVENT_FILE_FL_FILTERED; } -static inline void event_set_filter(struct ftrace_event_file *file, +static inline void event_set_filter(struct trace_event_file *file, struct event_filter *filter) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) rcu_assign_pointer(call->filter, filter); @@ -1690,9 +1698,9 @@ static inline void event_set_filter(struct ftrace_event_file *file, rcu_assign_pointer(file->filter, filter); } -static inline void event_clear_filter(struct ftrace_event_file *file) +static inline void event_clear_filter(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) RCU_INIT_POINTER(call->filter, NULL); @@ -1701,33 +1709,33 @@ static inline void event_clear_filter(struct ftrace_event_file *file) } static inline void -event_set_no_set_filter_flag(struct ftrace_event_file *file) +event_set_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; else - file->flags |= FTRACE_EVENT_FL_NO_SET_FILTER; + file->flags |= EVENT_FILE_FL_NO_SET_FILTER; } static inline void -event_clear_no_set_filter_flag(struct ftrace_event_file *file) +event_clear_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; if (call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; else - file->flags &= ~FTRACE_EVENT_FL_NO_SET_FILTER; + file->flags &= ~EVENT_FILE_FL_NO_SET_FILTER; } static inline bool -event_no_set_filter_flag(struct ftrace_event_file *file) +event_no_set_filter_flag(struct trace_event_file *file) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; - if (file->flags & FTRACE_EVENT_FL_NO_SET_FILTER) + if (file->flags & EVENT_FILE_FL_NO_SET_FILTER) return true; if ((call->flags & TRACE_EVENT_FL_USE_CALL_FILTER) && @@ -1742,12 +1750,12 @@ struct filter_list { struct event_filter *filter; }; -static int replace_system_preds(struct ftrace_subsystem_dir *dir, +static int replace_system_preds(struct trace_subsystem_dir *dir, struct trace_array *tr, struct filter_parse_state *ps, char *filter_string) { - struct ftrace_event_file *file; + struct trace_event_file *file; struct filter_list *filter_item; struct filter_list *tmp; LIST_HEAD(filter_list); @@ -1891,8 +1899,8 @@ static void create_filter_finish(struct filter_parse_state *ps) } /** - * create_filter - create a filter for a ftrace_event_call - * @call: ftrace_event_call to create a filter for + * create_filter - create a filter for a trace_event_call + * @call: trace_event_call to create a filter for * @filter_str: filter string * @set_str: remember @filter_str and enable detailed error in filter * @filterp: out param for created filter (always updated on return) @@ -1906,7 +1914,7 @@ static void create_filter_finish(struct filter_parse_state *ps) * information if @set_str is %true and the caller is responsible for * freeing it. */ -static int create_filter(struct ftrace_event_call *call, +static int create_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { @@ -1926,7 +1934,7 @@ static int create_filter(struct ftrace_event_call *call, return err; } -int create_event_filter(struct ftrace_event_call *call, +int create_event_filter(struct trace_event_call *call, char *filter_str, bool set_str, struct event_filter **filterp) { @@ -1942,7 +1950,7 @@ int create_event_filter(struct ftrace_event_call *call, * Identical to create_filter() except that it creates a subsystem filter * and always remembers @filter_str. */ -static int create_system_filter(struct ftrace_subsystem_dir *dir, +static int create_system_filter(struct trace_subsystem_dir *dir, struct trace_array *tr, char *filter_str, struct event_filter **filterp) { @@ -1968,9 +1976,9 @@ static int create_system_filter(struct ftrace_subsystem_dir *dir, } /* caller must hold event_mutex */ -int apply_event_filter(struct ftrace_event_file *file, char *filter_string) +int apply_event_filter(struct trace_event_file *file, char *filter_string) { - struct ftrace_event_call *call = file->event_call; + struct trace_event_call *call = file->event_call; struct event_filter *filter; int err; @@ -2019,7 +2027,7 @@ int apply_event_filter(struct ftrace_event_file *file, char *filter_string) return err; } -int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir, +int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, char *filter_string) { struct event_subsystem *system = dir->subsystem; @@ -2082,7 +2090,7 @@ struct function_filter_data { static char ** ftrace_function_filter_re(char *buf, int len, int *count) { - char *str, *sep, **re; + char *str, **re; str = kstrndup(buf, len, GFP_KERNEL); if (!str) @@ -2092,8 +2100,7 @@ ftrace_function_filter_re(char *buf, int len, int *count) * The argv_split function takes white space * as a separator, so convert ',' into spaces. */ - while ((sep = strchr(str, ','))) - *sep = ' '; + strreplace(str, ',', ' '); re = argv_split(GFP_KERNEL, str, count); kfree(str); @@ -2219,7 +2226,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, { int err; struct event_filter *filter; - struct ftrace_event_call *call; + struct trace_event_call *call; mutex_lock(&event_mutex); @@ -2275,7 +2282,7 @@ out_unlock: static struct test_filter_data_t { char *filter; - struct ftrace_raw_ftrace_test_filter rec; + struct trace_event_raw_ftrace_test_filter rec; int match; char *not_visited; } test_filter_data[] = { diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 8712df9decb4..42a4009fd75a 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -40,7 +40,7 @@ trigger_data_free(struct event_trigger_data *data) /** * event_triggers_call - Call triggers associated with a trace event - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @rec: The trace entry for the event, NULL for unconditional invocation * * For each trigger associated with an event, invoke the trigger @@ -63,7 +63,7 @@ trigger_data_free(struct event_trigger_data *data) * any trigger that should be deferred, ETT_NONE if nothing to defer. */ enum event_trigger_type -event_triggers_call(struct ftrace_event_file *file, void *rec) +event_triggers_call(struct trace_event_file *file, void *rec) { struct event_trigger_data *data; enum event_trigger_type tt = ETT_NONE; @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); /** * event_triggers_post_call - Call 'post_triggers' for a trace event - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @tt: enum event_trigger_type containing a set bit for each trigger to invoke * * For each trigger associated with an event, invoke the trigger @@ -103,7 +103,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); * Called from tracepoint handlers (with rcu_read_lock_sched() held). */ void -event_triggers_post_call(struct ftrace_event_file *file, +event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt) { struct event_trigger_data *data; @@ -119,7 +119,7 @@ EXPORT_SYMBOL_GPL(event_triggers_post_call); static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { - struct ftrace_event_file *event_file = event_file_data(m->private); + struct trace_event_file *event_file = event_file_data(m->private); if (t == SHOW_AVAILABLE_TRIGGERS) return NULL; @@ -129,7 +129,7 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) static void *trigger_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_file *event_file; + struct trace_event_file *event_file; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); @@ -201,7 +201,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) return ret; } -static int trigger_process_regex(struct ftrace_event_file *file, char *buff) +static int trigger_process_regex(struct trace_event_file *file, char *buff) { char *command, *next = buff; struct event_command *p; @@ -227,7 +227,7 @@ static ssize_t event_trigger_regex_write(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ftrace_event_file *event_file; + struct trace_event_file *event_file; ssize_t ret; char *buf; @@ -430,7 +430,7 @@ event_trigger_free(struct event_trigger_ops *ops, trigger_data_free(data); } -static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, +static int trace_event_trigger_enable_disable(struct trace_event_file *file, int trigger_enable) { int ret = 0; @@ -438,12 +438,12 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, if (trigger_enable) { if (atomic_inc_return(&file->tm_ref) > 1) return ret; - set_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + set_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags); ret = trace_event_enable_disable(file, 1, 1); } else { if (atomic_dec_return(&file->tm_ref) > 0) return ret; - clear_bit(FTRACE_EVENT_FL_TRIGGER_MODE_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_TRIGGER_MODE_BIT, &file->flags); ret = trace_event_enable_disable(file, 0, 1); } @@ -466,7 +466,7 @@ static int trace_event_trigger_enable_disable(struct ftrace_event_file *file, void clear_event_triggers(struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) { struct event_trigger_data *data; @@ -480,7 +480,7 @@ clear_event_triggers(struct trace_array *tr) /** * update_cond_flag - Set or reset the TRIGGER_COND bit - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * If an event has triggers and any of those triggers has a filter or * a post_trigger, trigger invocation needs to be deferred until after @@ -488,7 +488,7 @@ clear_event_triggers(struct trace_array *tr) * its TRIGGER_COND bit set, otherwise the TRIGGER_COND bit should be * cleared. */ -static void update_cond_flag(struct ftrace_event_file *file) +static void update_cond_flag(struct trace_event_file *file) { struct event_trigger_data *data; bool set_cond = false; @@ -501,9 +501,9 @@ static void update_cond_flag(struct ftrace_event_file *file) } if (set_cond) - set_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); + set_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); else - clear_bit(FTRACE_EVENT_FL_TRIGGER_COND_BIT, &file->flags); + clear_bit(EVENT_FILE_FL_TRIGGER_COND_BIT, &file->flags); } /** @@ -511,7 +511,7 @@ static void update_cond_flag(struct ftrace_event_file *file) * @glob: The raw string used to register the trigger * @ops: The trigger ops associated with the trigger * @data: Trigger-specific data to associate with the trigger - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event trigger registration. * @@ -522,7 +522,7 @@ static void update_cond_flag(struct ftrace_event_file *file) */ static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *test; int ret = 0; @@ -557,7 +557,7 @@ out: * @glob: The raw string used to register the trigger * @ops: The trigger ops associated with the trigger * @test: Trigger-specific data used to find the trigger to remove - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event trigger unregistration. * @@ -566,7 +566,7 @@ out: */ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *data; bool unregistered = false; @@ -588,7 +588,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, /** * event_trigger_callback - Generic event_command @func implementation * @cmd_ops: The command ops, used for trigger registration - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * @glob: The raw string used to register the trigger * @cmd: The cmd portion of the string used to register the trigger * @param: The params portion of the string used to register the trigger @@ -603,7 +603,7 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, */ static int event_trigger_callback(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *param) { struct event_trigger_data *trigger_data; @@ -688,7 +688,7 @@ event_trigger_callback(struct event_command *cmd_ops, * set_trigger_filter - Generic event_command @set_filter implementation * @filter_str: The filter string for the trigger, NULL to remove filter * @trigger_data: Trigger-specific data - * @file: The ftrace_event_file associated with the event + * @file: The trace_event_file associated with the event * * Common implementation for event command filter parsing and filter * instantiation. @@ -702,7 +702,7 @@ event_trigger_callback(struct event_command *cmd_ops, */ static int set_trigger_filter(char *filter_str, struct event_trigger_data *trigger_data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct event_trigger_data *data = trigger_data; struct event_filter *filter = NULL, *tmp; @@ -900,7 +900,7 @@ snapshot_count_trigger(struct event_trigger_data *data) static int register_snapshot_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { int ret = register_trigger(glob, ops, data, file); @@ -968,7 +968,7 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } * Skip 3: * stacktrace_trigger() * event_triggers_post_call() - * ftrace_raw_event_xxx() + * trace_event_raw_event_xxx() */ #define STACK_SKIP 3 @@ -1053,7 +1053,7 @@ static __init void unregister_trigger_traceon_traceoff_cmds(void) #define DISABLE_EVENT_STR "disable_event" struct enable_trigger_data { - struct ftrace_event_file *file; + struct trace_event_file *file; bool enable; }; @@ -1063,9 +1063,9 @@ event_enable_trigger(struct event_trigger_data *data) struct enable_trigger_data *enable_data = data->private_data; if (enable_data->enable) - clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); + clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); else - set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); + set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &enable_data->file->flags); } static void @@ -1077,7 +1077,7 @@ event_enable_count_trigger(struct event_trigger_data *data) return; /* Skip if the event is in a state we want to switch to */ - if (enable_data->enable == !(enable_data->file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)) + if (enable_data->enable == !(enable_data->file->flags & EVENT_FILE_FL_SOFT_DISABLED)) return; if (data->count != -1) @@ -1095,7 +1095,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, seq_printf(m, "%s:%s:%s", enable_data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR, enable_data->file->event_call->class->system, - ftrace_event_name(enable_data->file->event_call)); + trace_event_name(enable_data->file->event_call)); if (data->count == -1) seq_puts(m, ":unlimited"); @@ -1159,10 +1159,10 @@ static struct event_trigger_ops event_disable_count_trigger_ops = { static int event_enable_trigger_func(struct event_command *cmd_ops, - struct ftrace_event_file *file, + struct trace_event_file *file, char *glob, char *cmd, char *param) { - struct ftrace_event_file *event_enable_file; + struct trace_event_file *event_enable_file; struct enable_trigger_data *enable_data; struct event_trigger_data *trigger_data; struct event_trigger_ops *trigger_ops; @@ -1294,7 +1294,7 @@ event_enable_trigger_func(struct event_command *cmd_ops, static int event_enable_register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct enable_trigger_data *enable_data = data->private_data; struct enable_trigger_data *test_enable_data; @@ -1331,7 +1331,7 @@ out: static void event_enable_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, - struct ftrace_event_file *file) + struct trace_event_file *file) { struct enable_trigger_data *test_enable_data = test->private_data; struct enable_trigger_data *enable_data; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 174a6a71146c..adabf7da9113 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -125,7 +125,7 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \ static int __init \ -ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ +ftrace_define_fields_##name(struct trace_event_call *event_call) \ { \ struct struct_name field; \ int ret; \ @@ -163,14 +163,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ regfn) \ \ -struct ftrace_event_class __refdata event_class_ftrace_##call = { \ +struct trace_event_class __refdata event_class_ftrace_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .define_fields = ftrace_define_fields_##call, \ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ .reg = regfn, \ }; \ \ -struct ftrace_event_call __used event_##call = { \ +struct trace_event_call __used event_##call = { \ .class = &event_class_ftrace_##call, \ { \ .name = #call, \ @@ -179,7 +179,7 @@ struct ftrace_event_call __used event_##call = { \ .print_fmt = print, \ .flags = TRACE_EVENT_FL_IGNORE_ENABLE, \ }; \ -struct ftrace_event_call __used \ +struct trace_event_call __used \ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; #undef FTRACE_ENTRY @@ -187,7 +187,7 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; FTRACE_ENTRY_REG(call, struct_name, etype, \ PARAMS(tstruct), PARAMS(print), filter, NULL) -int ftrace_event_is_function(struct ftrace_event_call *call) +int ftrace_event_is_function(struct trace_event_call *call) { return call == &event_function; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index a51e79688455..8968bf720c12 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -278,7 +278,7 @@ int __trace_graph_entry(struct trace_array *tr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_funcgraph_entry; + struct trace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ent_entry *entry; @@ -393,7 +393,7 @@ void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_funcgraph_exit; + struct trace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ftrace_graph_ret_entry *entry; @@ -1454,12 +1454,12 @@ static __init int init_graph_trace(void) { max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1); - if (!register_ftrace_event(&graph_trace_entry_event)) { + if (!register_trace_event(&graph_trace_entry_event)) { pr_warning("Warning: could not register graph trace events\n"); return 1; } - if (!register_ftrace_event(&graph_trace_ret_event)) { + if (!register_trace_event(&graph_trace_ret_event)) { pr_warning("Warning: could not register graph trace events\n"); return 1; } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d0ce590f06e1..b7d0cdd9906c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -348,7 +348,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, struct trace_kprobe *tk; list_for_each_entry(tk, &probe_list, list) - if (strcmp(ftrace_event_name(&tk->tp.call), event) == 0 && + if (strcmp(trace_event_name(&tk->tp.call), event) == 0 && strcmp(tk->tp.call.class->system, group) == 0) return tk; return NULL; @@ -359,7 +359,7 @@ static struct trace_kprobe *find_trace_kprobe(const char *event, * if the file is NULL, enable "perf" handler, or enable "trace" handler. */ static int -enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) +enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { int ret = 0; @@ -394,7 +394,7 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) * if the file is NULL, disable "perf" handler, or disable "trace" handler. */ static int -disable_trace_kprobe(struct trace_kprobe *tk, struct ftrace_event_file *file) +disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) { struct event_file_link *link = NULL; int wait = 0; @@ -523,7 +523,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk) mutex_lock(&probe_lock); /* Delete old (same name) event if exist */ - old_tk = find_trace_kprobe(ftrace_event_name(&tk->tp.call), + old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call), tk->tp.call.class->system); if (old_tk) { ret = unregister_trace_kprobe(old_tk); @@ -572,7 +572,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb, if (ret) pr_warning("Failed to re-register probe %s on" "%s: %d\n", - ftrace_event_name(&tk->tp.call), + trace_event_name(&tk->tp.call), mod->name, ret); } } @@ -829,7 +829,7 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); seq_printf(m, ":%s/%s", tk->tp.call.class->system, - ftrace_event_name(&tk->tp.call)); + trace_event_name(&tk->tp.call)); if (!tk->symbol) seq_printf(m, " 0x%p", tk->rp.kp.addr); @@ -888,7 +888,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) struct trace_kprobe *tk = v; seq_printf(m, " %-44s %15lu %15lu\n", - ftrace_event_name(&tk->tp.call), tk->nhit, + trace_event_name(&tk->tp.call), tk->nhit, tk->rp.kp.nmissed); return 0; @@ -917,18 +917,18 @@ static const struct file_operations kprobe_profile_ops = { /* Kprobe handler */ static nokprobe_inline void __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct kprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; int size, dsize, pc; unsigned long irq_flags; - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; local_save_flags(irq_flags); @@ -937,7 +937,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, dsize = __get_data_size(&tk->tp, regs); size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, irq_flags, pc); if (!event) @@ -947,7 +947,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, entry->ip = (unsigned long)tk->rp.kp.addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); } @@ -965,18 +965,18 @@ NOKPROBE_SYMBOL(kprobe_trace_func); static nokprobe_inline void __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct kretprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; int size, pc, dsize; unsigned long irq_flags; - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; local_save_flags(irq_flags); @@ -985,7 +985,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, dsize = __get_data_size(&tk->tp, regs); size = sizeof(*entry) + tk->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, irq_flags, pc); if (!event) @@ -996,7 +996,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, entry->ret_ip = (unsigned long)ri->ret_addr; store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); - event_trigger_unlock_commit_regs(ftrace_file, buffer, event, + event_trigger_unlock_commit_regs(trace_file, buffer, event, entry, irq_flags, pc, regs); } @@ -1025,7 +1025,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags, field = (struct kprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; @@ -1056,7 +1056,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, field = (struct kretprobe_trace_entry_head *)iter->ent; tp = container_of(event, struct trace_probe, call.event); - trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); + trace_seq_printf(s, "%s: (", trace_event_name(&tp->call)); if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto out; @@ -1081,7 +1081,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, } -static int kprobe_event_define_fields(struct ftrace_event_call *event_call) +static int kprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i; struct kprobe_trace_entry_head field; @@ -1104,7 +1104,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) +static int kretprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i; struct kretprobe_trace_entry_head field; @@ -1134,7 +1134,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; @@ -1169,7 +1169,7 @@ static void kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; @@ -1206,11 +1206,11 @@ NOKPROBE_SYMBOL(kretprobe_perf_func); * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe * lockless, but we can't race with this __init function. */ -static int kprobe_register(struct ftrace_event_call *event, +static int kprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { struct trace_kprobe *tk = (struct trace_kprobe *)event->data; - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -1276,10 +1276,10 @@ static struct trace_event_functions kprobe_funcs = { static int register_kprobe_event(struct trace_kprobe *tk) { - struct ftrace_event_call *call = &tk->tp.call; + struct trace_event_call *call = &tk->tp.call; int ret; - /* Initialize ftrace_event_call */ + /* Initialize trace_event_call */ INIT_LIST_HEAD(&call->class->fields); if (trace_kprobe_is_return(tk)) { call->event.funcs = &kretprobe_funcs; @@ -1290,7 +1290,7 @@ static int register_kprobe_event(struct trace_kprobe *tk) } if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) return -ENOMEM; - ret = register_ftrace_event(&call->event); + ret = register_trace_event(&call->event); if (!ret) { kfree(call->print_fmt); return -ENODEV; @@ -1301,9 +1301,9 @@ static int register_kprobe_event(struct trace_kprobe *tk) ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", - ftrace_event_name(call)); + trace_event_name(call)); kfree(call->print_fmt); - unregister_ftrace_event(&call->event); + unregister_trace_event(&call->event); } return ret; } @@ -1364,10 +1364,10 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, return a1 + a2 + a3 + a4 + a5 + a6; } -static struct ftrace_event_file * +static struct trace_event_file * find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr) { - struct ftrace_event_file *file; + struct trace_event_file *file; list_for_each_entry(file, &tr->events, list) if (file->event_call == &tk->tp.call) @@ -1385,7 +1385,7 @@ static __init int kprobe_trace_self_tests_init(void) int ret, warn = 0; int (*target)(int, int, int, int, int, int); struct trace_kprobe *tk; - struct ftrace_event_file *file; + struct trace_event_file *file; if (tracing_is_disabled()) return -ENODEV; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 7a9ba62e9fef..638e110c5bfd 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -298,7 +298,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_rw *rw) { - struct ftrace_event_call *call = &event_mmiotrace_rw; + struct trace_event_call *call = &event_mmiotrace_rw; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; @@ -328,7 +328,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data, struct mmiotrace_map *map) { - struct ftrace_event_call *call = &event_mmiotrace_map; + struct trace_event_call *call = &event_mmiotrace_map; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 25a086bcb700..dfab253727dc 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -60,9 +60,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) } const char * -ftrace_print_flags_seq(struct trace_seq *p, const char *delim, - unsigned long flags, - const struct trace_print_flags *flag_array) +trace_print_flags_seq(struct trace_seq *p, const char *delim, + unsigned long flags, + const struct trace_print_flags *flag_array) { unsigned long mask; const char *str; @@ -95,11 +95,11 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim, return ret; } -EXPORT_SYMBOL(ftrace_print_flags_seq); +EXPORT_SYMBOL(trace_print_flags_seq); const char * -ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, - const struct trace_print_flags *symbol_array) +trace_print_symbols_seq(struct trace_seq *p, unsigned long val, + const struct trace_print_flags *symbol_array) { int i; const char *ret = trace_seq_buffer_ptr(p); @@ -120,11 +120,11 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, return ret; } -EXPORT_SYMBOL(ftrace_print_symbols_seq); +EXPORT_SYMBOL(trace_print_symbols_seq); #if BITS_PER_LONG == 32 const char * -ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, +trace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, const struct trace_print_flags_u64 *symbol_array) { int i; @@ -146,12 +146,12 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, return ret; } -EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); +EXPORT_SYMBOL(trace_print_symbols_seq_u64); #endif const char * -ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, - unsigned int bitmask_size) +trace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, + unsigned int bitmask_size) { const char *ret = trace_seq_buffer_ptr(p); @@ -160,10 +160,10 @@ ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, return ret; } -EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq); +EXPORT_SYMBOL_GPL(trace_print_bitmask_seq); const char * -ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) +trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { int i; const char *ret = trace_seq_buffer_ptr(p); @@ -175,11 +175,11 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) return ret; } -EXPORT_SYMBOL(ftrace_print_hex_seq); +EXPORT_SYMBOL(trace_print_hex_seq); const char * -ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, - size_t el_size) +trace_print_array_seq(struct trace_seq *p, const void *buf, int count, + size_t el_size) { const char *ret = trace_seq_buffer_ptr(p); const char *prefix = ""; @@ -220,17 +220,17 @@ ftrace_print_array_seq(struct trace_seq *p, const void *buf, int count, return ret; } -EXPORT_SYMBOL(ftrace_print_array_seq); +EXPORT_SYMBOL(trace_print_array_seq); -int ftrace_raw_output_prep(struct trace_iterator *iter, - struct trace_event *trace_event) +int trace_raw_output_prep(struct trace_iterator *iter, + struct trace_event *trace_event) { - struct ftrace_event_call *event; + struct trace_event_call *event; struct trace_seq *s = &iter->seq; struct trace_seq *p = &iter->tmp_seq; struct trace_entry *entry; - event = container_of(trace_event, struct ftrace_event_call, event); + event = container_of(trace_event, struct trace_event_call, event); entry = iter->ent; if (entry->type != event->event.type) { @@ -239,14 +239,14 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, } trace_seq_init(p); - trace_seq_printf(s, "%s: ", ftrace_event_name(event)); + trace_seq_printf(s, "%s: ", trace_event_name(event)); return trace_handle_return(s); } -EXPORT_SYMBOL(ftrace_raw_output_prep); +EXPORT_SYMBOL(trace_raw_output_prep); -static int ftrace_output_raw(struct trace_iterator *iter, char *name, - char *fmt, va_list ap) +static int trace_output_raw(struct trace_iterator *iter, char *name, + char *fmt, va_list ap) { struct trace_seq *s = &iter->seq; @@ -256,18 +256,18 @@ static int ftrace_output_raw(struct trace_iterator *iter, char *name, return trace_handle_return(s); } -int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) +int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); - ret = ftrace_output_raw(iter, name, fmt, ap); + ret = trace_output_raw(iter, name, fmt, ap); va_end(ap); return ret; } -EXPORT_SYMBOL_GPL(ftrace_output_call); +EXPORT_SYMBOL_GPL(trace_output_call); #ifdef CONFIG_KRETPROBES static inline const char *kretprobed(const char *name) @@ -675,7 +675,7 @@ static int trace_search_list(struct list_head **list) } /* Did we used up all 65 thousand events??? */ - if ((last + 1) > FTRACE_MAX_EVENT) + if ((last + 1) > TRACE_EVENT_TYPE_MAX) return 0; *list = &e->list; @@ -693,7 +693,7 @@ void trace_event_read_unlock(void) } /** - * register_ftrace_event - register output for an event type + * register_trace_event - register output for an event type * @event: the event type to register * * Event types are stored in a hash and this hash is used to @@ -707,7 +707,7 @@ void trace_event_read_unlock(void) * * Returns the event type number or zero on error. */ -int register_ftrace_event(struct trace_event *event) +int register_trace_event(struct trace_event *event) { unsigned key; int ret = 0; @@ -725,7 +725,7 @@ int register_ftrace_event(struct trace_event *event) if (!event->type) { struct list_head *list = NULL; - if (next_event_type > FTRACE_MAX_EVENT) { + if (next_event_type > TRACE_EVENT_TYPE_MAX) { event->type = trace_search_list(&list); if (!event->type) @@ -771,12 +771,12 @@ int register_ftrace_event(struct trace_event *event) return ret; } -EXPORT_SYMBOL_GPL(register_ftrace_event); +EXPORT_SYMBOL_GPL(register_trace_event); /* * Used by module code with the trace_event_sem held for write. */ -int __unregister_ftrace_event(struct trace_event *event) +int __unregister_trace_event(struct trace_event *event) { hlist_del(&event->node); list_del(&event->list); @@ -784,18 +784,18 @@ int __unregister_ftrace_event(struct trace_event *event) } /** - * unregister_ftrace_event - remove a no longer used event + * unregister_trace_event - remove a no longer used event * @event: the event to remove */ -int unregister_ftrace_event(struct trace_event *event) +int unregister_trace_event(struct trace_event *event) { down_write(&trace_event_sem); - __unregister_ftrace_event(event); + __unregister_trace_event(event); up_write(&trace_event_sem); return 0; } -EXPORT_SYMBOL_GPL(unregister_ftrace_event); +EXPORT_SYMBOL_GPL(unregister_trace_event); /* * Standard events @@ -1243,7 +1243,7 @@ __init static int init_events(void) for (i = 0; events[i]; i++) { event = events[i]; - ret = register_ftrace_event(event); + ret = register_trace_event(event); if (!ret) { printk(KERN_WARNING "event %d failed to register\n", event->type); diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 8ef2c40efb3c..4cbfe85b99c8 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -32,7 +32,7 @@ extern int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); /* used by module unregistering */ -extern int __unregister_ftrace_event(struct trace_event *event); +extern int __unregister_trace_event(struct trace_event *event); extern struct rw_semaphore trace_event_sem; #define SEQ_PUT_FIELD(s, x) \ diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ab283e146b70..b98dee914542 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -272,8 +272,8 @@ struct probe_arg { struct trace_probe { unsigned int flags; /* For TP_FLAG_* */ - struct ftrace_event_class class; - struct ftrace_event_call call; + struct trace_event_class class; + struct trace_event_call call; struct list_head files; ssize_t size; /* trace entry size */ unsigned int nr_args; @@ -281,7 +281,7 @@ struct trace_probe { }; struct event_file_link { - struct ftrace_event_file *file; + struct trace_event_file *file; struct list_head list; }; @@ -314,7 +314,7 @@ static inline int is_good_name(const char *name) } static inline struct event_file_link * -find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file) +find_event_file_link(struct trace_probe *tp, struct trace_event_file *file) { struct event_file_link *link; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index d6e1003724e9..9b33dd117f3f 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -369,7 +369,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_context_switch; + struct trace_event_call *call = &event_context_switch; struct ring_buffer *buffer = tr->trace_buffer.buffer; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -397,7 +397,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags, int pc) { - struct ftrace_event_call *call = &event_wakeup; + struct trace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; struct ring_buffer *buffer = tr->trace_buffer.buffer; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f97f6e3a676c..7d567a4b9fa7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -13,13 +13,13 @@ static DEFINE_MUTEX(syscall_trace_lock); -static int syscall_enter_register(struct ftrace_event_call *event, +static int syscall_enter_register(struct trace_event_call *event, enum trace_reg type, void *data); -static int syscall_exit_register(struct ftrace_event_call *event, +static int syscall_exit_register(struct trace_event_call *event, enum trace_reg type, void *data); static struct list_head * -syscall_get_enter_fields(struct ftrace_event_call *call) +syscall_get_enter_fields(struct trace_event_call *call) { struct syscall_metadata *entry = call->data; @@ -219,7 +219,7 @@ __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) return pos; } -static int __init set_syscall_print_fmt(struct ftrace_event_call *call) +static int __init set_syscall_print_fmt(struct trace_event_call *call) { char *print_fmt; int len; @@ -244,7 +244,7 @@ static int __init set_syscall_print_fmt(struct ftrace_event_call *call) return 0; } -static void __init free_syscall_print_fmt(struct ftrace_event_call *call) +static void __init free_syscall_print_fmt(struct trace_event_call *call) { struct syscall_metadata *entry = call->data; @@ -252,7 +252,7 @@ static void __init free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -static int __init syscall_enter_define_fields(struct ftrace_event_call *call) +static int __init syscall_enter_define_fields(struct trace_event_call *call) { struct syscall_trace_enter trace; struct syscall_metadata *meta = call->data; @@ -275,7 +275,7 @@ static int __init syscall_enter_define_fields(struct ftrace_event_call *call) return ret; } -static int __init syscall_exit_define_fields(struct ftrace_event_call *call) +static int __init syscall_exit_define_fields(struct trace_event_call *call) { struct syscall_trace_exit trace; int ret; @@ -293,7 +293,7 @@ static int __init syscall_exit_define_fields(struct ftrace_event_call *call) static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) { struct trace_array *tr = data; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; struct syscall_trace_enter *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -308,11 +308,11 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) return; /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ - ftrace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); - if (!ftrace_file) + trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]); + if (!trace_file) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -334,14 +334,14 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); } static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) { struct trace_array *tr = data; - struct ftrace_event_file *ftrace_file; + struct trace_event_file *trace_file; struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; struct ring_buffer_event *event; @@ -355,11 +355,11 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) return; /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ - ftrace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); - if (!ftrace_file) + trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]); + if (!trace_file) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; sys_data = syscall_nr_to_meta(syscall_nr); @@ -380,12 +380,12 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, + event_trigger_unlock_commit(trace_file, buffer, event, entry, irq_flags, pc); } -static int reg_event_syscall_enter(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static int reg_event_syscall_enter(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int ret = 0; @@ -405,8 +405,8 @@ static int reg_event_syscall_enter(struct ftrace_event_file *file, return ret; } -static void unreg_event_syscall_enter(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static void unreg_event_syscall_enter(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int num; @@ -422,8 +422,8 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file, mutex_unlock(&syscall_trace_lock); } -static int reg_event_syscall_exit(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static int reg_event_syscall_exit(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int ret = 0; @@ -443,8 +443,8 @@ static int reg_event_syscall_exit(struct ftrace_event_file *file, return ret; } -static void unreg_event_syscall_exit(struct ftrace_event_file *file, - struct ftrace_event_call *call) +static void unreg_event_syscall_exit(struct trace_event_file *file, + struct trace_event_call *call) { struct trace_array *tr = file->tr; int num; @@ -460,7 +460,7 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, mutex_unlock(&syscall_trace_lock); } -static int __init init_syscall_trace(struct ftrace_event_call *call) +static int __init init_syscall_trace(struct trace_event_call *call) { int id; int num; @@ -493,7 +493,7 @@ struct trace_event_functions exit_syscall_print_funcs = { .trace = print_syscall_exit, }; -struct ftrace_event_class __refdata event_class_syscall_enter = { +struct trace_event_class __refdata event_class_syscall_enter = { .system = "syscalls", .reg = syscall_enter_register, .define_fields = syscall_enter_define_fields, @@ -501,7 +501,7 @@ struct ftrace_event_class __refdata event_class_syscall_enter = { .raw_init = init_syscall_trace, }; -struct ftrace_event_class __refdata event_class_syscall_exit = { +struct trace_event_class __refdata event_class_syscall_exit = { .system = "syscalls", .reg = syscall_exit_register, .define_fields = syscall_exit_define_fields, @@ -584,7 +584,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -static int perf_sysenter_enable(struct ftrace_event_call *call) +static int perf_sysenter_enable(struct trace_event_call *call) { int ret = 0; int num; @@ -605,7 +605,7 @@ static int perf_sysenter_enable(struct ftrace_event_call *call) return ret; } -static void perf_sysenter_disable(struct ftrace_event_call *call) +static void perf_sysenter_disable(struct trace_event_call *call) { int num; @@ -656,7 +656,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL); } -static int perf_sysexit_enable(struct ftrace_event_call *call) +static int perf_sysexit_enable(struct trace_event_call *call) { int ret = 0; int num; @@ -677,7 +677,7 @@ static int perf_sysexit_enable(struct ftrace_event_call *call) return ret; } -static void perf_sysexit_disable(struct ftrace_event_call *call) +static void perf_sysexit_disable(struct trace_event_call *call) { int num; @@ -693,10 +693,10 @@ static void perf_sysexit_disable(struct ftrace_event_call *call) #endif /* CONFIG_PERF_EVENTS */ -static int syscall_enter_register(struct ftrace_event_call *event, +static int syscall_enter_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -721,10 +721,10 @@ static int syscall_enter_register(struct ftrace_event_call *event, return 0; } -static int syscall_exit_register(struct ftrace_event_call *event, +static int syscall_exit_register(struct trace_event_call *event, enum trace_reg type, void *data) { - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 6dd022c7b5bc..aa1ea7b36fa8 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -293,7 +293,7 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou struct trace_uprobe *tu; list_for_each_entry(tu, &uprobe_list, list) - if (strcmp(ftrace_event_name(&tu->tp.call), event) == 0 && + if (strcmp(trace_event_name(&tu->tp.call), event) == 0 && strcmp(tu->tp.call.class->system, group) == 0) return tu; @@ -323,7 +323,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu) mutex_lock(&uprobe_lock); /* register as an event */ - old_tu = find_probe_event(ftrace_event_name(&tu->tp.call), + old_tu = find_probe_event(trace_event_name(&tu->tp.call), tu->tp.call.class->system); if (old_tu) { /* delete old event */ @@ -600,7 +600,7 @@ static int probes_seq_show(struct seq_file *m, void *v) int i; seq_printf(m, "%c:%s/%s", c, tu->tp.call.class->system, - ftrace_event_name(&tu->tp.call)); + trace_event_name(&tu->tp.call)); seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); for (i = 0; i < tu->tp.nr_args; i++) @@ -651,7 +651,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) struct trace_uprobe *tu = v; seq_printf(m, " %s %-44s %15lu\n", tu->filename, - ftrace_event_name(&tu->tp.call), tu->nhit); + trace_event_name(&tu->tp.call), tu->nhit); return 0; } @@ -770,26 +770,26 @@ static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize, - struct ftrace_event_file *ftrace_file) + struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; void *data; int size, esize; - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; - WARN_ON(call != ftrace_file->event_call); + WARN_ON(call != trace_file->event_call); if (WARN_ON_ONCE(tu->tp.size + dsize > PAGE_SIZE)) return; - if (ftrace_trigger_soft_disabled(ftrace_file)) + if (trace_trigger_soft_disabled(trace_file)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; - event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + event = trace_event_buffer_lock_reserve(&buffer, trace_file, call->event.type, size, 0, 0); if (!event) return; @@ -806,7 +806,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu, memcpy(data, ucb->buf, tu->tp.size + dsize); - event_trigger_unlock_commit(ftrace_file, buffer, event, entry, 0, 0); + event_trigger_unlock_commit(trace_file, buffer, event, entry, 0, 0); } /* uprobe handler */ @@ -853,12 +853,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", - ftrace_event_name(&tu->tp.call), + trace_event_name(&tu->tp.call), entry->vaddr[1], entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, true); } else { trace_seq_printf(s, "%s: (0x%lx)", - ftrace_event_name(&tu->tp.call), + trace_event_name(&tu->tp.call), entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, false); } @@ -881,7 +881,7 @@ typedef bool (*filter_func_t)(struct uprobe_consumer *self, struct mm_struct *mm); static int -probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, +probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file, filter_func_t filter) { bool enabled = trace_probe_is_enabled(&tu->tp); @@ -938,7 +938,7 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file, } static void -probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) +probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file) { if (!trace_probe_is_enabled(&tu->tp)) return; @@ -967,7 +967,7 @@ probe_event_disable(struct trace_uprobe *tu, struct ftrace_event_file *file) uprobe_buffer_disable(); } -static int uprobe_event_define_fields(struct ftrace_event_call *event_call) +static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, i, size; struct uprobe_trace_entry_head field; @@ -1093,7 +1093,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, int dsize) { - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; struct hlist_head *head; void *data; @@ -1159,11 +1159,11 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, #endif /* CONFIG_PERF_EVENTS */ static int -trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, +trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { struct trace_uprobe *tu = event->data; - struct ftrace_event_file *file = data; + struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: @@ -1272,10 +1272,10 @@ static struct trace_event_functions uprobe_funcs = { static int register_uprobe_event(struct trace_uprobe *tu) { - struct ftrace_event_call *call = &tu->tp.call; + struct trace_event_call *call = &tu->tp.call; int ret; - /* Initialize ftrace_event_call */ + /* Initialize trace_event_call */ INIT_LIST_HEAD(&call->class->fields); call->event.funcs = &uprobe_funcs; call->class->define_fields = uprobe_event_define_fields; @@ -1283,7 +1283,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) return -ENOMEM; - ret = register_ftrace_event(&call->event); + ret = register_trace_event(&call->event); if (!ret) { kfree(call->print_fmt); return -ENODEV; @@ -1295,9 +1295,9 @@ static int register_uprobe_event(struct trace_uprobe *tu) if (ret) { pr_info("Failed to register uprobe event: %s\n", - ftrace_event_name(call)); + trace_event_name(call)); kfree(call->print_fmt); - unregister_ftrace_event(&call->event); + unregister_trace_event(&call->event); } return ret; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 586ad91300b0..4c4f06176f74 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -127,6 +127,11 @@ enum { * * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. * + * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. + * + * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or + * sched-RCU for reads. + * * WQ: wq->mutex protected. * * WR: wq->mutex protected for writes. Sched-RCU protected for reads. @@ -247,8 +252,8 @@ struct workqueue_struct { int nr_drainers; /* WQ: drain in progress */ int saved_max_active; /* WQ: saved pwq max_active */ - struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ - struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */ + struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */ + struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */ #ifdef CONFIG_SYSFS struct wq_device *wq_dev; /* I: for sysfs interface */ @@ -268,7 +273,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ - struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */ + struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */ }; static struct kmem_cache *pwq_cache; @@ -280,12 +285,7 @@ static bool wq_disable_numa; module_param_named(disable_numa, wq_disable_numa, bool, 0444); /* see the comment above the definition of WQ_POWER_EFFICIENT */ -#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT -static bool wq_power_efficient = true; -#else -static bool wq_power_efficient; -#endif - +static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT); module_param_named(power_efficient, wq_power_efficient, bool, 0444); static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ @@ -299,6 +299,8 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */ static LIST_HEAD(workqueues); /* PR: list of all workqueues */ static bool workqueue_freezing; /* PL: have wqs started freezing? */ +static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */ + /* the per-cpu worker pools */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools); @@ -330,8 +332,6 @@ struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); static int worker_thread(void *__worker); -static void copy_workqueue_attrs(struct workqueue_attrs *to, - const struct workqueue_attrs *from); static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #define CREATE_TRACE_POINTS @@ -347,6 +347,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); lockdep_is_held(&wq->mutex), \ "sched RCU or wq->mutex should be held") +#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ + rcu_lockdep_assert(rcu_read_lock_sched_held() || \ + lockdep_is_held(&wq->mutex) || \ + lockdep_is_held(&wq_pool_mutex), \ + "sched RCU, wq->mutex or wq_pool_mutex should be held") + #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ @@ -551,7 +557,8 @@ static int worker_pool_assign_id(struct worker_pool *pool) * @wq: the target workqueue * @node: the node ID * - * This must be called either with pwq_lock held or sched RCU read locked. + * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU + * read locked. * If the pwq needs to be used beyond the locking in effect, the caller is * responsible for guaranteeing that the pwq stays online. * @@ -560,7 +567,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { - assert_rcu_or_wq_mutex(wq); + assert_rcu_or_wq_mutex_or_pool_mutex(wq); return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } @@ -976,7 +983,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, * move_linked_works - move linked works to a list * @work: start of series of works to be scheduled * @head: target list to append @work to - * @nextp: out paramter for nested worklist walking + * @nextp: out parameter for nested worklist walking * * Schedule linked works starting from @work to @head. Work series to * be scheduled starts at @work and includes any consecutive work with @@ -2616,7 +2623,7 @@ EXPORT_SYMBOL_GPL(flush_workqueue); * Wait until the workqueue becomes empty. While draining is in progress, * only chain queueing is allowed. IOW, only currently pending or running * work items on @wq can queue further work items on it. @wq is flushed - * repeatedly until it becomes empty. The number of flushing is detemined + * repeatedly until it becomes empty. The number of flushing is determined * by the depth of chaining and should be relatively short. Whine if it * takes too long. */ @@ -2947,36 +2954,6 @@ int schedule_on_each_cpu(work_func_t func) } /** - * flush_scheduled_work - ensure that any scheduled work has run to completion. - * - * Forces execution of the kernel-global workqueue and blocks until its - * completion. - * - * Think twice before calling this function! It's very easy to get into - * trouble if you don't take great care. Either of the following situations - * will lead to deadlock: - * - * One of the work items currently on the workqueue needs to acquire - * a lock held by your code or its caller. - * - * Your code is running in the context of a work routine. - * - * They will be detected by lockdep when they occur, but the first might not - * occur very often. It depends on what work items are on the workqueue and - * what locks they need, which you have no control over. - * - * In most situations flushing the entire workqueue is overkill; you merely - * need to know that a particular work item isn't queued and isn't running. - * In such cases you should use cancel_delayed_work_sync() or - * cancel_work_sync() instead. - */ -void flush_scheduled_work(void) -{ - flush_workqueue(system_wq); -} -EXPORT_SYMBOL(flush_scheduled_work); - -/** * execute_in_process_context - reliably execute the routine with user context * @fn: the function to execute * @ew: guaranteed storage for the execute work structure (must @@ -3081,7 +3058,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a, * init_worker_pool - initialize a newly zalloc'd worker_pool * @pool: worker_pool to initialize * - * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs. + * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs. * * Return: 0 on success, -errno on failure. Even on failure, all fields * inside @pool proper are initialized and put_unbound_pool() can be called @@ -3425,20 +3402,9 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq, return pwq; } -/* undo alloc_unbound_pwq(), used only in the error path */ -static void free_unbound_pwq(struct pool_workqueue *pwq) -{ - lockdep_assert_held(&wq_pool_mutex); - - if (pwq) { - put_unbound_pool(pwq->pool); - kmem_cache_free(pwq_cache, pwq); - } -} - /** - * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node - * @attrs: the wq_attrs of interest + * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node + * @attrs: the wq_attrs of the default pwq of the target workqueue * @node: the target NUMA node * @cpu_going_down: if >= 0, the CPU to consider as offline * @cpumask: outarg, the resulting cpumask @@ -3488,6 +3454,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, { struct pool_workqueue *old_pwq; + lockdep_assert_held(&wq_pool_mutex); lockdep_assert_held(&wq->mutex); /* link_pwq() can handle duplicate calls */ @@ -3498,46 +3465,59 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq, return old_pwq; } -/** - * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue - * @wq: the target workqueue - * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() - * - * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA - * machines, this function maps a separate pwq to each NUMA node with - * possibles CPUs in @attrs->cpumask so that work items are affine to the - * NUMA node it was issued on. Older pwqs are released as in-flight work - * items finish. Note that a work item which repeatedly requeues itself - * back-to-back will stay on its current pwq. - * - * Performs GFP_KERNEL allocations. - * - * Return: 0 on success and -errno on failure. - */ -int apply_workqueue_attrs(struct workqueue_struct *wq, - const struct workqueue_attrs *attrs) +/* context to store the prepared attrs & pwqs before applying */ +struct apply_wqattrs_ctx { + struct workqueue_struct *wq; /* target workqueue */ + struct workqueue_attrs *attrs; /* attrs to apply */ + struct list_head list; /* queued for batching commit */ + struct pool_workqueue *dfl_pwq; + struct pool_workqueue *pwq_tbl[]; +}; + +/* free the resources after success or abort */ +static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx) +{ + if (ctx) { + int node; + + for_each_node(node) + put_pwq_unlocked(ctx->pwq_tbl[node]); + put_pwq_unlocked(ctx->dfl_pwq); + + free_workqueue_attrs(ctx->attrs); + + kfree(ctx); + } +} + +/* allocate the attrs and pwqs for later installation */ +static struct apply_wqattrs_ctx * +apply_wqattrs_prepare(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) { + struct apply_wqattrs_ctx *ctx; struct workqueue_attrs *new_attrs, *tmp_attrs; - struct pool_workqueue **pwq_tbl, *dfl_pwq; - int node, ret; + int node; - /* only unbound workqueues can change attributes */ - if (WARN_ON(!(wq->flags & WQ_UNBOUND))) - return -EINVAL; + lockdep_assert_held(&wq_pool_mutex); - /* creating multiple pwqs breaks ordering guarantee */ - if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) - return -EINVAL; + ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]), + GFP_KERNEL); - pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); - if (!pwq_tbl || !new_attrs || !tmp_attrs) - goto enomem; + if (!ctx || !new_attrs || !tmp_attrs) + goto out_free; - /* make a copy of @attrs and sanitize it */ + /* + * Calculate the attrs of the default pwq. + * If the user configured cpumask doesn't overlap with the + * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask. + */ copy_workqueue_attrs(new_attrs, attrs); - cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask); + if (unlikely(cpumask_empty(new_attrs->cpumask))) + cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask); /* * We may create multiple pwqs with differing cpumasks. Make a @@ -3547,75 +3527,129 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, copy_workqueue_attrs(tmp_attrs, new_attrs); /* - * CPUs should stay stable across pwq creations and installations. - * Pin CPUs, determine the target cpumask for each node and create - * pwqs accordingly. - */ - get_online_cpus(); - - mutex_lock(&wq_pool_mutex); - - /* * If something goes wrong during CPU up/down, we'll fall back to * the default pwq covering whole @attrs->cpumask. Always create * it even if we don't use it immediately. */ - dfl_pwq = alloc_unbound_pwq(wq, new_attrs); - if (!dfl_pwq) - goto enomem_pwq; + ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); + if (!ctx->dfl_pwq) + goto out_free; for_each_node(node) { - if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { - pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); - if (!pwq_tbl[node]) - goto enomem_pwq; + if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) { + ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); + if (!ctx->pwq_tbl[node]) + goto out_free; } else { - dfl_pwq->refcnt++; - pwq_tbl[node] = dfl_pwq; + ctx->dfl_pwq->refcnt++; + ctx->pwq_tbl[node] = ctx->dfl_pwq; } } - mutex_unlock(&wq_pool_mutex); + /* save the user configured attrs and sanitize it. */ + copy_workqueue_attrs(new_attrs, attrs); + cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); + ctx->attrs = new_attrs; + + ctx->wq = wq; + free_workqueue_attrs(tmp_attrs); + return ctx; + +out_free: + free_workqueue_attrs(tmp_attrs); + free_workqueue_attrs(new_attrs); + apply_wqattrs_cleanup(ctx); + return NULL; +} + +/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */ +static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx) +{ + int node; /* all pwqs have been created successfully, let's install'em */ - mutex_lock(&wq->mutex); + mutex_lock(&ctx->wq->mutex); - copy_workqueue_attrs(wq->unbound_attrs, new_attrs); + copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); /* save the previous pwq and install the new one */ for_each_node(node) - pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]); + ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node, + ctx->pwq_tbl[node]); /* @dfl_pwq might not have been used, ensure it's linked */ - link_pwq(dfl_pwq); - swap(wq->dfl_pwq, dfl_pwq); + link_pwq(ctx->dfl_pwq); + swap(ctx->wq->dfl_pwq, ctx->dfl_pwq); - mutex_unlock(&wq->mutex); + mutex_unlock(&ctx->wq->mutex); +} - /* put the old pwqs */ - for_each_node(node) - put_pwq_unlocked(pwq_tbl[node]); - put_pwq_unlocked(dfl_pwq); +static void apply_wqattrs_lock(void) +{ + /* CPUs should stay stable across pwq creations and installations */ + get_online_cpus(); + mutex_lock(&wq_pool_mutex); +} +static void apply_wqattrs_unlock(void) +{ + mutex_unlock(&wq_pool_mutex); put_online_cpus(); - ret = 0; - /* fall through */ -out_free: - free_workqueue_attrs(tmp_attrs); - free_workqueue_attrs(new_attrs); - kfree(pwq_tbl); +} + +static int apply_workqueue_attrs_locked(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + struct apply_wqattrs_ctx *ctx; + int ret = -ENOMEM; + + /* only unbound workqueues can change attributes */ + if (WARN_ON(!(wq->flags & WQ_UNBOUND))) + return -EINVAL; + + /* creating multiple pwqs breaks ordering guarantee */ + if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) + return -EINVAL; + + ctx = apply_wqattrs_prepare(wq, attrs); + + /* the ctx has been prepared successfully, let's commit it */ + if (ctx) { + apply_wqattrs_commit(ctx); + ret = 0; + } + + apply_wqattrs_cleanup(ctx); + return ret; +} -enomem_pwq: - free_unbound_pwq(dfl_pwq); - for_each_node(node) - if (pwq_tbl && pwq_tbl[node] != dfl_pwq) - free_unbound_pwq(pwq_tbl[node]); - mutex_unlock(&wq_pool_mutex); - put_online_cpus(); -enomem: - ret = -ENOMEM; - goto out_free; +/** + * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue + * @wq: the target workqueue + * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() + * + * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA + * machines, this function maps a separate pwq to each NUMA node with + * possibles CPUs in @attrs->cpumask so that work items are affine to the + * NUMA node it was issued on. Older pwqs are released as in-flight work + * items finish. Note that a work item which repeatedly requeues itself + * back-to-back will stay on its current pwq. + * + * Performs GFP_KERNEL allocations. + * + * Return: 0 on success and -errno on failure. + */ +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs) +{ + int ret; + + apply_wqattrs_lock(); + ret = apply_workqueue_attrs_locked(wq, attrs); + apply_wqattrs_unlock(); + + return ret; } /** @@ -3651,7 +3685,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, lockdep_assert_held(&wq_pool_mutex); - if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND)) + if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) || + wq->unbound_attrs->no_numa) return; /* @@ -3662,48 +3697,37 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, target_attrs = wq_update_unbound_numa_attrs_buf; cpumask = target_attrs->cpumask; - mutex_lock(&wq->mutex); - if (wq->unbound_attrs->no_numa) - goto out_unlock; - copy_workqueue_attrs(target_attrs, wq->unbound_attrs); pwq = unbound_pwq_by_node(wq, node); /* * Let's determine what needs to be done. If the target cpumask is - * different from wq's, we need to compare it to @pwq's and create - * a new one if they don't match. If the target cpumask equals - * wq's, the default pwq should be used. + * different from the default pwq's, we need to compare it to @pwq's + * and create a new one if they don't match. If the target cpumask + * equals the default pwq's, the default pwq should be used. */ - if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { + if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) { if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) - goto out_unlock; + return; } else { goto use_dfl_pwq; } - mutex_unlock(&wq->mutex); - /* create a new pwq */ pwq = alloc_unbound_pwq(wq, target_attrs); if (!pwq) { pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", wq->name); - mutex_lock(&wq->mutex); goto use_dfl_pwq; } - /* - * Install the new pwq. As this function is called only from CPU - * hotplug callbacks and applying a new attrs is wrapped with - * get/put_online_cpus(), @wq->unbound_attrs couldn't have changed - * inbetween. - */ + /* Install the new pwq. */ mutex_lock(&wq->mutex); old_pwq = numa_pwq_tbl_install(wq, node, pwq); goto out_unlock; use_dfl_pwq: + mutex_lock(&wq->mutex); spin_lock_irq(&wq->dfl_pwq->pool->lock); get_pwq(wq->dfl_pwq); spin_unlock_irq(&wq->dfl_pwq->pool->lock); @@ -4385,7 +4409,7 @@ static void rebind_workers(struct worker_pool *pool) /* * Restore CPU affinity of all workers. As all idle workers should * be on the run-queue of the associated CPU before any local - * wake-ups for concurrency management happen, restore CPU affinty + * wake-ups for concurrency management happen, restore CPU affinity * of all workers first and then clear UNBOUND. As we're called * from CPU_ONLINE, the following shouldn't fail. */ @@ -4698,6 +4722,82 @@ out_unlock: } #endif /* CONFIG_FREEZER */ +static int workqueue_apply_unbound_cpumask(void) +{ + LIST_HEAD(ctxs); + int ret = 0; + struct workqueue_struct *wq; + struct apply_wqattrs_ctx *ctx, *n; + + lockdep_assert_held(&wq_pool_mutex); + + list_for_each_entry(wq, &workqueues, list) { + if (!(wq->flags & WQ_UNBOUND)) + continue; + /* creating multiple pwqs breaks ordering guarantee */ + if (wq->flags & __WQ_ORDERED) + continue; + + ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs); + if (!ctx) { + ret = -ENOMEM; + break; + } + + list_add_tail(&ctx->list, &ctxs); + } + + list_for_each_entry_safe(ctx, n, &ctxs, list) { + if (!ret) + apply_wqattrs_commit(ctx); + apply_wqattrs_cleanup(ctx); + } + + return ret; +} + +/** + * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask + * @cpumask: the cpumask to set + * + * The low-level workqueues cpumask is a global cpumask that limits + * the affinity of all unbound workqueues. This function check the @cpumask + * and apply it to all unbound workqueues and updates all pwqs of them. + * + * Retun: 0 - Success + * -EINVAL - Invalid @cpumask + * -ENOMEM - Failed to allocate memory for attrs or pwqs. + */ +int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) +{ + int ret = -EINVAL; + cpumask_var_t saved_cpumask; + + if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_and(cpumask, cpumask, cpu_possible_mask); + if (!cpumask_empty(cpumask)) { + apply_wqattrs_lock(); + + /* save the old wq_unbound_cpumask. */ + cpumask_copy(saved_cpumask, wq_unbound_cpumask); + + /* update wq_unbound_cpumask at first and apply it to wqs. */ + cpumask_copy(wq_unbound_cpumask, cpumask); + ret = workqueue_apply_unbound_cpumask(); + + /* restore the wq_unbound_cpumask when failed. */ + if (ret < 0) + cpumask_copy(wq_unbound_cpumask, saved_cpumask); + + apply_wqattrs_unlock(); + } + + free_cpumask_var(saved_cpumask); + return ret; +} + #ifdef CONFIG_SYSFS /* * Workqueues with WQ_SYSFS flag set is visible to userland via @@ -4802,13 +4902,13 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq) { struct workqueue_attrs *attrs; + lockdep_assert_held(&wq_pool_mutex); + attrs = alloc_workqueue_attrs(GFP_KERNEL); if (!attrs) return NULL; - mutex_lock(&wq->mutex); copy_workqueue_attrs(attrs, wq->unbound_attrs); - mutex_unlock(&wq->mutex); return attrs; } @@ -4817,18 +4917,22 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; - int ret; + int ret = -ENOMEM; + + apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) - return -ENOMEM; + goto out_unlock; if (sscanf(buf, "%d", &attrs->nice) == 1 && attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) - ret = apply_workqueue_attrs(wq, attrs); + ret = apply_workqueue_attrs_locked(wq, attrs); else ret = -EINVAL; +out_unlock: + apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } @@ -4852,16 +4956,20 @@ static ssize_t wq_cpumask_store(struct device *dev, { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; - int ret; + int ret = -ENOMEM; + + apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) - return -ENOMEM; + goto out_unlock; ret = cpumask_parse(buf, attrs->cpumask); if (!ret) - ret = apply_workqueue_attrs(wq, attrs); + ret = apply_workqueue_attrs_locked(wq, attrs); +out_unlock: + apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } @@ -4885,18 +4993,22 @@ static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr, { struct workqueue_struct *wq = dev_to_wq(dev); struct workqueue_attrs *attrs; - int v, ret; + int v, ret = -ENOMEM; + + apply_wqattrs_lock(); attrs = wq_sysfs_prep_attrs(wq); if (!attrs) - return -ENOMEM; + goto out_unlock; ret = -EINVAL; if (sscanf(buf, "%d", &v) == 1) { attrs->no_numa = !v; - ret = apply_workqueue_attrs(wq, attrs); + ret = apply_workqueue_attrs_locked(wq, attrs); } +out_unlock: + apply_wqattrs_unlock(); free_workqueue_attrs(attrs); return ret ?: count; } @@ -4914,9 +5026,49 @@ static struct bus_type wq_subsys = { .dev_groups = wq_sysfs_groups, }; +static ssize_t wq_unbound_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int written; + + mutex_lock(&wq_pool_mutex); + written = scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(wq_unbound_cpumask)); + mutex_unlock(&wq_pool_mutex); + + return written; +} + +static ssize_t wq_unbound_cpumask_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + cpumask_var_t cpumask; + int ret; + + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) + return -ENOMEM; + + ret = cpumask_parse(buf, cpumask); + if (!ret) + ret = workqueue_set_unbound_cpumask(cpumask); + + free_cpumask_var(cpumask); + return ret ? ret : count; +} + +static struct device_attribute wq_sysfs_cpumask_attr = + __ATTR(cpumask, 0644, wq_unbound_cpumask_show, + wq_unbound_cpumask_store); + static int __init wq_sysfs_init(void) { - return subsys_virtual_register(&wq_subsys, NULL); + int err; + + err = subsys_virtual_register(&wq_subsys, NULL); + if (err) + return err; + + return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr); } core_initcall(wq_sysfs_init); @@ -4948,7 +5100,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) int ret; /* - * Adjusting max_active or creating new pwqs by applyting + * Adjusting max_active or creating new pwqs by applying * attributes breaks ordering guarantee. Disallow exposing ordered * workqueues. */ @@ -5064,6 +5216,9 @@ static int __init init_workqueues(void) WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); + BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); + cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); + pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); |