Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 8
-rw-r--r-- | kernel/trace/bpf_trace.c | 21
-rw-r--r-- | kernel/trace/fprobe.c | 32
-rw-r--r-- | kernel/trace/ftrace.c | 502
-rw-r--r-- | kernel/trace/ring_buffer.c | 115
-rw-r--r-- | kernel/trace/rv/reactor_panic.c | 1
-rw-r--r-- | kernel/trace/rv/reactor_printk.c | 1
-rw-r--r-- | kernel/trace/rv/rv.c | 2
-rw-r--r-- | kernel/trace/trace.c | 37
-rw-r--r-- | kernel/trace/trace.h | 2
-rw-r--r-- | kernel/trace/trace_events_synth.c | 19
-rw-r--r-- | kernel/trace/trace_events_user.c | 1034
-rw-r--r-- | kernel/trace/trace_osnoise.c | 6
-rw-r--r-- | kernel/trace/trace_output.c | 175
-rw-r--r-- | kernel/trace/trace_output.h | 2
-rw-r--r-- | kernel/trace/trace_probe.c | 2
-rw-r--r-- | kernel/trace/trace_selftest.c | 19
17 files changed, 1259 insertions, 719 deletions
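Editor's note, not part of the patch: the fprobe.c and bpf_trace.c hunks below change the fprobe callback prototypes (the entry handler now returns int and both handlers receive a per-entry data buffer sized by fp->entry_data_size; fp->nr_maxactive bounds the rethook pool). The following is a minimal, hedged sketch of a module written against those prototypes; the handler names, the my_data struct, and the traced symbol "kernel_clone" are illustrative assumptions, not taken from this commit.

```c
/* Hedged sketch of the updated fprobe callback API; names are illustrative. */
#include <linux/module.h>
#include <linux/fprobe.h>

struct my_data {
	unsigned long entry_ip;	/* shared between entry and exit handlers */
};

static int my_entry(struct fprobe *fp, unsigned long ip,
		    struct pt_regs *regs, void *entry_data)
{
	struct my_data *d = entry_data;

	d->entry_ip = ip;
	/* A non-zero return recycles the rethook, so the exit handler is skipped. */
	return 0;
}

static void my_exit(struct fprobe *fp, unsigned long ip,
		    struct pt_regs *regs, void *entry_data)
{
	struct my_data *d = entry_data;

	pr_info("returned from %pS\n", (void *)d->entry_ip);
}

static struct fprobe fp = {
	.entry_handler   = my_entry,
	.exit_handler    = my_exit,
	.entry_data_size = sizeof(struct my_data),	/* per-entry buffer, see fprobe_rethook_node::data */
};

static int __init my_init(void)
{
	/* "kernel_clone" is just an example filter pattern. */
	return register_fprobe(&fp, "kernel_clone", NULL);
}

static void __exit my_fini(void)
{
	unregister_fprobe(&fp);
}

module_init(my_init);
module_exit(my_fini);
MODULE_LICENSE("GPL");
```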
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a856d4a34c67..8cf97fa4a4b3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -257,7 +257,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE_WITH_REGS + depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config DYNAMIC_FTRACE_WITH_CALL_OPS @@ -792,15 +792,15 @@ config USER_EVENTS bool "User trace events" select TRACING select DYNAMIC_EVENTS - depends on BROKEN || COMPILE_TEST # API needs to be straighten out help User trace events are user-defined trace events that can be used like an existing kernel trace event. User trace events are generated by writing to a tracefs file. User processes can determine if their tracing events should be - generated by memory mapping a tracefs file and checking for - an associated byte being non-zero. + generated by registering a value and bit with the kernel + that reflects when it is enabled or not. + See Documentation/trace/user_events.rst. If in doubt, say N. config HIST_TRIGGERS diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e8da032bb6fc..9a050e36dc6c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1453,10 +1453,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) NULL : &bpf_probe_read_compat_str_proto; #endif #ifdef CONFIG_CGROUPS - case BPF_FUNC_get_current_cgroup_id: - return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_get_current_ancestor_cgroup_id: - return &bpf_get_current_ancestor_cgroup_id_proto; case BPF_FUNC_cgrp_storage_get: return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: @@ -2644,9 +2640,20 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, return err; } -static void +static int kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip, - struct pt_regs *regs) + struct pt_regs *regs, void *data) +{ + struct bpf_kprobe_multi_link *link; + + link = container_of(fp, struct bpf_kprobe_multi_link, fp); + kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs); + return 0; +} + +static void +kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip, + struct pt_regs *regs, void *data) { struct bpf_kprobe_multi_link *link; @@ -2848,7 +2855,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error; if (flags & BPF_F_KPROBE_MULTI_RETURN) - link->fp.exit_handler = kprobe_multi_link_handler; + link->fp.exit_handler = kprobe_multi_link_exit_handler; else link->fp.entry_handler = kprobe_multi_link_handler; diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index e8143e368074..9abb3905bc8e 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -17,15 +17,17 @@ struct fprobe_rethook_node { struct rethook_node node; unsigned long entry_ip; + char data[]; }; static void fprobe_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct fprobe_rethook_node *fpr; - struct rethook_node *rh; + struct rethook_node *rh = NULL; struct fprobe *fp; - int bit; + void *entry_data = NULL; + int bit, ret; fp = container_of(ops, struct fprobe, ops); if (fprobe_disabled(fp)) @@ -37,9 +39,6 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, return; } - if (fp->entry_handler) - fp->entry_handler(fp, ip, ftrace_get_regs(fregs)); - if (fp->exit_handler) { rh = rethook_try_get(fp->rethook); if 
(!rh) { @@ -48,9 +47,20 @@ static void fprobe_handler(unsigned long ip, unsigned long parent_ip, } fpr = container_of(rh, struct fprobe_rethook_node, node); fpr->entry_ip = ip; - rethook_hook(rh, ftrace_get_regs(fregs), true); + if (fp->entry_data_size) + entry_data = fpr->data; } + if (fp->entry_handler) + ret = fp->entry_handler(fp, ip, ftrace_get_regs(fregs), entry_data); + + /* If entry_handler returns !0, nmissed is not counted. */ + if (rh) { + if (ret) + rethook_recycle(rh); + else + rethook_hook(rh, ftrace_get_regs(fregs), true); + } out: ftrace_test_recursion_unlock(bit); } @@ -81,7 +91,8 @@ static void fprobe_exit_handler(struct rethook_node *rh, void *data, fpr = container_of(rh, struct fprobe_rethook_node, node); - fp->exit_handler(fp, fpr->entry_ip, regs); + fp->exit_handler(fp, fpr->entry_ip, regs, + fp->entry_data_size ? (void *)fpr->data : NULL); } NOKPROBE_SYMBOL(fprobe_exit_handler); @@ -136,7 +147,10 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) } /* Initialize rethook if needed */ - size = num * num_possible_cpus() * 2; + if (fp->nr_maxactive) + size = fp->nr_maxactive; + else + size = num * num_possible_cpus() * 2; if (size < 0) return -E2BIG; @@ -146,7 +160,7 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) for (i = 0; i < size; i++) { struct fprobe_rethook_node *node; - node = kzalloc(sizeof(*node), GFP_KERNEL); + node = kzalloc(sizeof(*node) + fp->entry_data_size, GFP_KERNEL); if (!node) { rethook_free(fp->rethook); fp->rethook = NULL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0feea145bb29..764668467155 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -45,6 +45,10 @@ #include "trace_output.h" #include "trace_stat.h" +/* Flags that do not get reset */ +#define FTRACE_NOCLEAR_FLAGS (FTRACE_FL_DISABLED | FTRACE_FL_TOUCHED | \ + FTRACE_FL_MODIFIED) + #define FTRACE_INVALID_FUNCTION "__ftrace_invalid_address__" #define FTRACE_WARN_ON(cond) \ @@ -2256,7 +2260,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) flag ^= rec->flags & FTRACE_FL_ENABLED; if (update) { - rec->flags |= FTRACE_FL_ENABLED; + rec->flags |= FTRACE_FL_ENABLED | FTRACE_FL_TOUCHED; if (flag & FTRACE_FL_REGS) { if (rec->flags & FTRACE_FL_REGS) rec->flags |= FTRACE_FL_REGS_EN; @@ -2270,6 +2274,10 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) rec->flags &= ~FTRACE_FL_TRAMP_EN; } + /* Keep track of anything that modifies the function */ + if (rec->flags & (FTRACE_FL_DIRECT | FTRACE_FL_IPMODIFY)) + rec->flags |= FTRACE_FL_MODIFIED; + if (flag & FTRACE_FL_DIRECT) { /* * If there's only one user (direct_ops helper) @@ -2326,7 +2334,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) if (update) { /* If there's no more users, clear all flags */ if (!ftrace_rec_count(rec)) - rec->flags &= FTRACE_FL_DISABLED; + rec->flags &= FTRACE_NOCLEAR_FLAGS; else /* * Just disable the record, but keep the ops TRAMP @@ -2583,28 +2591,13 @@ ftrace_add_rec_direct(unsigned long ip, unsigned long addr, static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { - unsigned long addr; + unsigned long addr = READ_ONCE(ops->direct_call); - addr = ftrace_find_rec_direct(ip); if (!addr) return; arch_ftrace_set_direct_caller(fregs, addr); } - -static struct ftrace_ops direct_ops = { - .func = call_direct_funcs, - .flags = FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS - | FTRACE_OPS_FL_PERMANENT, - /* - * 
By declaring the main trampoline as this trampoline - * it will never have one allocated for it. Allocated - * trampolines should not call direct functions. - * The direct_ops should only be called by the builtin - * ftrace_regs_caller trampoline. - */ - .trampoline = FTRACE_REGS_ADDR, -}; #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** @@ -3162,7 +3155,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_NOCLEAR_FLAGS)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); @@ -3613,7 +3606,10 @@ t_func_next(struct seq_file *m, loff_t *pos) !ftrace_lookup_ip(iter->hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && - !(rec->flags & FTRACE_FL_ENABLED))) { + !(rec->flags & FTRACE_FL_ENABLED)) || + + ((iter->flags & FTRACE_ITER_TOUCHED) && + !(rec->flags & FTRACE_FL_TOUCHED))) { rec = NULL; goto retry; @@ -3872,15 +3868,16 @@ static int t_show(struct seq_file *m, void *v) return 0; } - if (iter->flags & FTRACE_ITER_ENABLED) { + if (iter->flags & (FTRACE_ITER_ENABLED | FTRACE_ITER_TOUCHED)) { struct ftrace_ops *ops; - seq_printf(m, " (%ld)%s%s%s%s", + seq_printf(m, " (%ld)%s%s%s%s%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " ", rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ", rec->flags & FTRACE_FL_DIRECT ? " D" : " ", - rec->flags & FTRACE_FL_CALL_OPS ? " O" : " "); + rec->flags & FTRACE_FL_CALL_OPS ? " O" : " ", + rec->flags & FTRACE_FL_MODIFIED ? " M " : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -3974,6 +3971,31 @@ ftrace_enabled_open(struct inode *inode, struct file *file) return 0; } +static int +ftrace_touched_open(struct inode *inode, struct file *file) +{ + struct ftrace_iterator *iter; + + /* + * This shows us what functions have ever been enabled + * (traced, direct, patched, etc). Not sure if we want lockdown + * to hide such critical information for an admin. + * Although, perhaps it can show information we don't + * want people to see, but if something had traced + * something, we probably want to know about it. + */ + + iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); + if (!iter) + return -ENOMEM; + + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_TOUCHED; + iter->ops = &global_ops; + + return 0; +} + /** * ftrace_regex_open - initialize function tracer filter files * @ops: The ftrace_ops that hold the hash filters @@ -5301,388 +5323,9 @@ struct ftrace_direct_func { static LIST_HEAD(ftrace_direct_funcs); -/** - * ftrace_find_direct_func - test an address if it is a registered direct caller - * @addr: The address of a registered direct caller - * - * This searches to see if a ftrace direct caller has been registered - * at a specific address, and if so, it returns a descriptor for it. - * - * This can be used by architecture code to see if an address is - * a direct caller (trampoline) attached to a fentry/mcount location. - * This is useful for the function_graph tracer, as it may need to - * do adjustments if it traced a location that also has a direct - * trampoline attached to it. 
- */ -struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) -{ - struct ftrace_direct_func *entry; - bool found = false; - - /* May be called by fgraph trampoline (protected by rcu tasks) */ - list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) { - if (entry->addr == addr) { - found = true; - break; - } - } - if (found) - return entry; - - return NULL; -} - -static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) -{ - struct ftrace_direct_func *direct; - - direct = kmalloc(sizeof(*direct), GFP_KERNEL); - if (!direct) - return NULL; - direct->addr = addr; - direct->count = 0; - list_add_rcu(&direct->next, &ftrace_direct_funcs); - ftrace_direct_func_count++; - return direct; -} - static int register_ftrace_function_nolock(struct ftrace_ops *ops); -/** - * register_ftrace_direct - Call a custom trampoline directly - * @ip: The address of the nop at the beginning of a function - * @addr: The address of the trampoline to call at @ip - * - * This is used to connect a direct call from the nop location (@ip) - * at the start of ftrace traced functions. The location that it calls - * (@addr) must be able to handle a direct call, and save the parameters - * of the function being traced, and restore them (or inject new ones - * if needed), before returning. - * - * Returns: - * 0 on success - * -EBUSY - Another direct function is already attached (there can be only one) - * -ENODEV - @ip does not point to a ftrace nop location (or not supported) - * -ENOMEM - There was an allocation failure. - */ -int register_ftrace_direct(unsigned long ip, unsigned long addr) -{ - struct ftrace_direct_func *direct; - struct ftrace_func_entry *entry; - struct ftrace_hash *free_hash = NULL; - struct dyn_ftrace *rec; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - /* See if there's a direct function at @ip already */ - ret = -EBUSY; - if (ftrace_find_rec_direct(ip)) - goto out_unlock; - - ret = -ENODEV; - rec = lookup_rec(ip, ip); - if (!rec) - goto out_unlock; - - /* - * Check if the rec says it has a direct call but we didn't - * find one earlier? - */ - if (WARN_ON(rec->flags & FTRACE_FL_DIRECT)) - goto out_unlock; - - /* Make sure the ip points to the exact record */ - if (ip != rec->ip) { - ip = rec->ip; - /* Need to check this ip for a direct. 
*/ - if (ftrace_find_rec_direct(ip)) - goto out_unlock; - } - - ret = -ENOMEM; - direct = ftrace_find_direct_func(addr); - if (!direct) { - direct = ftrace_alloc_direct_func(addr); - if (!direct) - goto out_unlock; - } - - entry = ftrace_add_rec_direct(ip, addr, &free_hash); - if (!entry) - goto out_unlock; - - ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); - - if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function_nolock(&direct_ops); - if (ret) - ftrace_set_filter_ip(&direct_ops, ip, 1, 0); - } - - if (ret) { - remove_hash_entry(direct_functions, entry); - kfree(entry); - if (!direct->count) { - list_del_rcu(&direct->next); - synchronize_rcu_tasks(); - kfree(direct); - if (free_hash) - free_ftrace_hash(free_hash); - free_hash = NULL; - ftrace_direct_func_count--; - } - } else { - direct->count++; - } - out_unlock: - mutex_unlock(&direct_mutex); - - if (free_hash) { - synchronize_rcu_tasks(); - free_ftrace_hash(free_hash); - } - - return ret; -} -EXPORT_SYMBOL_GPL(register_ftrace_direct); - -static struct ftrace_func_entry *find_direct_entry(unsigned long *ip, - struct dyn_ftrace **recp) -{ - struct ftrace_func_entry *entry; - struct dyn_ftrace *rec; - - rec = lookup_rec(*ip, *ip); - if (!rec) - return NULL; - - entry = __ftrace_lookup_ip(direct_functions, rec->ip); - if (!entry) { - WARN_ON(rec->flags & FTRACE_FL_DIRECT); - return NULL; - } - - WARN_ON(!(rec->flags & FTRACE_FL_DIRECT)); - - /* Passed in ip just needs to be on the call site */ - *ip = rec->ip; - - if (recp) - *recp = rec; - - return entry; -} - -int unregister_ftrace_direct(unsigned long ip, unsigned long addr) -{ - struct ftrace_direct_func *direct; - struct ftrace_func_entry *entry; - struct ftrace_hash *hash; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - entry = find_direct_entry(&ip, NULL); - if (!entry) - goto out_unlock; - - hash = direct_ops.func_hash->filter_hash; - if (hash->count == 1) - unregister_ftrace_function(&direct_ops); - - ret = ftrace_set_filter_ip(&direct_ops, ip, 1, 0); - - WARN_ON(ret); - - remove_hash_entry(direct_functions, entry); - - direct = ftrace_find_direct_func(addr); - if (!WARN_ON(!direct)) { - /* This is the good path (see the ! before WARN) */ - direct->count--; - WARN_ON(direct->count < 0); - if (!direct->count) { - list_del_rcu(&direct->next); - synchronize_rcu_tasks(); - kfree(direct); - kfree(entry); - ftrace_direct_func_count--; - } - } - out_unlock: - mutex_unlock(&direct_mutex); - - return ret; -} -EXPORT_SYMBOL_GPL(unregister_ftrace_direct); - -static struct ftrace_ops stub_ops = { - .func = ftrace_stub, -}; - -/** - * ftrace_modify_direct_caller - modify ftrace nop directly - * @entry: The ftrace hash entry of the direct helper for @rec - * @rec: The record representing the function site to patch - * @old_addr: The location that the site at @rec->ip currently calls - * @new_addr: The location that the site at @rec->ip should call - * - * An architecture may overwrite this function to optimize the - * changing of the direct callback on an ftrace nop location. - * This is called with the ftrace_lock mutex held, and no other - * ftrace callbacks are on the associated record (@rec). Thus, - * it is safe to modify the ftrace record, where it should be - * currently calling @old_addr directly, to call @new_addr. - * - * This is called with direct_mutex locked. - * - * Safety checks should be made to make sure that the code at - * @rec->ip is currently calling @old_addr. 
And this must - * also update entry->direct to @new_addr. - */ -int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, - struct dyn_ftrace *rec, - unsigned long old_addr, - unsigned long new_addr) -{ - unsigned long ip = rec->ip; - int ret; - - lockdep_assert_held(&direct_mutex); - - /* - * The ftrace_lock was used to determine if the record - * had more than one registered user to it. If it did, - * we needed to prevent that from changing to do the quick - * switch. But if it did not (only a direct caller was attached) - * then this function is called. But this function can deal - * with attached callers to the rec that we care about, and - * since this function uses standard ftrace calls that take - * the ftrace_lock mutex, we need to release it. - */ - mutex_unlock(&ftrace_lock); - - /* - * By setting a stub function at the same address, we force - * the code to call the iterator and the direct_ops helper. - * This means that @ip does not call the direct call, and - * we can simply modify it. - */ - ret = ftrace_set_filter_ip(&stub_ops, ip, 0, 0); - if (ret) - goto out_lock; - - ret = register_ftrace_function_nolock(&stub_ops); - if (ret) { - ftrace_set_filter_ip(&stub_ops, ip, 1, 0); - goto out_lock; - } - - entry->direct = new_addr; - - /* - * By removing the stub, we put back the direct call, calling - * the @new_addr. - */ - unregister_ftrace_function(&stub_ops); - ftrace_set_filter_ip(&stub_ops, ip, 1, 0); - - out_lock: - mutex_lock(&ftrace_lock); - - return ret; -} - -/** - * modify_ftrace_direct - Modify an existing direct call to call something else - * @ip: The instruction pointer to modify - * @old_addr: The address that the current @ip calls directly - * @new_addr: The address that the @ip should call - * - * This modifies a ftrace direct caller at an instruction pointer without - * having to disable it first. The direct call will switch over to the - * @new_addr without missing anything. - * - * Returns: zero on success. Non zero on error, which includes: - * -ENODEV : the @ip given has no direct caller attached - * -EINVAL : the @old_addr does not match the current direct caller - */ -int modify_ftrace_direct(unsigned long ip, - unsigned long old_addr, unsigned long new_addr) -{ - struct ftrace_direct_func *direct, *new_direct = NULL; - struct ftrace_func_entry *entry; - struct dyn_ftrace *rec; - int ret = -ENODEV; - - mutex_lock(&direct_mutex); - - mutex_lock(&ftrace_lock); - - ip = ftrace_location(ip); - if (!ip) - goto out_unlock; - - entry = find_direct_entry(&ip, &rec); - if (!entry) - goto out_unlock; - - ret = -EINVAL; - if (entry->direct != old_addr) - goto out_unlock; - - direct = ftrace_find_direct_func(old_addr); - if (WARN_ON(!direct)) - goto out_unlock; - if (direct->count > 1) { - ret = -ENOMEM; - new_direct = ftrace_alloc_direct_func(new_addr); - if (!new_direct) - goto out_unlock; - direct->count--; - new_direct->count++; - } else { - direct->addr = new_addr; - } - - /* - * If there's no other ftrace callback on the rec->ip location, - * then it can be changed directly by the architecture. - * If there is another caller, then we just need to change the - * direct caller helper to point to @new_addr. 
- */ - if (ftrace_rec_count(rec) == 1) { - ret = ftrace_modify_direct_caller(entry, rec, old_addr, new_addr); - } else { - entry->direct = new_addr; - ret = 0; - } - - if (unlikely(ret && new_direct)) { - direct->count++; - list_del_rcu(&new_direct->next); - synchronize_rcu_tasks(); - kfree(new_direct); - ftrace_direct_func_count--; - } - - out_unlock: - mutex_unlock(&ftrace_lock); - mutex_unlock(&direct_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(modify_ftrace_direct); - -#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS) +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS) static int check_direct_multi(struct ftrace_ops *ops) { @@ -5711,7 +5354,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long } /** - * register_ftrace_direct_multi - Call a custom trampoline directly + * register_ftrace_direct - Call a custom trampoline directly * for multiple functions registered in @ops * @ops: The address of the struct ftrace_ops object * @addr: The address of the trampoline to call at @ops functions @@ -5732,7 +5375,7 @@ static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long * -ENODEV - @ip does not point to a ftrace nop location (or not supported) * -ENOMEM - There was an allocation failure. */ -int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash, *free_hash = NULL; struct ftrace_func_entry *entry, *new; @@ -5774,6 +5417,7 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) ops->func = call_direct_funcs; ops->flags = MULTI_FLAGS; ops->trampoline = FTRACE_REGS_ADDR; + ops->direct_call = addr; err = register_ftrace_function_nolock(ops); @@ -5790,11 +5434,11 @@ int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } return err; } -EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(register_ftrace_direct); /** - * unregister_ftrace_direct_multi - Remove calls to custom trampoline - * previously registered by register_ftrace_direct_multi for @ops object. + * unregister_ftrace_direct - Remove calls to custom trampoline + * previously registered by register_ftrace_direct for @ops object. * @ops: The address of the struct ftrace_ops object * * This is used to remove a direct calls to @addr from the nop locations @@ -5805,7 +5449,8 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); * 0 on success * -EINVAL - The @ops object was not properly registered. 
*/ -int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, + bool free_filters) { struct ftrace_hash *hash = ops->func_hash->filter_hash; int err; @@ -5823,12 +5468,15 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* cleanup for possible another register call */ ops->func = NULL; ops->trampoline = 0; + + if (free_filters) + ftrace_free_filter(ops); return err; } -EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(unregister_ftrace_direct); static int -__modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +__modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { struct ftrace_hash *hash; struct ftrace_func_entry *entry, *iter; @@ -5844,6 +5492,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) /* Enable the tmp_ops to have the same functions as the direct ops */ ftrace_ops_init(&tmp_ops); tmp_ops.func_hash = ops->func_hash; + tmp_ops.direct_call = addr; err = register_ftrace_function_nolock(&tmp_ops); if (err) @@ -5865,6 +5514,8 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) entry->direct = addr; } } + /* Prevent store tearing if a trampoline concurrently accesses the value */ + WRITE_ONCE(ops->direct_call, addr); mutex_unlock(&ftrace_lock); @@ -5875,7 +5526,7 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) } /** - * modify_ftrace_direct_multi_nolock - Modify an existing direct 'multi' call + * modify_ftrace_direct_nolock - Modify an existing direct 'multi' call * to call something else * @ops: The address of the struct ftrace_ops object * @addr: The address of the new trampoline to call at @ops functions @@ -5892,19 +5543,19 @@ __modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) * Returns: zero on success. Non zero on error, which includes: * -EINVAL - The @ops object was not properly registered. */ -int modify_ftrace_direct_multi_nolock(struct ftrace_ops *ops, unsigned long addr) +int modify_ftrace_direct_nolock(struct ftrace_ops *ops, unsigned long addr) { if (check_direct_multi(ops)) return -EINVAL; if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return -EINVAL; - return __modify_ftrace_direct_multi(ops, addr); + return __modify_ftrace_direct(ops, addr); } -EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); +EXPORT_SYMBOL_GPL(modify_ftrace_direct_nolock); /** - * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * modify_ftrace_direct - Modify an existing direct 'multi' call * to call something else * @ops: The address of the struct ftrace_ops object * @addr: The address of the new trampoline to call at @ops functions @@ -5918,7 +5569,7 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi_nolock); * Returns: zero on success. Non zero on error, which includes: * -EINVAL - The @ops object was not properly registered. 
*/ -int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +int modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { int err; @@ -5928,11 +5579,11 @@ int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) return -EINVAL; mutex_lock(&direct_mutex); - err = __modify_ftrace_direct_multi(ops, addr); + err = __modify_ftrace_direct(ops, addr); mutex_unlock(&direct_mutex); return err; } -EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); +EXPORT_SYMBOL_GPL(modify_ftrace_direct); #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** @@ -6258,6 +5909,13 @@ static const struct file_operations ftrace_enabled_fops = { .release = seq_release_private, }; +static const struct file_operations ftrace_touched_fops = { + .open = ftrace_touched_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, @@ -6722,6 +6380,9 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) trace_create_file("enabled_functions", TRACE_MODE_READ, d_tracer, NULL, &ftrace_enabled_fops); + trace_create_file("touched_functions", TRACE_MODE_READ, + d_tracer, NULL, &ftrace_touched_fops); + ftrace_create_filter_files(&global_ops, d_tracer); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -8392,8 +8053,7 @@ struct kallsyms_data { * and returns 1 in case we resolved all the requested symbols, * 0 otherwise. */ -static int kallsyms_callback(void *data, const char *name, - struct module *mod, unsigned long addr) +static int kallsyms_callback(void *data, const char *name, unsigned long addr) { struct kallsyms_data *args = data; const char **sym; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c6f47b6cfd5f..834b361a4a66 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -163,7 +163,7 @@ enum { #define extended_time(event) \ (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) -static inline int rb_null_event(struct ring_buffer_event *event) +static inline bool rb_null_event(struct ring_buffer_event *event) { return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; } @@ -363,11 +363,9 @@ static void free_buffer_page(struct buffer_page *bpage) /* * We need to fit the time_stamp delta into 27 bits. 
*/ -static inline int test_time_stamp(u64 delta) +static inline bool test_time_stamp(u64 delta) { - if (delta & TS_DELTA_TEST) - return 1; - return 0; + return !!(delta & TS_DELTA_TEST); } #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) @@ -696,7 +694,7 @@ rb_time_read_cmpxchg(local_t *l, unsigned long expect, unsigned long set) return ret == expect; } -static int rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) +static bool rb_time_cmpxchg(rb_time_t *t, u64 expect, u64 set) { unsigned long cnt, top, bottom, msb; unsigned long cnt2, top2, bottom2, msb2; @@ -1486,7 +1484,7 @@ rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) return NULL; } -static int rb_head_page_replace(struct buffer_page *old, +static bool rb_head_page_replace(struct buffer_page *old, struct buffer_page *new) { unsigned long *ptr = (unsigned long *)&old->list.prev->next; @@ -1565,15 +1563,12 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, } } -static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, +static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *bpage) { unsigned long val = (unsigned long)bpage; - if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) - return 1; - - return 0; + RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK); } /** @@ -1583,30 +1578,28 @@ static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, * As a safety measure we check to make sure the data pages have not * been corrupted. */ -static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) +static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *head = rb_list_head(cpu_buffer->pages); struct list_head *tmp; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(head->next)->prev) != head)) - return -1; + return; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(head->prev)->next) != head)) - return -1; + return; for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) { if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(tmp->next)->prev) != tmp)) - return -1; + return; if (RB_WARN_ON(cpu_buffer, rb_list_head(rb_list_head(tmp->prev)->next) != tmp)) - return -1; + return; } - - return 0; } static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, @@ -1774,6 +1767,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; + irq_work_sync(&cpu_buffer->irq_work.work); + free_buffer_page(cpu_buffer->reader_page); if (head) { @@ -1880,6 +1875,8 @@ ring_buffer_free(struct trace_buffer *buffer) cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); + irq_work_sync(&buffer->irq_work.work); + for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); @@ -1918,7 +1915,7 @@ static inline unsigned long rb_page_write(struct buffer_page *bpage) return local_read(&bpage->write) & RB_WRITE_MASK; } -static int +static bool rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) { struct list_head *tail_page, *to_remove, *next_page; @@ -2031,12 +2028,13 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages) return nr_removed == 0; } -static int +static bool rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) { struct list_head *pages = &cpu_buffer->new_pages; - int retries, success; unsigned long flags; + bool success; + int retries; /* Can be called at early boot up, where interrupts must not been enabled */ 
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -2055,15 +2053,16 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) * spinning. */ retries = 10; - success = 0; + success = false; while (retries--) { struct list_head *head_page, *prev_page, *r; struct list_head *last_page, *first_page; struct list_head *head_page_with_bit; + struct buffer_page *hpage = rb_set_head_page(cpu_buffer); - head_page = &rb_set_head_page(cpu_buffer)->list; - if (!head_page) + if (!hpage) break; + head_page = &hpage->list; prev_page = head_page->prev; first_page = pages->next; @@ -2084,7 +2083,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) * pointer to point to end of list */ head_page->prev = last_page; - success = 1; + success = true; break; } } @@ -2112,7 +2111,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer) static void rb_update_pages(struct ring_buffer_per_cpu *cpu_buffer) { - int success; + bool success; if (cpu_buffer->nr_pages_to_update > 0) success = rb_insert_pages(cpu_buffer); @@ -2995,7 +2994,7 @@ static u64 rb_time_delta(struct ring_buffer_event *event) } } -static inline int +static inline bool rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, struct ring_buffer_event *event) { @@ -3016,7 +3015,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, delta = rb_time_delta(event); if (!rb_time_read(&cpu_buffer->write_stamp, &write_stamp)) - return 0; + return false; /* Make sure the write stamp is read before testing the location */ barrier(); @@ -3029,7 +3028,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, /* Something came in, can't discard */ if (!rb_time_cmpxchg(&cpu_buffer->write_stamp, write_stamp, write_stamp - delta)) - return 0; + return false; /* * It's possible that the event time delta is zero @@ -3062,12 +3061,12 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, if (index == old_index) { /* update counters */ local_sub(event_length, &cpu_buffer->entries_bytes); - return 1; + return true; } } /* could not discard */ - return 0; + return false; } static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) @@ -3098,6 +3097,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_is_reader_page(cpu_buffer->tail_page))) return; + /* + * No need for a memory barrier here, as the update + * of the tail_page did it for this page. + */ local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); rb_inc_page(&cpu_buffer->commit_page); @@ -3107,6 +3110,8 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { + /* Make sure the readers see the content of what is committed. */ + smp_wmb(); local_set(&cpu_buffer->commit_page->page->commit, rb_page_write(cpu_buffer->commit_page)); RB_WARN_ON(cpu_buffer, @@ -3282,7 +3287,7 @@ rb_wakeups(struct trace_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) * Note: The TRANSITION bit only handles a single transition between context. 
*/ -static __always_inline int +static __always_inline bool trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { unsigned int val = cpu_buffer->current_context; @@ -3299,14 +3304,14 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) bit = RB_CTX_TRANSITION; if (val & (1 << (bit + cpu_buffer->nest))) { do_ring_buffer_record_recursion(); - return 1; + return true; } } val |= (1 << (bit + cpu_buffer->nest)); cpu_buffer->current_context = val; - return 0; + return false; } static __always_inline void @@ -4063,10 +4068,10 @@ void ring_buffer_record_off(struct trace_buffer *buffer) unsigned int rd; unsigned int new_rd; + rd = atomic_read(&buffer->record_disabled); do { - rd = atomic_read(&buffer->record_disabled); new_rd = rd | RB_BUFFER_OFF; - } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); + } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd)); } EXPORT_SYMBOL_GPL(ring_buffer_record_off); @@ -4086,10 +4091,10 @@ void ring_buffer_record_on(struct trace_buffer *buffer) unsigned int rd; unsigned int new_rd; + rd = atomic_read(&buffer->record_disabled); do { - rd = atomic_read(&buffer->record_disabled); new_rd = rd & ~RB_BUFFER_OFF; - } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); + } while (!atomic_try_cmpxchg(&buffer->record_disabled, &rd, new_rd)); } EXPORT_SYMBOL_GPL(ring_buffer_record_on); @@ -4496,7 +4501,6 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, default: RB_WARN_ON(cpu_buffer, 1); } - return; } static void @@ -4527,7 +4531,6 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, default: RB_WARN_ON(iter->cpu_buffer, 1); } - return; } static struct buffer_page * @@ -4537,7 +4540,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) unsigned long overwrite; unsigned long flags; int nr_loops = 0; - int ret; + bool ret; local_irq_save(flags); arch_spin_lock(&cpu_buffer->lock); @@ -4684,7 +4687,12 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) /* * Make sure we see any padding after the write update - * (see rb_reset_tail()) + * (see rb_reset_tail()). + * + * In addition, a writer may be writing on the reader page + * if the page has not been fully filled, so the read barrier + * is also needed to make sure we see the content of what is + * committed by the writer (see rb_set_commit_to_write()). */ smp_rmb(); @@ -4942,7 +4950,6 @@ rb_reader_unlock(struct ring_buffer_per_cpu *cpu_buffer, bool locked) { if (likely(locked)) raw_spin_unlock(&cpu_buffer->reader_lock); - return; } /** @@ -5334,6 +5341,9 @@ void ring_buffer_reset_cpu(struct trace_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); +/* Flag to ensure proper resetting of atomic variables */ +#define RESET_BIT (1 << 30) + /** * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer * @buffer: The ring buffer to reset a per cpu buffer of @@ -5350,20 +5360,27 @@ void ring_buffer_reset_online_cpus(struct trace_buffer *buffer) for_each_online_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - atomic_inc(&cpu_buffer->resize_disabled); + atomic_add(RESET_BIT, &cpu_buffer->resize_disabled); atomic_inc(&cpu_buffer->record_disabled); } /* Make sure all commits have finished */ synchronize_rcu(); - for_each_online_buffer_cpu(buffer, cpu) { + for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; + /* + * If a CPU came online during the synchronize_rcu(), then + * ignore it. 
+ */ + if (!(atomic_read(&cpu_buffer->resize_disabled) & RESET_BIT)) + continue; + reset_disabled_cpu_buffer(cpu_buffer); atomic_dec(&cpu_buffer->record_disabled); - atomic_dec(&cpu_buffer->resize_disabled); + atomic_sub(RESET_BIT, &cpu_buffer->resize_disabled); } mutex_unlock(&buffer->mutex); @@ -5413,8 +5430,8 @@ bool ring_buffer_empty(struct trace_buffer *buffer) struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; bool dolock; + bool ret; int cpu; - int ret; /* yes this is racy, but if you don't like the race, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { @@ -5443,7 +5460,7 @@ bool ring_buffer_empty_cpu(struct trace_buffer *buffer, int cpu) struct ring_buffer_per_cpu *cpu_buffer; unsigned long flags; bool dolock; - int ret; + bool ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return true; diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c index d65f6c25a87c..0186ff4cbd0b 100644 --- a/kernel/trace/rv/reactor_panic.c +++ b/kernel/trace/rv/reactor_panic.c @@ -38,6 +38,5 @@ static void __exit unregister_react_panic(void) module_init(register_react_panic); module_exit(unregister_react_panic); -MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Bristot de Oliveira"); MODULE_DESCRIPTION("panic rv reactor: panic if an exception is found."); diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c index 4b6b7106a477..178759dbf89f 100644 --- a/kernel/trace/rv/reactor_printk.c +++ b/kernel/trace/rv/reactor_printk.c @@ -37,6 +37,5 @@ static void __exit unregister_react_printk(void) module_init(register_react_printk); module_exit(unregister_react_printk); -MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Bristot de Oliveira"); MODULE_DESCRIPTION("printk rv reactor: printk if an exception is hit."); diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 7e9061828c24..2f68e93fff0b 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -290,8 +290,6 @@ static ssize_t monitor_enable_write_data(struct file *filp, const char __user *u if (retval) return retval; - retval = count; - mutex_lock(&rv_interface_lock); if (val) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 937e9676dfd4..ebc59781456a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1149,22 +1149,22 @@ static void tracing_snapshot_instance_cond(struct trace_array *tr, unsigned long flags; if (in_nmi()) { - internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); - internal_trace_puts("*** snapshot is being ignored ***\n"); + trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n"); + trace_array_puts(tr, "*** snapshot is being ignored ***\n"); return; } if (!tr->allocated_snapshot) { - internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n"); - internal_trace_puts("*** stopping trace here! ***\n"); - tracing_off(); + trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n"); + trace_array_puts(tr, "*** stopping trace here! 
***\n"); + tracer_tracing_off(tr); return; } /* Note, snapshot can not be used when the tracer uses it */ if (tracer->use_max_tr) { - internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n"); - internal_trace_puts("*** Can not use snapshot (sorry) ***\n"); + trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n"); + trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n"); return; } @@ -3726,7 +3726,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, #define STATIC_FMT_BUF_SIZE 128 static char static_fmt_buf[STATIC_FMT_BUF_SIZE]; -static char *trace_iter_expand_format(struct trace_iterator *iter) +char *trace_iter_expand_format(struct trace_iterator *iter) { char *tmp; @@ -4446,8 +4446,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) if (trace_seq_has_overflowed(s)) return TRACE_TYPE_PARTIAL_LINE; - if (event) + if (event) { + if (tr->trace_flags & TRACE_ITER_FIELDS) + return print_event_fields(iter, event); return event->funcs->trace(iter, sym_flags, event); + } trace_seq_printf(s, "Unknown type %d\n", entry->type); @@ -9516,6 +9519,7 @@ static int __remove_instance(struct trace_array *tr) tracefs_remove(tr->dir); free_percpu(tr->last_func_repeats); free_trace_buffers(tr); + clear_tracing_err_log(tr); for (i = 0; i < tr->nr_topts; i++) { kfree(tr->topts[i].topts); @@ -9657,7 +9661,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) tr->buffer_percent = 50; - trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer, + trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer, tr, &buffer_percent_fops); create_trace_options_dir(tr); @@ -10393,19 +10397,20 @@ out: void __init ftrace_boot_snapshot(void) { +#ifdef CONFIG_TRACER_MAX_TRACE struct trace_array *tr; - if (snapshot_at_boot) { - tracing_snapshot(); - internal_trace_puts("** Boot snapshot taken **\n"); - } + if (!snapshot_at_boot) + return; list_for_each_entry(tr, &ftrace_trace_arrays, list) { - if (tr == &global_trace) + if (!tr->allocated_snapshot) continue; - trace_array_puts(tr, "** Boot snapshot taken **\n"); + tracing_snapshot_instance(tr); + trace_array_puts(tr, "** Boot snapshot taken **\n"); } +#endif } void __init early_trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 616e1aa1c4da..79bdefe9261b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -619,6 +619,7 @@ bool trace_is_tracepoint_string(const char *str); const char *trace_event_format(struct trace_iterator *iter, const char *fmt); void trace_check_vprintf(struct trace_iterator *iter, const char *fmt, va_list ap) __printf(2, 0); +char *trace_iter_expand_format(struct trace_iterator *iter); int trace_empty(struct trace_iterator *iter); @@ -1199,6 +1200,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf, C(HEX, "hex"), \ C(BIN, "bin"), \ C(BLOCK, "block"), \ + C(FIELDS, "fields"), \ C(PRINTK, "trace_printk"), \ C(ANNOTATE, "annotate"), \ C(USERSTACKTRACE, "userstacktrace"), \ diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 46d0abb32d0f..d6a70aff2410 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -44,14 +44,21 @@ enum { ERRORS }; static const char *err_text[] = { ERRORS }; +static DEFINE_MUTEX(lastcmd_mutex); static char *last_cmd; static int errpos(const char *str) { + int ret = 0; + + mutex_lock(&lastcmd_mutex); if (!str || !last_cmd) - return 0; + goto out; - return err_pos(last_cmd, str); + ret = err_pos(last_cmd, str); + out: + 
mutex_unlock(&lastcmd_mutex); + return ret; } static void last_cmd_set(const char *str) @@ -59,18 +66,22 @@ static void last_cmd_set(const char *str) if (!str) return; + mutex_lock(&lastcmd_mutex); kfree(last_cmd); - last_cmd = kstrdup(str, GFP_KERNEL); + mutex_unlock(&lastcmd_mutex); } static void synth_err(u8 err_type, u16 err_pos) { + mutex_lock(&lastcmd_mutex); if (!last_cmd) - return; + goto out; tracing_log_err(NULL, "synthetic_events", last_cmd, err_text, err_type, err_pos); + out: + mutex_unlock(&lastcmd_mutex); } static int create_synth_event(const char *raw_command); diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 908e8a13c675..b1ecd7677642 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -19,14 +19,12 @@ #include <linux/tracefs.h> #include <linux/types.h> #include <linux/uaccess.h> -/* Reminder to move to uapi when everything works */ -#ifdef CONFIG_COMPILE_TEST +#include <linux/highmem.h> +#include <linux/init.h> #include <linux/user_events.h> -#else -#include <uapi/linux/user_events.h> -#endif -#include "trace.h" #include "trace_dynevent.h" +#include "trace_output.h" +#include "trace.h" #define USER_EVENTS_PREFIX_LEN (sizeof(USER_EVENTS_PREFIX)-1) @@ -34,35 +32,12 @@ #define FIELD_DEPTH_NAME 1 #define FIELD_DEPTH_SIZE 2 -/* - * Limits how many trace_event calls user processes can create: - * Must be a power of two of PAGE_SIZE. - */ -#define MAX_PAGE_ORDER 0 -#define MAX_PAGES (1 << MAX_PAGE_ORDER) -#define MAX_BYTES (MAX_PAGES * PAGE_SIZE) -#define MAX_EVENTS (MAX_BYTES * 8) - /* Limit how long of an event name plus args within the subsystem. */ #define MAX_EVENT_DESC 512 #define EVENT_NAME(user_event) ((user_event)->tracepoint.name) #define MAX_FIELD_ARRAY_SIZE 1024 /* - * The MAP_STATUS_* macros are used for taking a index and determining the - * appropriate byte and the bit in the byte to set/reset for an event. - * - * The lower 3 bits of the index decide which bit to set. - * The remaining upper bits of the index decide which byte to use for the bit. - * - * This is used when an event has a probe attached/removed to reflect live - * status of the event wanting tracing or not to user-programs via shared - * memory maps. - */ -#define MAP_STATUS_BYTE(index) ((index) >> 3) -#define MAP_STATUS_MASK(index) BIT((index) & 7) - -/* * Internal bits (kernel side only) to keep track of connected probes: * These are used when status is requested in text form about an event. These * bits are compared against an internal byte on the event to determine which @@ -75,25 +50,25 @@ #define EVENT_STATUS_OTHER BIT(7) /* - * Stores the pages, tables, and locks for a group of events. - * Each logical grouping of events has its own group, with a - * matching page for status checks within user programs. This - * allows for isolation of events to user programs by various - * means. + * Stores the system name, tables, and locks for a group of events. This + * allows isolation for events by various means. 
*/ struct user_event_group { - struct page *pages; - char *register_page_data; - char *system_name; - struct hlist_node node; - struct mutex reg_mutex; + char *system_name; + struct hlist_node node; + struct mutex reg_mutex; DECLARE_HASHTABLE(register_table, 8); - DECLARE_BITMAP(page_bitmap, MAX_EVENTS); }; /* Group for init_user_ns mapping, top-most group */ static struct user_event_group *init_group; +/* Max allowed events for the whole system */ +static unsigned int max_user_events = 32768; + +/* Current number of events on the whole system */ +static unsigned int current_user_events; + /* * Stores per-event properties, as users register events * within a file a user_event might be created if it does not @@ -102,21 +77,61 @@ static struct user_event_group *init_group; * refcnt reaches one. */ struct user_event { - struct user_event_group *group; - struct tracepoint tracepoint; - struct trace_event_call call; - struct trace_event_class class; - struct dyn_event devent; - struct hlist_node node; - struct list_head fields; - struct list_head validators; - refcount_t refcnt; - int index; - int flags; - int min_size; - char status; + struct user_event_group *group; + struct tracepoint tracepoint; + struct trace_event_call call; + struct trace_event_class class; + struct dyn_event devent; + struct hlist_node node; + struct list_head fields; + struct list_head validators; + refcount_t refcnt; + int min_size; + char status; +}; + +/* + * Stores per-mm/event properties that enable an address to be + * updated properly for each task. As tasks are forked, we use + * these to track enablement sites that are tied to an event. + */ +struct user_event_enabler { + struct list_head link; + struct user_event *event; + unsigned long addr; + + /* Track enable bit, flags, etc. Aligned for bitops. */ + unsigned int values; +}; + +/* Bits 0-5 are for the bit to update upon enable/disable (0-63 allowed) */ +#define ENABLE_VAL_BIT_MASK 0x3F + +/* Bit 6 is for faulting status of enablement */ +#define ENABLE_VAL_FAULTING_BIT 6 + +/* Bit 7 is for freeing status of enablement */ +#define ENABLE_VAL_FREEING_BIT 7 + +/* Only duplicate the bit value */ +#define ENABLE_VAL_DUP_MASK ENABLE_VAL_BIT_MASK + +#define ENABLE_BITOPS(e) ((unsigned long *)&(e)->values) + +/* Used for asynchronous faulting in of pages */ +struct user_event_enabler_fault { + struct work_struct work; + struct user_event_mm *mm; + struct user_event_enabler *enabler; + int attempt; }; +static struct kmem_cache *fault_cache; + +/* Global list of memory descriptors using user_events */ +static LIST_HEAD(user_event_mms); +static DEFINE_SPINLOCK(user_event_mms_lock); + /* * Stores per-file events references, as users register events * within a file this structure is modified and freed via RCU. @@ -124,23 +139,23 @@ struct user_event { * These are not shared and only accessible by the file that created it. 
*/ struct user_event_refs { - struct rcu_head rcu; - int count; - struct user_event *events[]; + struct rcu_head rcu; + int count; + struct user_event *events[]; }; struct user_event_file_info { - struct user_event_group *group; - struct user_event_refs *refs; + struct user_event_group *group; + struct user_event_refs *refs; }; #define VALIDATOR_ENSURE_NULL (1 << 0) #define VALIDATOR_REL (1 << 1) struct user_event_validator { - struct list_head link; - int offset; - int flags; + struct list_head link; + int offset; + int flags; }; typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i, @@ -150,33 +165,17 @@ static int user_event_parse(struct user_event_group *group, char *name, char *args, char *flags, struct user_event **newuser); +static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm); +static struct user_event_mm *user_event_mm_get_all(struct user_event *user); +static void user_event_mm_put(struct user_event_mm *mm); + static u32 user_event_key(char *name) { return jhash(name, strlen(name), 0); } -static void set_page_reservations(char *pages, bool set) -{ - int page; - - for (page = 0; page < MAX_PAGES; ++page) { - void *addr = pages + (PAGE_SIZE * page); - - if (set) - SetPageReserved(virt_to_page(addr)); - else - ClearPageReserved(virt_to_page(addr)); - } -} - static void user_event_group_destroy(struct user_event_group *group) { - if (group->register_page_data) - set_page_reservations(group->register_page_data, false); - - if (group->pages) - __free_pages(group->pages, MAX_PAGE_ORDER); - kfree(group->system_name); kfree(group); } @@ -247,19 +246,6 @@ static struct user_event_group if (!group->system_name) goto error; - group->pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MAX_PAGE_ORDER); - - if (!group->pages) - goto error; - - group->register_page_data = page_address(group->pages); - - set_page_reservations(group->register_page_data, true); - - /* Zero all bits beside 0 (which is reserved for failures) */ - bitmap_zero(group->page_bitmap, MAX_EVENTS); - set_bit(0, group->page_bitmap); - mutex_init(&group->reg_mutex); hash_init(group->register_table); @@ -271,20 +257,514 @@ error: return NULL; }; -static __always_inline -void user_event_register_set(struct user_event *user) +static void user_event_enabler_destroy(struct user_event_enabler *enabler) +{ + list_del_rcu(&enabler->link); + + /* No longer tracking the event via the enabler */ + refcount_dec(&enabler->event->refcnt); + + kfree(enabler); +} + +static int user_event_mm_fault_in(struct user_event_mm *mm, unsigned long uaddr, + int attempt) +{ + bool unlocked; + int ret; + + /* + * Normally this is low, ensure that it cannot be taken advantage of by + * bad user processes to cause excessive looping. 
+ */ + if (attempt > 10) + return -EFAULT; + + mmap_read_lock(mm->mm); + + /* Ensure MM has tasks, cannot use after exit_mm() */ + if (refcount_read(&mm->tasks) == 0) { + ret = -ENOENT; + goto out; + } + + ret = fixup_user_fault(mm->mm, uaddr, FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE, + &unlocked); +out: + mmap_read_unlock(mm->mm); + + return ret; +} + +static int user_event_enabler_write(struct user_event_mm *mm, + struct user_event_enabler *enabler, + bool fixup_fault, int *attempt); + +static void user_event_enabler_fault_fixup(struct work_struct *work) +{ + struct user_event_enabler_fault *fault = container_of( + work, struct user_event_enabler_fault, work); + struct user_event_enabler *enabler = fault->enabler; + struct user_event_mm *mm = fault->mm; + unsigned long uaddr = enabler->addr; + int attempt = fault->attempt; + int ret; + + ret = user_event_mm_fault_in(mm, uaddr, attempt); + + if (ret && ret != -ENOENT) { + struct user_event *user = enabler->event; + + pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n", + mm->mm, (unsigned long long)uaddr, EVENT_NAME(user)); + } + + /* Prevent state changes from racing */ + mutex_lock(&event_mutex); + + /* User asked for enabler to be removed during fault */ + if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) { + user_event_enabler_destroy(enabler); + goto out; + } + + /* + * If we managed to get the page, re-issue the write. We do not + * want to get into a possible infinite loop, which is why we only + * attempt again directly if the page came in. If we couldn't get + * the page here, then we will try again the next time the event is + * enabled/disabled. + */ + clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); + + if (!ret) { + mmap_read_lock(mm->mm); + user_event_enabler_write(mm, enabler, true, &attempt); + mmap_read_unlock(mm->mm); + } +out: + mutex_unlock(&event_mutex); + + /* In all cases we no longer need the mm or fault */ + user_event_mm_put(mm); + kmem_cache_free(fault_cache, fault); +} + +static bool user_event_enabler_queue_fault(struct user_event_mm *mm, + struct user_event_enabler *enabler, + int attempt) +{ + struct user_event_enabler_fault *fault; + + fault = kmem_cache_zalloc(fault_cache, GFP_NOWAIT | __GFP_NOWARN); + + if (!fault) + return false; + + INIT_WORK(&fault->work, user_event_enabler_fault_fixup); + fault->mm = user_event_mm_get(mm); + fault->enabler = enabler; + fault->attempt = attempt; + + /* Don't try to queue in again while we have a pending fault */ + set_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); + + if (!schedule_work(&fault->work)) { + /* Allow another attempt later */ + clear_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)); + + user_event_mm_put(mm); + kmem_cache_free(fault_cache, fault); + + return false; + } + + return true; +} + +static int user_event_enabler_write(struct user_event_mm *mm, + struct user_event_enabler *enabler, + bool fixup_fault, int *attempt) +{ + unsigned long uaddr = enabler->addr; + unsigned long *ptr; + struct page *page; + void *kaddr; + int ret; + + lockdep_assert_held(&event_mutex); + mmap_assert_locked(mm->mm); + + *attempt += 1; + + /* Ensure MM has tasks, cannot use after exit_mm() */ + if (refcount_read(&mm->tasks) == 0) + return -ENOENT; + + if (unlikely(test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)) || + test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)))) + return -EBUSY; + + ret = pin_user_pages_remote(mm->mm, uaddr, 1, FOLL_WRITE | FOLL_NOFAULT, + &page, NULL, NULL); + + if (unlikely(ret <= 0)) { + if 
(!fixup_fault) + return -EFAULT; + + if (!user_event_enabler_queue_fault(mm, enabler, *attempt)) + pr_warn("user_events: Unable to queue fault handler\n"); + + return -EFAULT; + } + + kaddr = kmap_local_page(page); + ptr = kaddr + (uaddr & ~PAGE_MASK); + + /* Update bit atomically, user tracers must be atomic as well */ + if (enabler->event && enabler->event->status) + set_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); + else + clear_bit(enabler->values & ENABLE_VAL_BIT_MASK, ptr); + + kunmap_local(kaddr); + unpin_user_pages_dirty_lock(&page, 1, true); + + return 0; +} + +static bool user_event_enabler_exists(struct user_event_mm *mm, + unsigned long uaddr, unsigned char bit) { - int i = user->index; + struct user_event_enabler *enabler; + struct user_event_enabler *next; - user->group->register_page_data[MAP_STATUS_BYTE(i)] |= MAP_STATUS_MASK(i); + list_for_each_entry_safe(enabler, next, &mm->enablers, link) { + if (enabler->addr == uaddr && + (enabler->values & ENABLE_VAL_BIT_MASK) == bit) + return true; + } + + return false; } -static __always_inline -void user_event_register_clear(struct user_event *user) +static void user_event_enabler_update(struct user_event *user) { - int i = user->index; + struct user_event_enabler *enabler; + struct user_event_mm *mm = user_event_mm_get_all(user); + struct user_event_mm *next; + int attempt; + + while (mm) { + next = mm->next; + mmap_read_lock(mm->mm); + rcu_read_lock(); - user->group->register_page_data[MAP_STATUS_BYTE(i)] &= ~MAP_STATUS_MASK(i); + list_for_each_entry_rcu(enabler, &mm->enablers, link) { + if (enabler->event == user) { + attempt = 0; + user_event_enabler_write(mm, enabler, true, &attempt); + } + } + + rcu_read_unlock(); + mmap_read_unlock(mm->mm); + user_event_mm_put(mm); + mm = next; + } +} + +static bool user_event_enabler_dup(struct user_event_enabler *orig, + struct user_event_mm *mm) +{ + struct user_event_enabler *enabler; + + /* Skip pending frees */ + if (unlikely(test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(orig)))) + return true; + + enabler = kzalloc(sizeof(*enabler), GFP_NOWAIT | __GFP_ACCOUNT); + + if (!enabler) + return false; + + enabler->event = orig->event; + enabler->addr = orig->addr; + + /* Only dup part of value (ignore future flags, etc) */ + enabler->values = orig->values & ENABLE_VAL_DUP_MASK; + + refcount_inc(&enabler->event->refcnt); + list_add_rcu(&enabler->link, &mm->enablers); + + return true; +} + +static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm) +{ + refcount_inc(&mm->refcnt); + + return mm; +} + +static struct user_event_mm *user_event_mm_get_all(struct user_event *user) +{ + struct user_event_mm *found = NULL; + struct user_event_enabler *enabler; + struct user_event_mm *mm; + + /* + * We do not want to block fork/exec while enablements are being + * updated, so we use RCU to walk the current tasks that have used + * user_events ABI for 1 or more events. Each enabler found in each + * task that matches the event being updated has a write to reflect + * the kernel state back into the process. Waits/faults must not occur + * during this. So we scan the list under RCU for all the mm that have + * the event within it. This is needed because mm_read_lock() can wait. + * Each user mm returned has a ref inc to handle remove RCU races. 
+ */ + rcu_read_lock(); + + list_for_each_entry_rcu(mm, &user_event_mms, link) + list_for_each_entry_rcu(enabler, &mm->enablers, link) + if (enabler->event == user) { + mm->next = found; + found = user_event_mm_get(mm); + break; + } + + rcu_read_unlock(); + + return found; +} + +static struct user_event_mm *user_event_mm_create(struct task_struct *t) +{ + struct user_event_mm *user_mm; + unsigned long flags; + + user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT); + + if (!user_mm) + return NULL; + + user_mm->mm = t->mm; + INIT_LIST_HEAD(&user_mm->enablers); + refcount_set(&user_mm->refcnt, 1); + refcount_set(&user_mm->tasks, 1); + + spin_lock_irqsave(&user_event_mms_lock, flags); + list_add_rcu(&user_mm->link, &user_event_mms); + spin_unlock_irqrestore(&user_event_mms_lock, flags); + + t->user_event_mm = user_mm; + + /* + * The lifetime of the memory descriptor can slightly outlast + * the task lifetime if a ref to the user_event_mm is taken + * between list_del_rcu() and call_rcu(). Therefore we need + * to take a reference to it to ensure it can live this long + * under this corner case. This can also occur in clones that + * outlast the parent. + */ + mmgrab(user_mm->mm); + + return user_mm; +} + +static struct user_event_mm *current_user_event_mm(void) +{ + struct user_event_mm *user_mm = current->user_event_mm; + + if (user_mm) + goto inc; + + user_mm = user_event_mm_create(current); + + if (!user_mm) + goto error; +inc: + refcount_inc(&user_mm->refcnt); +error: + return user_mm; +} + +static void user_event_mm_destroy(struct user_event_mm *mm) +{ + struct user_event_enabler *enabler, *next; + + list_for_each_entry_safe(enabler, next, &mm->enablers, link) + user_event_enabler_destroy(enabler); + + mmdrop(mm->mm); + kfree(mm); +} + +static void user_event_mm_put(struct user_event_mm *mm) +{ + if (mm && refcount_dec_and_test(&mm->refcnt)) + user_event_mm_destroy(mm); +} + +static void delayed_user_event_mm_put(struct work_struct *work) +{ + struct user_event_mm *mm; + + mm = container_of(to_rcu_work(work), struct user_event_mm, put_rwork); + user_event_mm_put(mm); +} + +void user_event_mm_remove(struct task_struct *t) +{ + struct user_event_mm *mm; + unsigned long flags; + + might_sleep(); + + mm = t->user_event_mm; + t->user_event_mm = NULL; + + /* Clone will increment the tasks, only remove if last clone */ + if (!refcount_dec_and_test(&mm->tasks)) + return; + + /* Remove the mm from the list, so it can no longer be enabled */ + spin_lock_irqsave(&user_event_mms_lock, flags); + list_del_rcu(&mm->link); + spin_unlock_irqrestore(&user_event_mms_lock, flags); + + /* + * We need to wait for currently occurring writes to stop within + * the mm. This is required since exit_mm() snaps the current rss + * stats and clears them. On the final mmdrop(), check_mm() will + * report a bug if these increment. + * + * All writes/pins are done under mmap_read lock, take the write + * lock to ensure in-progress faults have completed. Faults that + * are pending but yet to run will check the task count and skip + * the fault since the mm is going away. + */ + mmap_write_lock(mm->mm); + mmap_write_unlock(mm->mm); + + /* + * Put for mm must be done after RCU delay to handle new refs in + * between the list_del_rcu() and now. This ensures any get refs + * during rcu_read_lock() are accounted for during list removal. 
+ * + * CPU A | CPU B + * --------------------------------------------------------------- + * user_event_mm_remove() | rcu_read_lock(); + * list_del_rcu() | list_for_each_entry_rcu(); + * call_rcu() | refcount_inc(); + * . | rcu_read_unlock(); + * schedule_work() | . + * user_event_mm_put() | . + * + * mmdrop() cannot be called in the softirq context of call_rcu() + * so we use a work queue after call_rcu() to run within. + */ + INIT_RCU_WORK(&mm->put_rwork, delayed_user_event_mm_put); + queue_rcu_work(system_wq, &mm->put_rwork); +} + +void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm) +{ + struct user_event_mm *mm = user_event_mm_create(t); + struct user_event_enabler *enabler; + + if (!mm) + return; + + rcu_read_lock(); + + list_for_each_entry_rcu(enabler, &old_mm->enablers, link) + if (!user_event_enabler_dup(enabler, mm)) + goto error; + + rcu_read_unlock(); + + return; +error: + rcu_read_unlock(); + user_event_mm_remove(t); +} + +static bool current_user_event_enabler_exists(unsigned long uaddr, + unsigned char bit) +{ + struct user_event_mm *user_mm = current_user_event_mm(); + bool exists; + + if (!user_mm) + return false; + + exists = user_event_enabler_exists(user_mm, uaddr, bit); + + user_event_mm_put(user_mm); + + return exists; +} + +static struct user_event_enabler +*user_event_enabler_create(struct user_reg *reg, struct user_event *user, + int *write_result) +{ + struct user_event_enabler *enabler; + struct user_event_mm *user_mm; + unsigned long uaddr = (unsigned long)reg->enable_addr; + int attempt = 0; + + user_mm = current_user_event_mm(); + + if (!user_mm) + return NULL; + + enabler = kzalloc(sizeof(*enabler), GFP_KERNEL_ACCOUNT); + + if (!enabler) + goto out; + + enabler->event = user; + enabler->addr = uaddr; + enabler->values = reg->enable_bit; +retry: + /* Prevents state changes from racing with new enablers */ + mutex_lock(&event_mutex); + + /* Attempt to reflect the current state within the process */ + mmap_read_lock(user_mm->mm); + *write_result = user_event_enabler_write(user_mm, enabler, false, + &attempt); + mmap_read_unlock(user_mm->mm); + + /* + * If the write works, then we will track the enabler. A ref to the + * underlying user_event is held by the enabler to prevent it going + * away while the enabler is still in use by a process. The ref is + * removed when the enabler is destroyed. This means a event cannot + * be forcefully deleted from the system until all tasks using it + * exit or run exec(), which includes forks and clones. 
+ */ + if (!*write_result) { + refcount_inc(&enabler->event->refcnt); + list_add_rcu(&enabler->link, &user_mm->enablers); + } + + mutex_unlock(&event_mutex); + + if (*write_result) { + /* Attempt to fault-in and retry if it worked */ + if (!user_event_mm_fault_in(user_mm, uaddr, attempt)) + goto retry; + + kfree(enabler); + enabler = NULL; + } +out: + user_event_mm_put(user_mm); + + return enabler; } static __always_inline __must_check @@ -449,7 +929,7 @@ static int user_event_add_field(struct user_event *user, const char *type, struct ftrace_event_field *field; int validator_flags = 0; - field = kmalloc(sizeof(*field), GFP_KERNEL); + field = kmalloc(sizeof(*field), GFP_KERNEL_ACCOUNT); if (!field) return -ENOMEM; @@ -468,7 +948,7 @@ add_validator: if (strstr(type, "char") != NULL) validator_flags |= VALIDATOR_ENSURE_NULL; - validator = kmalloc(sizeof(*validator), GFP_KERNEL); + validator = kmalloc(sizeof(*validator), GFP_KERNEL_ACCOUNT); if (!validator) { kfree(field); @@ -489,6 +969,9 @@ add_field: field->is_signed = is_signed; field->filter_type = filter_type; + if (filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(type); + list_add(&field->link, &user->fields); /* @@ -754,7 +1237,7 @@ static int user_event_create_print_fmt(struct user_event *user) len = user_event_set_print_fmt(user, NULL, 0); - print_fmt = kmalloc(len, GFP_KERNEL); + print_fmt = kmalloc(len, GFP_KERNEL_ACCOUNT); if (!print_fmt) return -ENOMEM; @@ -770,11 +1253,7 @@ static enum print_line_t user_event_print_trace(struct trace_iterator *iter, int flags, struct trace_event *event) { - /* Unsafe to try to decode user provided print_fmt, use hex */ - trace_print_hex_dump_seq(&iter->seq, "", DUMP_PREFIX_OFFSET, 16, - 1, iter->ent, iter->ent_size, true); - - return trace_handle_return(&iter->seq); + return print_event_fields(iter, event); } static struct trace_event_functions user_event_funcs = { @@ -820,6 +1299,8 @@ static int destroy_user_event(struct user_event *user) { int ret = 0; + lockdep_assert_held(&event_mutex); + /* Must destroy fields before call removal */ user_event_destroy_fields(user); @@ -829,9 +1310,6 @@ static int destroy_user_event(struct user_event *user) return ret; dyn_event_remove(&user->devent); - - user_event_register_clear(user); - clear_bit(user->index, user->group->page_bitmap); hash_del(&user->node); user_event_destroy_validators(user); @@ -839,6 +1317,11 @@ static int destroy_user_event(struct user_event *user) kfree(EVENT_NAME(user)); kfree(user); + if (current_user_events > 0) + current_user_events--; + else + pr_alert("BUG: Bad current_user_events\n"); + return ret; } @@ -977,9 +1460,9 @@ discard: #endif /* - * Update the register page that is shared between user processes. + * Update the enabled bit among all user processes. 
*/ -static void update_reg_page_for(struct user_event *user) +static void update_enable_bit_for(struct user_event *user) { struct tracepoint *tp = &user->tracepoint; char status = 0; @@ -1010,12 +1493,9 @@ static void update_reg_page_for(struct user_event *user) rcu_read_unlock_sched(); } - if (status) - user_event_register_set(user); - else - user_event_register_clear(user); - user->status = status; + + user_event_enabler_update(user); } /* @@ -1072,10 +1552,10 @@ static int user_event_reg(struct trace_event_call *call, return ret; inc: refcount_inc(&user->refcnt); - update_reg_page_for(user); + update_enable_bit_for(user); return 0; dec: - update_reg_page_for(user); + update_enable_bit_for(user); refcount_dec(&user->refcnt); return 0; } @@ -1093,7 +1573,7 @@ static int user_event_create(const char *raw_command) raw_command += USER_EVENTS_PREFIX_LEN; raw_command = skip_spaces(raw_command); - name = kstrdup(raw_command, GFP_KERNEL); + name = kstrdup(raw_command, GFP_KERNEL_ACCOUNT); if (!name) return -ENOMEM; @@ -1271,7 +1751,6 @@ static int user_event_parse(struct user_event_group *group, char *name, struct user_event **newuser) { int ret; - int index; u32 key; struct user_event *user; @@ -1290,12 +1769,7 @@ static int user_event_parse(struct user_event_group *group, char *name, return 0; } - index = find_first_zero_bit(group->page_bitmap, MAX_EVENTS); - - if (index == MAX_EVENTS) - return -EMFILE; - - user = kzalloc(sizeof(*user), GFP_KERNEL); + user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT); if (!user) return -ENOMEM; @@ -1335,20 +1809,23 @@ static int user_event_parse(struct user_event_group *group, char *name, mutex_lock(&event_mutex); + if (current_user_events >= max_user_events) { + ret = -EMFILE; + goto put_user_lock; + } + ret = user_event_trace_register(user); if (ret) goto put_user_lock; - user->index = index; - /* Ensure we track self ref and caller ref (2) */ refcount_set(&user->refcnt, 2); dyn_event_init(&user->devent, &user_event_dops); dyn_event_add(&user->devent, &user->call); - set_bit(user->index, group->page_bitmap); hash_add(group->register_table, &user->node, key); + current_user_events++; mutex_unlock(&event_mutex); @@ -1398,6 +1875,9 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i) if (unlikely(copy_from_iter(&idx, sizeof(idx), i) != sizeof(idx))) return -EFAULT; + if (idx < 0) + return -EINVAL; + rcu_read_lock_sched(); refs = rcu_dereference_sched(info->refs); @@ -1468,7 +1948,7 @@ static int user_events_open(struct inode *node, struct file *file) if (!group) return -ENOENT; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = kzalloc(sizeof(*info), GFP_KERNEL_ACCOUNT); if (!info) return -ENOMEM; @@ -1521,7 +2001,7 @@ static int user_events_ref_add(struct user_event_file_info *info, size = struct_size(refs, events, count + 1); - new_refs = kzalloc(size, GFP_KERNEL); + new_refs = kzalloc(size, GFP_KERNEL_ACCOUNT); if (!new_refs) return -ENOMEM; @@ -1564,6 +2044,37 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg) if (ret) return ret; + /* Ensure no flags, since we don't support any yet */ + if (kreg->flags != 0) + return -EINVAL; + + /* Ensure supported size */ + switch (kreg->enable_size) { + case 4: + /* 32-bit */ + break; +#if BITS_PER_LONG >= 64 + case 8: + /* 64-bit */ + break; +#endif + default: + return -EINVAL; + } + + /* Ensure natural alignment */ + if (kreg->enable_addr % kreg->enable_size) + return -EINVAL; + + /* Ensure bit range for size */ + if (kreg->enable_bit > (kreg->enable_size * 
BITS_PER_BYTE) - 1) + return -EINVAL; + + /* Ensure accessible */ + if (!access_ok((const void __user *)(uintptr_t)kreg->enable_addr, + kreg->enable_size)) + return -EFAULT; + kreg->size = size; return 0; @@ -1578,14 +2089,26 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, struct user_reg __user *ureg = (struct user_reg __user *)uarg; struct user_reg reg; struct user_event *user; + struct user_event_enabler *enabler; char *name; long ret; + int write_result; ret = user_reg_get(ureg, &reg); if (ret) return ret; + /* + * Prevent users from using the same address and bit multiple times + * within the same mm address space. This can cause unexpected behavior + * for user processes that is far easier to debug if this is explicitly + * an error upon registering. + */ + if (current_user_event_enabler_exists((unsigned long)reg.enable_addr, + reg.enable_bit)) + return -EADDRINUSE; + name = strndup_user((const char __user *)(uintptr_t)reg.name_args, MAX_EVENT_DESC); @@ -1610,8 +2133,28 @@ static long user_events_ioctl_reg(struct user_event_file_info *info, if (ret < 0) return ret; + /* + * user_events_ref_add succeeded: + * At this point we have a user_event, its lifetime is bound by the + * reference count, not this file. If anything fails, the user_event + * still has a reference until the file is released. During release + * any remaining references (from user_events_ref_add) are decremented. + * + * Attempt to create an enabler, which also has a lifetime tied to the + * event in the same way. Once the task that caused the enabler to be + * created exits or issues exec() then the enablers it has created + * will be destroyed and the ref to the event will be decremented. + */ + enabler = user_event_enabler_create(&reg, user, &write_result); + + if (!enabler) + return -ENOMEM; + + /* Write failed/faulted, give error back to caller */ + if (write_result) + return write_result; + put_user((u32)ret, &ureg->write_index); - put_user(user->index, &ureg->status_bit); return 0; } @@ -1641,6 +2184,114 @@ static long user_events_ioctl_del(struct user_event_file_info *info, return ret; } +static long user_unreg_get(struct user_unreg __user *ureg, + struct user_unreg *kreg) +{ + u32 size; + long ret; + + ret = get_user(size, &ureg->size); + + if (ret) + return ret; + + if (size > PAGE_SIZE) + return -E2BIG; + + if (size < offsetofend(struct user_unreg, disable_addr)) + return -EINVAL; + + ret = copy_struct_from_user(kreg, sizeof(*kreg), ureg, size); + + /* Ensure no reserved values, since we don't support any yet */ + if (kreg->__reserved || kreg->__reserved2) + return -EINVAL; + + return ret; +} + +static int user_event_mm_clear_bit(struct user_event_mm *user_mm, + unsigned long uaddr, unsigned char bit) +{ + struct user_event_enabler enabler; + int result; + int attempt = 0; + + memset(&enabler, 0, sizeof(enabler)); + enabler.addr = uaddr; + enabler.values = bit; +retry: + /* Prevents state changes from racing with new enablers */ + mutex_lock(&event_mutex); + + /* Force the bit to be cleared, since no event is attached */ + mmap_read_lock(user_mm->mm); + result = user_event_enabler_write(user_mm, &enabler, false, &attempt); + mmap_read_unlock(user_mm->mm); + + mutex_unlock(&event_mutex); + + if (result) { + /* Attempt to fault-in and retry if it worked */ + if (!user_event_mm_fault_in(user_mm, uaddr, attempt)) + goto retry; + } + + return result; +} + +/* + * Unregisters an enablement address/bit within a task/user mm. 
+ */ +static long user_events_ioctl_unreg(unsigned long uarg) +{ + struct user_unreg __user *ureg = (struct user_unreg __user *)uarg; + struct user_event_mm *mm = current->user_event_mm; + struct user_event_enabler *enabler, *next; + struct user_unreg reg; + long ret; + + ret = user_unreg_get(ureg, &reg); + + if (ret) + return ret; + + if (!mm) + return -ENOENT; + + ret = -ENOENT; + + /* + * Flags freeing and faulting are used to indicate if the enabler is in + * use at all. When faulting is set a page-fault is occurring asynchronously. + * During async fault if freeing is set, the enabler will be destroyed. + * If no async fault is happening, we can destroy it now since we hold + * the event_mutex during these checks. + */ + mutex_lock(&event_mutex); + + list_for_each_entry_safe(enabler, next, &mm->enablers, link) + if (enabler->addr == reg.disable_addr && + (enabler->values & ENABLE_VAL_BIT_MASK) == reg.disable_bit) { + set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler)); + + if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler))) + user_event_enabler_destroy(enabler); + + /* Removed at least one */ + ret = 0; + } + + mutex_unlock(&event_mutex); + + /* Ensure bit is now cleared for user, regardless of event status */ + if (!ret) + ret = user_event_mm_clear_bit(mm, reg.disable_addr, + reg.disable_bit); + + return ret; +} + /* * Handles the ioctl from user mode to register or alter operations. */ @@ -1663,6 +2314,12 @@ static long user_events_ioctl(struct file *file, unsigned int cmd, ret = user_events_ioctl_del(info, uarg); mutex_unlock(&group->reg_mutex); break; + + case DIAG_IOCSUNREG: + mutex_lock(&group->reg_mutex); + ret = user_events_ioctl_unreg(uarg); + mutex_unlock(&group->reg_mutex); + break; } return ret; @@ -1718,45 +2375,13 @@ out: } static const struct file_operations user_data_fops = { - .open = user_events_open, - .write = user_events_write, - .write_iter = user_events_write_iter, + .open = user_events_open, + .write = user_events_write, + .write_iter = user_events_write_iter, .unlocked_ioctl = user_events_ioctl, - .release = user_events_release, + .release = user_events_release, }; -static struct user_event_group *user_status_group(struct file *file) -{ - struct seq_file *m = file->private_data; - - if (!m) - return NULL; - - return m->private; -} - -/* - * Maps the shared page into the user process for checking if event is enabled. 
- */ -static int user_status_mmap(struct file *file, struct vm_area_struct *vma) -{ - char *pages; - struct user_event_group *group = user_status_group(file); - unsigned long size = vma->vm_end - vma->vm_start; - - if (size != MAX_BYTES) - return -EINVAL; - - if (!group) - return -EINVAL; - - pages = group->register_page_data; - - return remap_pfn_range(vma, vma->vm_start, - virt_to_phys(pages) >> PAGE_SHIFT, - size, vm_get_page_prot(VM_READ)); -} - static void *user_seq_start(struct seq_file *m, loff_t *pos) { if (*pos) @@ -1780,7 +2405,7 @@ static int user_seq_show(struct seq_file *m, void *p) struct user_event_group *group = m->private; struct user_event *user; char status; - int i, active = 0, busy = 0, flags; + int i, active = 0, busy = 0; if (!group) return -EINVAL; @@ -1789,11 +2414,10 @@ static int user_seq_show(struct seq_file *m, void *p) hash_for_each(group->register_table, i, user, node) { status = user->status; - flags = user->flags; - seq_printf(m, "%d:%s", user->index, EVENT_NAME(user)); + seq_printf(m, "%s", EVENT_NAME(user)); - if (flags != 0 || status != 0) + if (status != 0) seq_puts(m, " #"); if (status != 0) { @@ -1816,16 +2440,15 @@ static int user_seq_show(struct seq_file *m, void *p) seq_puts(m, "\n"); seq_printf(m, "Active: %d\n", active); seq_printf(m, "Busy: %d\n", busy); - seq_printf(m, "Max: %ld\n", MAX_EVENTS); return 0; } static const struct seq_operations user_seq_ops = { - .start = user_seq_start, - .next = user_seq_next, - .stop = user_seq_stop, - .show = user_seq_show, + .start = user_seq_start, + .next = user_seq_next, + .stop = user_seq_stop, + .show = user_seq_show, }; static int user_status_open(struct inode *node, struct file *file) @@ -1851,11 +2474,10 @@ static int user_status_open(struct inode *node, struct file *file) } static const struct file_operations user_status_fops = { - .open = user_status_open, - .mmap = user_status_mmap, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, + .open = user_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; /* @@ -1873,8 +2495,7 @@ static int create_user_tracefs(void) goto err; } - /* mmap with MAP_SHARED requires writable fd */ - emmap = tracefs_create_file("user_events_status", TRACE_MODE_WRITE, + emmap = tracefs_create_file("user_events_status", TRACE_MODE_READ, NULL, NULL, &user_status_fops); if (!emmap) { @@ -1888,20 +2509,53 @@ err: return -ENODEV; } +static int set_max_user_events_sysctl(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + mutex_lock(&event_mutex); + + ret = proc_douintvec(table, write, buffer, lenp, ppos); + + mutex_unlock(&event_mutex); + + return ret; +} + +static struct ctl_table user_event_sysctls[] = { + { + .procname = "user_events_max", + .data = &max_user_events, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = set_max_user_events_sysctl, + }, + {} +}; + static int __init trace_events_user_init(void) { int ret; + fault_cache = KMEM_CACHE(user_event_enabler_fault, 0); + + if (!fault_cache) + return -ENOMEM; + init_group = user_event_group_create(&init_user_ns); - if (!init_group) + if (!init_group) { + kmem_cache_destroy(fault_cache); return -ENOMEM; + } ret = create_user_tracefs(); if (ret) { pr_warn("user_events could not register with tracefs\n"); user_event_group_destroy(init_group); + kmem_cache_destroy(fault_cache); init_group = NULL; return ret; } @@ -1909,6 +2563,8 @@ static int __init trace_events_user_init(void) if (dyn_event_register(&user_event_dops)) 
pr_warn("user_events could not register with dyn_events\n"); + register_sysctl_init("kernel", user_event_sysctls); + return 0; } diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 9176bb7a9bb4..efbbec2caff8 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -159,7 +159,7 @@ static void osnoise_unregister_instance(struct trace_array *tr) if (!found) return; - kvfree_rcu(inst); + kvfree_rcu_mightsleep(inst); } /* @@ -1296,7 +1296,7 @@ static void notify_new_max_latency(u64 latency) rcu_read_lock(); list_for_each_entry_rcu(inst, &osnoise_instances, list) { tr = inst->tr; - if (tr->max_latency < latency) { + if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { tr->max_latency = latency; latency_fsnotify(tr); } @@ -1738,6 +1738,8 @@ static int timerlat_main(void *data) trace_timerlat_sample(&s); + notify_new_max_latency(diff); + timerlat_dump_stack(time_to_us(diff)); tlat->tracing_thread = false; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index bd475a00f96d..15f05faaae44 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -221,8 +221,11 @@ trace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len, const char *ret = trace_seq_buffer_ptr(p); const char *fmt = concatenate ? "%*phN" : "%*ph"; - for (i = 0; i < buf_len; i += 16) + for (i = 0; i < buf_len; i += 16) { + if (!concatenate && i != 0) + trace_seq_putc(p, ' '); trace_seq_printf(p, fmt, min(buf_len - i, 16), &buf[i]); + } trace_seq_putc(p, 0); return ret; @@ -808,6 +811,176 @@ EXPORT_SYMBOL_GPL(unregister_trace_event); * Standard events */ +static void print_array(struct trace_iterator *iter, void *pos, + struct ftrace_event_field *field) +{ + int offset; + int len; + int i; + + offset = *(int *)pos & 0xffff; + len = *(int *)pos >> 16; + + if (field) + offset += field->offset + sizeof(int); + + if (offset + len > iter->ent_size) { + trace_seq_puts(&iter->seq, "<OVERFLOW>"); + return; + } + + pos = (void *)iter->ent + offset; + + for (i = 0; i < len; i++, pos++) { + if (i) + trace_seq_putc(&iter->seq, ','); + trace_seq_printf(&iter->seq, "%02x", *(unsigned char *)pos); + } +} + +static void print_fields(struct trace_iterator *iter, struct trace_event_call *call, + struct list_head *head) +{ + struct ftrace_event_field *field; + int offset; + int len; + int ret; + void *pos; + + list_for_each_entry(field, head, link) { + trace_seq_printf(&iter->seq, " %s=", field->name); + if (field->offset + field->size > iter->ent_size) { + trace_seq_puts(&iter->seq, "<OVERFLOW>"); + continue; + } + pos = (void *)iter->ent + field->offset; + + switch (field->filter_type) { + case FILTER_COMM: + case FILTER_STATIC_STRING: + trace_seq_printf(&iter->seq, "%.*s", field->size, (char *)pos); + break; + case FILTER_RDYN_STRING: + case FILTER_DYN_STRING: + offset = *(int *)pos & 0xffff; + len = *(int *)pos >> 16; + + if (field->filter_type == FILTER_RDYN_STRING) + offset += field->offset + sizeof(int); + + if (offset + len > iter->ent_size) { + trace_seq_puts(&iter->seq, "<OVERFLOW>"); + break; + } + pos = (void *)iter->ent + offset; + trace_seq_printf(&iter->seq, "%.*s", len, (char *)pos); + break; + case FILTER_PTR_STRING: + if (!iter->fmt_size) + trace_iter_expand_format(iter); + pos = *(void **)pos; + ret = strncpy_from_kernel_nofault(iter->fmt, pos, + iter->fmt_size); + if (ret < 0) + trace_seq_printf(&iter->seq, "(0x%px)", pos); + else + trace_seq_printf(&iter->seq, "(0x%px:%s)", + pos, iter->fmt); + break; + case 
FILTER_TRACE_FN: + pos = *(void **)pos; + trace_seq_printf(&iter->seq, "%pS", pos); + break; + case FILTER_CPU: + case FILTER_OTHER: + switch (field->size) { + case 1: + if (isprint(*(char *)pos)) { + trace_seq_printf(&iter->seq, "'%c'", + *(unsigned char *)pos); + } + trace_seq_printf(&iter->seq, "(%d)", + *(unsigned char *)pos); + break; + case 2: + trace_seq_printf(&iter->seq, "0x%x (%d)", + *(unsigned short *)pos, + *(unsigned short *)pos); + break; + case 4: + /* dynamic array info is 4 bytes */ + if (strstr(field->type, "__data_loc")) { + print_array(iter, pos, NULL); + break; + } + + if (strstr(field->type, "__rel_loc")) { + print_array(iter, pos, field); + break; + } + + trace_seq_printf(&iter->seq, "0x%x (%d)", + *(unsigned int *)pos, + *(unsigned int *)pos); + break; + case 8: + trace_seq_printf(&iter->seq, "0x%llx (%lld)", + *(unsigned long long *)pos, + *(unsigned long long *)pos); + break; + default: + trace_seq_puts(&iter->seq, "<INVALID-SIZE>"); + break; + } + break; + default: + trace_seq_puts(&iter->seq, "<INVALID-TYPE>"); + } + } + trace_seq_putc(&iter->seq, '\n'); +} + +enum print_line_t print_event_fields(struct trace_iterator *iter, + struct trace_event *event) +{ + struct trace_event_call *call; + struct list_head *head; + + /* ftrace defined events have separate call structures */ + if (event->type <= __TRACE_LAST_TYPE) { + bool found = false; + + down_read(&trace_event_sem); + list_for_each_entry(call, &ftrace_events, list) { + if (call->event.type == event->type) { + found = true; + break; + } + /* No need to search all events */ + if (call->event.type > __TRACE_LAST_TYPE) + break; + } + up_read(&trace_event_sem); + if (!found) { + trace_seq_printf(&iter->seq, "UNKNOWN TYPE %d\n", event->type); + goto out; + } + } else { + call = container_of(event, struct trace_event_call, event); + } + head = trace_get_fields(call); + + trace_seq_printf(&iter->seq, "%s:", trace_event_name(call)); + + if (head && !list_empty(head)) + print_fields(iter, call, head); + else + trace_seq_puts(&iter->seq, "No fields found\n"); + + out: + return trace_handle_return(&iter->seq); +} + enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, struct trace_event *event) { diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 4c954636caf0..dca40f1f1da4 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -19,6 +19,8 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, extern void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset); extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); +extern enum print_line_t print_event_fields(struct trace_iterator *iter, + struct trace_event *event); extern void trace_event_read_lock(void); extern void trace_event_read_unlock(void); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 20d0c4a97633..2d2616678295 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1172,7 +1172,7 @@ int trace_probe_remove_file(struct trace_probe *tp, return -ENOENT; list_del_rcu(&link->list); - kvfree_rcu(link); + kvfree_rcu_mightsleep(link); if (list_empty(&tp->event->files)) trace_probe_clear_flag(tp, TP_FLAG_TRACE); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index ff0536cea968..a931d9aaea26 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -785,14 +785,7 @@ static struct fgraph_ops fgraph_ops 
__initdata = { }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS -#ifndef CALL_DEPTH_ACCOUNT -#define CALL_DEPTH_ACCOUNT "" -#endif - -noinline __noclone static void trace_direct_tramp(void) -{ - asm(CALL_DEPTH_ACCOUNT); -} +static struct ftrace_ops direct; #endif /* @@ -870,8 +863,9 @@ trace_selftest_startup_function_graph(struct tracer *trace, * Register direct function together with graph tracer * and make sure we get graph trace. */ - ret = register_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); + ftrace_set_filter_ip(&direct, (unsigned long)DYN_FTRACE_TEST_NAME, 0, 0); + ret = register_ftrace_direct(&direct, + (unsigned long)ftrace_stub_direct_tramp); if (ret) goto out; @@ -891,8 +885,9 @@ trace_selftest_startup_function_graph(struct tracer *trace, unregister_ftrace_graph(&fgraph_ops); - ret = unregister_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, - (unsigned long) trace_direct_tramp); + ret = unregister_ftrace_direct(&direct, + (unsigned long)ftrace_stub_direct_tramp, + true); if (ret) goto out;
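
The trace_selftest.c hunk above follows the reworked direct-call API: register_ftrace_direct() now takes an ftrace_ops whose filter selects the redirected functions, instead of a single ip/trampoline pair. The fragment below is only an illustrative sketch of that calling convention; it reuses the ftrace_stub_direct_tramp stub exactly as the selftest does, and my_direct_ops/attach_direct()/detach_direct() are made-up names. A real consumer supplies its own trampoline (and its declaration) plus proper error handling.

#include <linux/ftrace.h>

/* Sketch only: mirrors the selftest's use of the ops-based direct API. */
static struct ftrace_ops my_direct_ops;

static int attach_direct(unsigned long ip)
{
	int ret;

	/* The ops filter now decides which function(s) get the trampoline. */
	ret = ftrace_set_filter_ip(&my_direct_ops, ip, 0, 0);
	if (ret)
		return ret;

	/* One trampoline address covers every ip in the ops filter. */
	return register_ftrace_direct(&my_direct_ops,
				      (unsigned long)ftrace_stub_direct_tramp);
}

static void detach_direct(void)
{
	/* Passing true for the last argument also drops the filter, as the selftest does. */
	unregister_ftrace_direct(&my_direct_ops,
				 (unsigned long)ftrace_stub_direct_tramp, true);
}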
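
Since the user_events hunks above drop the mmap()-ed status page in favour of registered enable addresses, a rough user-space sketch of the new flow may help: the process hands the kernel an address and bit via struct user_reg, checks that word before emitting data, and tears the enabler down with DIAG_IOCSUNREG. This is not part of the patch; it assumes the uapi header <linux/user_events.h> (struct user_reg, struct user_unreg, DIAG_IOCSREG, DIAG_IOCSUNREG), tracefs mounted at /sys/kernel/tracing, and an example event format string.

/* Illustrative only: minimal consumer of the enabler-based ABI, error handling trimmed. */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/user_events.h>

static __u32 enabled;	/* the kernel sets/clears bit 0 of this word */

int main(void)
{
	int fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
	struct user_reg reg = { 0 };
	struct user_unreg unreg = { 0 };
	__u32 count = 1;
	struct iovec io[2];

	if (fd < 0)
		return 1;

	reg.size = sizeof(reg);
	reg.enable_bit = 0;			/* must fit within enable_size * 8 */
	reg.enable_size = sizeof(enabled);	/* 4-byte, naturally aligned */
	reg.enable_addr = (__u64)(uintptr_t)&enabled;
	reg.name_args = (__u64)(uintptr_t)"example_event u32 count";

	if (ioctl(fd, DIAG_IOCSREG, &reg) < 0)
		return 1;

	/* Only pay for the write while a tracer has the event enabled. */
	if (enabled & (1U << reg.enable_bit)) {
		io[0].iov_base = &reg.write_index;	/* idx consumed by write_core */
		io[0].iov_len = sizeof(reg.write_index);
		io[1].iov_base = &count;
		io[1].iov_len = sizeof(count);
		writev(fd, io, 2);
	}

	/* Unregister; the kernel clears the bit regardless of event status. */
	unreg.size = sizeof(unreg);
	unreg.disable_bit = reg.enable_bit;
	unreg.disable_addr = (__u64)(uintptr_t)&enabled;
	ioctl(fd, DIAG_IOCSUNREG, &unreg);

	close(fd);
	return 0;
}

Note that registering the same address/bit twice now fails with -EADDRINUSE, and the number of events a group may register is bounded by the new kernel.user_events_max sysctl rather than the old MAX_EVENTS status-page bitmap.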