summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorPeter Zijlstra (Intel) <peterz@infradead.org>2014-12-16 12:47:34 +0100
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2017-04-30 05:49:15 +0200
commit803e3757c40366d673b7792f6ac07793825fafeb (patch)
tree97dd95858acb54a740825ec45d8013bd72caddaf /include/linux
parent8e0aa9da2a47b8d0379e32e9717c6df2cc3bc66b (diff)
perf: Avoid horrible stack usage
commit 86038c5ea81b519a8a1fcfcd5e4599aab0cdd119 upstream. Both Linus (most recent) and Steve (a while ago) reported that perf related callbacks have massive stack bloat. The problem is that software events need a pt_regs in order to properly report the event location and unwind stack. And because we could not assume one was present we allocated one on stack and filled it with minimal bits required for operation. Now, pt_regs is quite large, so this is undesirable. Furthermore it turns out that most sites actually have a pt_regs pointer available, making this even more onerous, as the stack space is pointless waste. This patch addresses the problem by observing that software events have well defined nesting semantics, therefore we can use static per-cpu storage instead of on-stack. Linus made the further observation that all but the scheduler callers of perf_sw_event() have a pt_regs available, so we change the regular perf_sw_event() to require a valid pt_regs (where it used to be optional) and add perf_sw_event_sched() for the scheduler. We have a scheduler specific call instead of a more generic _noregs() like construct because we can assume non-recursion from the scheduler and thereby simplify the code further (_noregs would have to put the recursion context call inline in order to assertain which __perf_regs element to use). One last note on the implementation of perf_trace_buf_prepare(); we allow .regs = NULL for those cases where we already have a pt_regs pointer available and do not need another. Reported-by: Linus Torvalds <torvalds@linux-foundation.org> Reported-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Javi Merino <javi.merino@arm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Petr Mladek <pmladek@suse.cz> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Tom Zanussi <tom.zanussi@linux.intel.com> Cc: Vaibhav Nagarnaik <vnagarnaik@google.com> Link: http://lkml.kernel.org/r/20141216115041.GW3337@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/ftrace_event.h2
-rw-r--r--include/linux/perf_event.h28
2 files changed, 22 insertions, 8 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 28672e87e910..3d0320675e12 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -584,7 +584,7 @@ extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
extern void *perf_trace_buf_prepare(int size, unsigned short type,
- struct pt_regs *regs, int *rctxp);
+ struct pt_regs **regs, int *rctxp);
static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 893a0d07986f..df8904fea40c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -660,6 +660,7 @@ static inline int is_software_event(struct perf_event *event)
extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
+extern void ___perf_sw_event(u32, u64, struct pt_regs *, u64);
extern void __perf_sw_event(u32, u64, struct pt_regs *, u64);
#ifndef perf_arch_fetch_caller_regs
@@ -684,14 +685,25 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs)
static __always_inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
{
- struct pt_regs hot_regs;
+ if (static_key_false(&perf_swevent_enabled[event_id]))
+ __perf_sw_event(event_id, nr, regs, addr);
+}
+
+DECLARE_PER_CPU(struct pt_regs, __perf_regs[4]);
+/*
+ * 'Special' version for the scheduler, it hard assumes no recursion,
+ * which is guaranteed by us not actually scheduling inside other swevents
+ * because those disable preemption.
+ */
+static __always_inline void
+perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
+{
if (static_key_false(&perf_swevent_enabled[event_id])) {
- if (!regs) {
- perf_fetch_caller_regs(&hot_regs);
- regs = &hot_regs;
- }
- __perf_sw_event(event_id, nr, regs, addr);
+ struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+ perf_fetch_caller_regs(regs);
+ ___perf_sw_event(event_id, nr, regs, addr);
}
}
@@ -707,7 +719,7 @@ static inline void perf_event_task_sched_in(struct task_struct *prev,
static inline void perf_event_task_sched_out(struct task_struct *prev,
struct task_struct *next)
{
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0);
+ perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
if (static_key_false(&perf_sched_events.key))
__perf_event_task_sched_out(prev, next);
@@ -818,6 +830,8 @@ static inline int perf_event_refresh(struct perf_event *event, int refresh)
static inline void
perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { }
static inline void
+perf_sw_event_sched(u32 event_id, u64 nr, u64 addr) { }
+static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline int perf_register_guest_info_callbacks