From 21ef97f05a7da5bc23b26cb34d6746f83ca9bf20 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 10 Dec 2010 14:09:16 +1100 Subject: perf session: Fallback to unordered processing if no sample_id_all If we are running the new perf on an old kernel without support for sample_id_all, we should fall back to the old unordered processing of events. If we didn't than we would *always* process events without timestamps out of order, whether or not we hit a reordering race. In other words, instead of there being a chance of not attributing samples correctly, we would guarantee that samples would not be attributed. While processing all events without timestamps before events with timestamps may seem like an intuitive solution, it falls down as PERF_RECORD_EXIT events would also be processed before any samples. Even with a workaround for that case, samples before/after an exec would not be attributed correctly. This patch allows commands to indicate whether they need to fall back to unordered processing, so that commands that do not care about timestamps on every event will not be affected. If we do fallback, this will print out a warning if report -D was invoked. This patch adds the test in perf_session__new so that we only need to test once per session. Commands that do not use an event_ops (such as record and top) can simply pass NULL in it's place. Acked-by: Thomas Gleixner Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1291951882-sup-6069@au1.ibm.com> Signed-off-by: Ian Munsie Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-annotate.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 569a2761b90a..48dbab4b482f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -382,7 +382,7 @@ static int __cmd_annotate(void) int ret; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, force, false); + session = perf_session__new(input_name, O_RDONLY, force, false, &event_ops); if (session == NULL) return -ENOMEM; -- cgit v1.2.3 From eac23d1c384b55e4bbb89ea9e5a6bb77fb4d1140 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 9 Dec 2010 16:33:53 +1100 Subject: perf record,report,annotate,diff: Process events in order This patch changes perf report to ask for the ID info on all events be default if recording from multiple CPUs. Perf report, annotate and diff will now process the events in order if the kernel is able to provide timestamps on all events. This ensures that events such as COMM and MMAP which are necessary to correctly interpret samples are processed prior to those samples so that they are attributed correctly. Before: # perf record ./cachetest # perf report # Events: 6K cycles # # Overhead Command Shared Object Symbol # ........ ....... ................. ............................... # 74.11% :3259 [unknown] [k] 0x4a6c 1.50% cachetest ld-2.11.2.so [.] 0x1777c 1.46% :3259 [kernel.kallsyms] [k] .perf_event_mmap_ctx 1.25% :3259 [kernel.kallsyms] [k] restore 0.74% :3259 [kernel.kallsyms] [k] ._raw_spin_lock 0.71% :3259 [kernel.kallsyms] [k] .filemap_fault 0.66% :3259 [kernel.kallsyms] [k] .memset 0.54% cachetest [kernel.kallsyms] [k] .sha_transform 0.54% :3259 [kernel.kallsyms] [k] .copy_4K_page 0.54% :3259 [kernel.kallsyms] [k] .find_get_page 0.52% :3259 [kernel.kallsyms] [k] .trace_hardirqs_off 0.50% :3259 [kernel.kallsyms] [k] .__do_fault After: # perf report # Events: 6K cycles # # Overhead Command Shared Object Symbol # ........ ....... ................. ............................... # 44.28% cachetest cachetest [.] sumArrayNaive 22.53% cachetest cachetest [.] sumArrayOptimal 6.59% cachetest ld-2.11.2.so [.] 0x1777c 2.13% cachetest [unknown] [k] 0x340 1.46% cachetest [kernel.kallsyms] [k] .perf_event_mmap_ctx 1.25% cachetest [kernel.kallsyms] [k] restore 0.74% cachetest [kernel.kallsyms] [k] ._raw_spin_lock 0.71% cachetest [kernel.kallsyms] [k] .filemap_fault 0.66% cachetest [kernel.kallsyms] [k] .memset 0.54% cachetest [kernel.kallsyms] [k] .copy_4K_page 0.54% cachetest [kernel.kallsyms] [k] .find_get_page 0.54% cachetest [kernel.kallsyms] [k] .sha_transform 0.52% cachetest [kernel.kallsyms] [k] .trace_hardirqs_off 0.50% cachetest [kernel.kallsyms] [k] .__do_fault Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner LKML-Reference: <1291872833-839-1-git-send-email-imunsie@au1.ibm.com> Signed-off-by: Ian Munsie Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 ++ tools/perf/builtin-diff.c | 2 ++ tools/perf/builtin-record.c | 5 ++++- tools/perf/builtin-report.c | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-annotate.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 48dbab4b482f..c056cdc06912 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -375,6 +375,8 @@ static struct perf_event_ops event_ops = { .mmap = event__process_mmap, .comm = event__process_comm, .fork = event__process_task, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static int __cmd_annotate(void) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index af84e1c0519d..97846dcafc63 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -61,6 +61,8 @@ static struct perf_event_ops event_ops = { .exit = event__process_task, .fork = event__process_task, .lost = event__process_lost, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; static void perf_session__insert_hist_entry_by_name(struct rb_root *root, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index efd1b3c3d4a0..5149e3deb7bc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -285,7 +285,7 @@ static void create_counter(int counter, int cpu) if (system_wide) attr->sample_type |= PERF_SAMPLE_CPU; - if (sample_time) + if (sample_time || system_wide || !no_inherit || cpu_list) attr->sample_type |= PERF_SAMPLE_TIME; if (raw_samples) { @@ -327,6 +327,9 @@ try_again: * Old kernel, no attr->sample_id_type_all field */ sample_id_all_avail = false; + if (!sample_time && !raw_samples) + attr->sample_type &= ~PERF_SAMPLE_TIME; + goto retry_sample_id; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fd4c4500cd15..4af7ce6e1555 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -244,6 +244,8 @@ static struct perf_event_ops event_ops = { .event_type = event__process_event_type, .tracing_data = event__process_tracing_data, .build_id = event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; -- cgit v1.2.3