From 98df858ed46ddaaf9be3573eb2b63b57a68c6af7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:47 -0700 Subject: perf report: Add flag for non ANY branch mode Later patches need to cheaply check that the branch mode is in ANY. Add a new function to check all event attrs and add a flag to the report state, which is then initialized. v2: Rename flag Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..3ba0e9737dc5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,7 @@ struct report { bool mem_mode; bool header; bool header_only; + bool nonany_branch_mode; int max_stack; struct perf_read_values show_threads_values; const char *pretty_printing_style; @@ -258,6 +259,12 @@ static int report__setup_sample_type(struct report *rep) else callchain_param.record_mode = CALLCHAIN_FP; } + + /* ??? handle more cases than just ANY? */ + if (!(perf_evlist__combined_branch_type(session->evlist) & + PERF_SAMPLE_BRANCH_ANY)) + rep->nonany_branch_mode = true; + return 0; } -- cgit v1.2.3 From 57849998e2cd24d50295076a1bbd2f029e2d7c38 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:49 -0700 Subject: perf report: Add processing for cycle histograms Call the earlier added cycle histogram infrastructure from the perf report hist iter callback. For this we walk the branch records. This allows cycle histograms to be used when browsing perf report annotate. 
v2: Rename flag Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-5-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ tools/perf/util/hist.c | 33 +++++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 3 +++ 3 files changed, 39 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3ba0e9737dc5..3a9d1b659fcd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -103,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (!ui__has_annotation()) return 0; + hist__account_cycles(iter->sample->branch_stack, al, iter->sample, + rep->nonany_branch_mode); + if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 54fc0033dd6a..a6e9ddd37913 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1415,6 +1415,39 @@ int hists__link(struct hists *leader, struct hists *other) return 0; } +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode) +{ + struct branch_info *bi; + + /* If we have branch cycles always annotate them. */ + if (bs && bs->nr && bs->entries[0].flags.cycles) { + int i; + + bi = sample__resolve_bstack(sample, al); + if (bi) { + struct addr_map_symbol *prev = NULL; + + /* + * Ignore errors, still want to process the + * other entries. + * + * For non standard branch modes always + * force no IPC (prev == NULL) + * + * Note that perf stores branches reversed from + * program order! + */ + for (i = bs->nr - 1; i >= 0; i--) { + addr_map_symbol__account_cycles(&bi[i].from, + nonany_branch_mode ? 
NULL : prev, + bi[i].flags.cycles); + prev = &bi[i].to; + } + free(bi); + } + } +} size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3881d9815309..e2f712f85d2e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -350,6 +350,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) unsigned int hists__sort_list_width(struct hists *hists); +void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, + struct perf_sample *sample, bool nonany_branch_mode); + struct option; int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); -- cgit v1.2.3 From a9710ba091b0dcdace90f791706e9192313ffb7c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 7 Aug 2015 15:24:05 -0700 Subject: perf tools: Support full source file paths for srcline For perf report/script srcline currently only the base file name of the source file is printed. This is a good default because it usually fits on the screen. But in some cases we want to know the full file name, for example to aggregate hits per file. In the latter case we need more than the base file name to resolve file naming collisions: for example the kernel source has ~70 files named "core.c". It's also useful as input to post processing tools which want to point to the right file. Add a flag to allow full file name output. Add an option to perf report/script to enable this option. 
Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1438986245-15191-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 ++ tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-report.c | 2 ++ tools/perf/builtin-script.c | 2 ++ tools/perf/util/srcline.c | 6 +++++- tools/perf/util/util.h | 1 + 6 files changed, 15 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 960da203ec11..1a782ef02b68 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -354,6 +354,8 @@ OPTIONS To disable decoding entirely, use --no-itrace. +--full-source-path:: + Show the full path for source files for srcline output. include::callchain-overhead-calculation.txt[] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index e2fec5fc21e7..8e9be1f9c1dd 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -260,6 +260,9 @@ OPTIONS To disable decoding entirely, use --no-itrace. +--full-source-path:: + Show the full path for source files for srcline output. 
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3a9d1b659fcd..f301e865001f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -738,6 +738,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7912feb9a024..7b376d215e94 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1653,6 +1653,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), + OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, + "Show full source file name path for source lines"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c93fb0c5bd0b..fc08248f08ca 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -10,6 +10,8 @@ #include "symbol.h" +bool srcline_full_filename; + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -277,7 +279,9 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, if (!addr2line(dso_name, addr, &file, &line, dso)) goto out; - if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { + if (asprintf(&srcline, "%s:%u", + srcline_full_filename ? 
file : basename(file), + line) < 0) { free(file); goto out; } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 81487037acf7..88a891562a47 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -318,6 +318,7 @@ static inline int path__join3(char *bf, size_t size, struct dso; struct symbol; +extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym); void free_srcline(char *srcline); -- cgit v1.2.3 From 9e207ddfa20781e56465ce9a537f0a377c9d34fb Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 11 Aug 2015 06:30:49 -0400 Subject: perf report: Show call graph from reference events Introduce --show-ref-call-graph for perf report to print reference callgraph for no callgraph event. Here is an example. perf report --show-ref-call-graph --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 5 of event 'cpu/cpu-cycles,call-graph=fp/' # Event count (approx.): 144985 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 72.30% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--22.62%-- __GI___libc_nanosleep --77.38%-- [...] ...... # Samples: 6 of event 'cpu/instructions,call-graph=no/', show reference callgraph # Event count (approx.): 172780 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 73.16% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--31.44%-- __GI___libc_nanosleep --68.56%-- [...] 
Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1439289050-40510-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 11 +++++++++++ tools/perf/builtin-report.c | 7 +++++++ tools/perf/ui/browsers/hists.c | 9 +++++++-- tools/perf/util/hist.c | 7 ++++++- tools/perf/util/symbol.h | 3 ++- 5 files changed, 33 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7b07d19e2d54..a18ba757a0ed 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -359,6 +359,17 @@ OPTIONS --full-source-path:: Show the full path for source files for srcline output. +--show-ref-call-graph:: + When multiple events are sampled, it may not be necessary to collect + callgraphs for all of them. The sample sites are usually nearby, + and it's enough to collect the callgraphs on a reference event. + So the user can use the "call-graph=no" event modifier to disable callgraphs + for the other events to reduce the overhead. + However, perf report cannot show callgraphs for events whose + callgraph was disabled. + This option extends perf report to show the reference callgraphs, + collected by the reference event, for the events without a callgraph. 
+ include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f301e865001f..62b285e32aa5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -316,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (symbol_conf.show_ref_callgraph && + strstr(evname, "call-graph=no")) { + ret += fprintf(fp, ", show reference callgraph"); + } + if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); @@ -740,6 +745,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) itrace_parse_synth_opts), OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, "Show full source file name path for source lines"), + OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, + "Show callgraph from reference event"), OPT_END() }; struct perf_data_file file = { diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f296b7348449..10c7ec041039 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists, const char *ev_name = perf_evsel__name(evsel); char buf[512]; size_t buflen = sizeof(buf); + char ref[30] = " show reference callgraph, "; + bool enable_ref = false; if (symbol_conf.filter_relative) { nr_samples = hists->stats.nr_non_filtered_samples; @@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists, } } + if (symbol_conf.show_ref_callgraph && + strstr(ev_name, "call-graph=no")) + enable_ref = true; nr_samples = convert_unit(nr_samples, &unit); printed = scnprintf(bf, size, - "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, - nr_samples, unit, ev_name, nr_events); + "Samples: %lu%c 
of event '%s',%sEvent count (approx.): %" PRIu64, + nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events); if (hists->uid_filter_str) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1cd785b5b56e..08b6cd945f1e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1141,7 +1141,12 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct hist_entry *n; u64 min_callchain_hits; struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain; + bool use_callchain; + + if (evsel && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b98ce51af142..a4cde92afbad 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -106,7 +106,8 @@ struct symbol_conf { filter_relative, show_hist_headers, branch_callstack, - has_filter; + has_filter, + show_ref_callgraph; const char *vmlinux_name, *kallsyms_name, *source_prefix, -- cgit v1.2.3