From d4957633bf9dab70e566e7dbb2b8d0c61c3a2f1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:48 -0700 Subject: perf report: Add infrastructure for a cycles histogram This adds the basic infrastructure to keep track of cycle counts per basic block for annotate. We allocate an array similar to the normal accounting, and then account branch cycles there. We handle two cases: cycles per basic block with start and cycles per branch (these are later used for either IPC or just cycles per BB) In the start case we cannot handle overlaps, so always the longest basic block wins. For the cycles per branch case everything is accurately accounted. v2: Remove unnecessary checks. Slight restructure. Move symbol__get_annotation to another patch. Move histogram allocation. v3: Merged with current tree Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/builtin-annotate.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..467a23b14e2f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -187,6 +187,7 @@ find_next: * symbol, free he->ms.sym->src to signal we already * processed this symbol. */ + zfree(¬es->src->cycles_hist); zfree(¬es->src); } } -- cgit v1.2.3 From f9db0d0f1b2cf030083c83d3ed3a4bbae6bdc8b7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 11 Aug 2015 06:30:48 -0400 Subject: perf callchain: Allow disabling call graphs per event This patch introduce "call-graph=no" to disable per-event callgraph. Here is an example. perf record -e 'cpu/cpu-cycles,call-graph=fp/,cpu/instructions,call-graph=no/' sleep 1 perf report --stdio # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 6 of event 'cpu/cpu-cycles,call-graph=fp/' # Event count (approx.): 774218 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ........................................ # 61.94% 0.00% sleep [kernel.vmlinux] [k] entry_SYSCALL_64_fastpath | ---entry_SYSCALL_64_fastpath | |--97.30%-- __brk | --2.70%-- mmap64 _dl_check_map_versions _dl_check_all_versions 61.94% 0.00% sleep [kernel.vmlinux] [k] perf_event_mmap | ---perf_event_mmap | |--97.30%-- do_brk | sys_brk | entry_SYSCALL_64_fastpath | __brk | --2.70%-- mmap_region do_mmap_pgoff vm_mmap_pgoff sys_mmap_pgoff sys_mmap entry_SYSCALL_64_fastpath mmap64 _dl_check_map_versions _dl_check_all_versions ...... # Samples: 6 of event 'cpu/instructions,call-graph=no/' # Event count (approx.): 359692 # # Children Self Command Shared Object Symbol # ........ ........ ....... ................ ................................. # 89.03% 0.00% sleep [unknown] [.] 0xffff6598ffff6598 89.03% 0.00% sleep ld-2.17.so [.] _dl_resolve_conflicts 89.03% 0.00% sleep [kernel.vmlinux] [k] page_fault Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1439289050-40510-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 ++- tools/perf/builtin-annotate.c | 2 ++ tools/perf/builtin-diff.c | 3 +++ tools/perf/tests/hists_cumulate.c | 4 ++++ tools/perf/util/evsel.c | 17 +++++++++++------ tools/perf/util/hist.c | 9 ++++++--- 6 files changed, 28 insertions(+), 10 deletions(-) (limited to 'tools/perf/builtin-annotate.c') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 7f82dec2b541..347a27322ed8 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -54,7 +54,8 @@ OPTIONS enabling time stamping. 0 for disabling time stamping. The default is 1. - 'call-graph': Disable/enable callgraph. Acceptable str are "fp" for - FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode. + FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and + "no" for disable callgraph. - 'stack-size': user stack size for dwarf mode Note: If user explicitly sets options which conflict with the params, the value set by the params will be overridden. diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 467a23b14e2f..a32a64ef08e2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,6 +239,8 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); + /* Don't sort callchain */ + perf_evsel__reset_sample_bit(pos, CALLCHAIN); hists__output_resort(hists, NULL); if (symbol_conf.event_group && diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index daaa7dca9c3b..0b180a885ba3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -722,6 +722,9 @@ static void data_process(void) if (verbose || data__files_cnt > 2) data__fprintf(); + /* Don't sort callchain for perf diff */ + perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); + hists__process(hists_base); } } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7d82c8be5e36..7ed737019de7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = false; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = false; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = false; symbol_conf.cumulate_callchain = true; + perf_evsel__reset_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); @@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) symbol_conf.use_callchain = true; symbol_conf.cumulate_callchain = true; + perf_evsel__set_sample_bit(evsel, CALLCHAIN); setup_sorting(); callchain_register_param(&callchain_param); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6647925d5f28..b096ef7a240c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -651,12 +651,17 @@ static void apply_config_terms(struct perf_evsel *evsel, /* parse callgraph parameters */ if (callgraph_buf != NULL) { - param.enabled = true; - if (parse_callchain_record(callgraph_buf, ¶m)) { - pr_err("per-event callgraph setting for %s failed. " - "Apply callgraph global setting for it\n", - evsel->name); - return; + if (!strcmp(callgraph_buf, "no")) { + param.enabled = false; + param.record_mode = CALLCHAIN_NONE; + } else { + param.enabled = true; + if (parse_callchain_record(callgraph_buf, ¶m)) { + pr_err("per-event callgraph setting for %s failed. " + "Apply callgraph global setting for it\n", + evsel->name); + return; + } } } if (dump_size > 0) { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6bccfae334b1..1cd785b5b56e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1109,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, - u64 min_callchain_hits) + u64 min_callchain_hits, + bool use_callchain) { struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; - if (symbol_conf.use_callchain) + if (use_callchain) callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); @@ -1139,6 +1140,8 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; + struct perf_evsel *evsel = hists_to_evsel(hists); + bool use_callchain = evsel ? (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) : symbol_conf.use_callchain; min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); @@ -1157,7 +1160,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); - __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); + __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); hists__inc_stats(hists, n); if (!n->filtered) -- cgit v1.2.3 From c0b4dffbc529244d3e4e3bd392f2bffa2d8531a7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 24 Aug 2015 13:33:14 -0300 Subject: perf annotate: Reset the dso find_symbol cache when removing symbols The 'annotate' tool does some filtering in the entries in a DSO but forgot to reset the cache done in dso__find_symbol(), cauxing a SEGV: [root@zoo ~]# perf annotate netlink_poll perf: Segmentation fault -------- backtrace -------- perf[0x526ceb] /lib64/libc.so.6(+0x34960)[0x7faedfbe0960] perf(rb_erase+0x223)[0x499d63] perf[0x4213e9] perf[0x4bc123] perf[0x4bc621] perf[0x4bf26b] perf[0x4bc855] perf(perf_session__process_events+0x340)[0x4bddc0] perf(cmd_annotate+0x6bb)[0x421b5b] perf[0x479063] perf(main+0x60a)[0x42098a] /lib64/libc.so.6(__libc_start_main+0xf0)[0x7faedfbcbfe0] perf[0x420aa9] [0x0] [root@zoo ~]# Fix it by reseting the find cache when removing symbols. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Fixes: b685ac22b436 ("perf symbols: Add front end cache for DSO symbol lookup") Link: http://lkml.kernel.org/n/tip-b2y9x46y0t8yem1ive41zqyp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 1 + tools/perf/util/dso.h | 2 ++ tools/perf/util/symbol.c | 10 ++++++++++ 3 files changed, 13 insertions(+) (limited to 'tools/perf/builtin-annotate.c') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a32a64ef08e2..8edc205ff9a7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); symbol__delete(al->sym); + dso__reset_find_symbol_cache(al->map->dso); } return 0; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c73276db6d6f..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -324,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); +void dso__reset_find_symbol_cache(struct dso *dso); + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); size_t __dsos__fprintf(struct list_head *head, FILE *fp); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 42e98ab5a9bb..46ae0532a8a6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -441,6 +441,16 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, return &s->sym; } +void dso__reset_find_symbol_cache(struct dso *dso) +{ + enum map_type type; + + for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { + dso->last_find_result[type].addr = 0; + dso->last_find_result[type].symbol = NULL; + } +} + struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr) { -- cgit v1.2.3