From d4957633bf9dab70e566e7dbb2b8d0c61c3a2f1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:48 -0700 Subject: perf report: Add infrastructure for a cycles histogram This adds the basic infrastructure to keep track of cycle counts per basic block for annotate. We allocate an array similar to the normal accounting, and then account branch cycles there. We handle two cases: cycles per basic block with start and cycles per branch (these are later used for either IPC or just cycles per BB) In the start case we cannot handle overlaps, so always the longest basic block wins. For the cycles per branch case everything is accurately accounted. v2: Remove unnecessary checks. Slight restructure. Move symbol__get_annotation to another patch. Move histogram allocation. v3: Merged with current tree Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-4-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..a06518dca4b7 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -79,6 +79,17 @@ struct sym_hist { u64 addr[0]; }; +struct cyc_hist { + u64 start; + u64 cycles; + u64 cycles_aggr; + u32 num; + u32 num_aggr; + u8 have_start; + /* 1 byte padding */ + u16 reset; +}; + struct source_line_samples { double percent; double percent_sum; @@ -97,6 +108,7 @@ struct source_line { * @histogram: Array of addr hit histograms per event being monitored * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS + * @cyc_hist: Average cycles per basic block * * lines is allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for @@ -109,6 +121,7 @@ struct annotated_source { struct source_line *lines; int nr_histograms; int sizeof_sym_hist; + struct cyc_hist *cycles_hist; struct sym_hist histograms[0]; }; @@ -130,6 +143,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); +int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, + struct addr_map_symbol *start, + unsigned cycles); + int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); int symbol__alloc_hist(struct symbol *sym); -- cgit v1.2.3 From 30e863bb6f708c0abd422fbb0e6b295f5ee6407b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 18 Jul 2015 08:24:50 -0700 Subject: perf annotate: Compute IPC and basic block cycles Compute the IPC and the basic block cycles for the annotate display. IPC is computed by counting the instructions, and then dividing the accounted cycles by that count. The actual IPC computation can only be done at annotate time, because we need to parse the objdump output first to know the number of instructions in the basic block. The cycles/IPC are also put into the perf function annotation so that the display code can show them. Again basic block overlaps are not handled, with the longest winning, but there are some heuristics to hide the IPC when the longest is not the most common. v2: Compute IPC correctly. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1437233094-12844-6-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 73 ++++++++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 2 ++ 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..6ec179547f72 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -53,6 +53,7 @@ struct annotate_browser { int max_jump_sources; int nr_jumps; bool searching_backwards; + bool have_cycles; u8 addr_width; u8 jumps_width; u8 target_width; @@ -390,7 +391,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01) { + if (max_percent < 0.01 && pos->ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -869,6 +870,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, return map_symbol__tui_annotate(&he->ms, evsel, hbt); } + +static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) +{ + unsigned n_insn = 0; + u64 offset; + + for (offset = start; offset <= end; offset++) { + if (browser->offsets[offset]) + n_insn++; + } + return n_insn; +} + +static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, + struct cyc_hist *ch) +{ + unsigned n_insn; + u64 offset; + + n_insn = count_insn(browser, start, end); + if (n_insn && ch->num && ch->cycles) { + float ipc = n_insn / ((double)ch->cycles / (double)ch->num); + + /* Hide data when there are too many overlaps. */ + if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + return; + + for (offset = start; offset <= end; offset++) { + struct disasm_line *dl = browser->offsets[offset]; + + if (dl) + dl->ipc = ipc; + } + } +} + +/* + * This should probably be in util/annotate.c to share with the tty + * annotate, but right now we need the per byte offsets arrays, + * which are only here. + */ +static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, + struct symbol *sym) +{ + u64 offset; + struct annotation *notes = symbol__annotation(sym); + + if (!notes->src || !notes->src->cycles_hist) + return; + + pthread_mutex_lock(¬es->lock); + for (offset = 0; offset < size; ++offset) { + struct cyc_hist *ch; + + ch = ¬es->src->cycles_hist[offset]; + if (ch && ch->cycles) { + struct disasm_line *dl; + + if (ch->have_start) + count_and_fill(browser, ch->start, offset, ch); + dl = browser->offsets[offset]; + if (dl && ch->num_aggr) + dl->cycles = ch->cycles_aggr / ch->num_aggr; + browser->have_cycles = true; + } + } + pthread_mutex_unlock(¬es->lock); +} + static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, size_t size) { @@ -991,6 +1061,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, } annotate_browser__mark_jump_targets(&browser, size); + annotate__compute_ipc(&browser, size, sym); browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); browser.max_addr_width = hex_width(sym->end); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a06518dca4b7..e9996092a093 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,8 @@ struct disasm_line { char *name; struct ins *ins; int line_nr; + float ipc; + u64 cycles; struct ins_operands ops; }; -- cgit v1.2.3