From d3030191d3a6292408c5cf999ebcc1d10e00e9c2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:51 -0800 Subject: perf annotate-data: Handle array style accesses On x86, instructions for array access often looks like below. mov 0x1234(%rax,%rbx,8), %rcx Usually the first register holds the type information and the second one has the index. And the current code only looks up a variable for the first register. But it's possible to be in the other way around so it needs to check the second register if the first one failed. The stat changed like this. Annotate data type stats: total 294, ok 148 (50.3%), bad 146 (49.7%) ----------------------------------------------------------- 30 : no_sym 32 : no_mem_ops 66 : no_var 10 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index dba50762c6e8..d0ff677b460c 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -442,14 +442,18 @@ int annotate_check_args(void); /** * struct annotated_op_loc - Location info of instruction operand - * @reg: Register in the operand + * @reg1: First register in the operand + * @reg2: Second register in the operand * @offset: Memory access offset in the operand * @mem_ref: Whether the operand accesses memory + * @multi_regs: Whether the second register is used */ struct annotated_op_loc { - int reg; + int reg1; + int reg2; int offset; bool mem_ref; + bool multi_regs; }; enum annotated_insn_ops { -- cgit v1.2.3 From 5f7cdde843dd21c7228d9ae47d985086ce165985 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:54 -0800 Subject: perf annotate-data: Support global variables Global variables are accessed using PC-relative address so it needs to be handled separately. The PC-rel addressing is detected by using DWARF_REG_PC. On x86, %rip register would be used. The address can be calculated using the ip and offset in the instruction. But it should start from the next instruction so add calculate_pcrel_addr() to do it properly. But global variables defined in a different file would only have a declaration which doesn't include a location list. So it first tries to get the type info using the address, and then looks up the variable declarations using name. The name of global variables should be get from the symbol table. The declaration would have the type info. So extend find_var_type() to take both address and name for global variables. The stat is now looks like: Annotate data type stats: total 294, ok 153 (52.0%), bad 141 (48.0%) ----------------------------------------------------------- 30 : no_sym 32 : no_mem_ops 61 : no_var 10 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate-data.c | 38 ++++++++++++++++++++------- tools/perf/util/annotate-data.h | 6 +++-- tools/perf/util/annotate.c | 57 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/annotate.h | 4 +++ 4 files changed, 92 insertions(+), 13 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c index 58c0fac42e9d..e375dd288f67 100644 --- a/tools/perf/util/annotate-data.c +++ b/tools/perf/util/annotate-data.c @@ -240,7 +240,8 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, /* The result will be saved in @type_die */ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, - struct annotated_op_loc *loc, Dwarf_Die *type_die) + const char *var_name, struct annotated_op_loc *loc, + Dwarf_Die *type_die) { Dwarf_Die cu_die, var_die; Dwarf_Die *scopes = NULL; @@ -258,11 +259,21 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, reg = loc->reg1; offset = loc->offset; - if (reg == DWARF_REG_PC && - die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { - ret = check_variable(&var_die, type_die, offset, - /*is_pointer=*/false); - goto out; + if (reg == DWARF_REG_PC) { + if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { + ret = check_variable(&var_die, type_die, offset, + /*is_pointer=*/false); + loc->offset = offset; + goto out; + } + + if (var_name && die_find_variable_at(&cu_die, var_name, pc, + &var_die)) { + ret = check_variable(&var_die, type_die, 0, + /*is_pointer=*/false); + /* loc->offset will be updated by the caller */ + goto out; + } } /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ @@ -285,6 +296,7 @@ retry: /* Found a variable, see if it's correct */ ret = check_variable(&var_die, type_die, offset, reg != DWARF_REG_PC); + loc->offset = offset; goto out; } @@ -306,13 +318,21 @@ out: * @ms: map and symbol at the location * @ip: instruction address of the memory access * @loc: instruction operand location + * @addr: data address of the memory access + * @var_name: global variable name * * This functions searches the debug information of the binary to get the data - * type it accesses. The exact location is expressed by (ip, reg, offset). + * type it accesses. The exact location is expressed by (@ip, reg, offset) + * for pointer variables or (@ip, @addr) for global variables. Note that global + * variables might update the @loc->offset after finding the start of the variable. + * If it cannot find a global variable by address, it tried to fine a declaration + * of the variable using @var_name. In that case, @loc->offset won't be updated. + * * It return %NULL if not found. */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - struct annotated_op_loc *loc) + struct annotated_op_loc *loc, u64 addr, + const char *var_name) { struct annotated_data_type *result = NULL; struct dso *dso = map__dso(ms->map); @@ -332,7 +352,7 @@ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, * a file address for DWARF processing. */ pc = map__rip_2objdump(ms->map, ip); - if (find_data_type_die(di, pc, 0, loc, &type_die) < 0) + if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) goto out; result = dso__findnew_data_type(dso, &type_die); diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 214c625e7bc9..1b0db8e8c40e 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -107,7 +107,8 @@ extern struct annotated_data_stat ann_data_stat; /* Returns data type at the location (ip, reg, offset) */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - struct annotated_op_loc *loc); + struct annotated_op_loc *loc, u64 addr, + const char *var_name); /* Update type access histogram at the given offset */ int annotated_data_type__update_samples(struct annotated_data_type *adt, @@ -121,7 +122,8 @@ void annotated_data_type__tree_delete(struct rb_root *root); static inline struct annotated_data_type * find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, - struct annotated_op_loc *loc __maybe_unused) + struct annotated_op_loc *loc __maybe_unused, + u64 addr __maybe_unused, const char *var_name __maybe_unused) { return NULL; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 655bd9443f5e..107b264fa41e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -37,6 +37,7 @@ #include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" +#include "thread.h" #include #include #include @@ -3744,6 +3745,30 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) return false; } +u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, + struct disasm_line *dl) +{ + struct annotation *notes; + struct disasm_line *next; + u64 addr; + + notes = symbol__annotation(ms->sym); + /* + * PC-relative addressing starts from the next instruction address + * But the IP is for the current instruction. Since disasm_line + * doesn't have the instruction size, calculate it using the next + * disasm_line. If it's the last one, we can use symbol's end + * address directly. + */ + if (&dl->al.node == notes->src->source.prev) + addr = ms->sym->end + offset; + else { + next = list_next_entry(dl, al.node); + addr = ip + (next->al.offset - dl->al.offset) + offset; + } + return map__rip_2objdump(ms->map, addr); +} + /** * hist_entry__get_data_type - find data type for given hist entry * @he: hist entry @@ -3763,7 +3788,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) struct annotated_op_loc *op_loc; struct annotated_data_type *mem_type; struct annotated_item_stat *istat; - u64 ip = he->ip; + u64 ip = he->ip, addr = 0; + const char *var_name = NULL; + int var_offset; int i; ann_data_stat.total++; @@ -3822,12 +3849,38 @@ retry: /* Recalculate IP because of LOCK prefix or insn fusion */ ip = ms->sym->start + dl->al.offset; - mem_type = find_data_type(ms, ip, op_loc); + var_offset = op_loc->offset; + + /* PC-relative addressing */ + if (op_loc->reg1 == DWARF_REG_PC) { + struct addr_location al; + struct symbol *var; + u64 map_addr; + + addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl); + /* Kernel symbols might be relocated */ + map_addr = addr + map__reloc(ms->map); + + addr_location__init(&al); + var = thread__find_symbol_fb(he->thread, he->cpumode, + map_addr, &al); + if (var) { + var_name = var->name; + /* Calculate type offset from the start of variable */ + var_offset = map_addr - map__unmap_ip(al.map, var->start); + } + addr_location__exit(&al); + } + + mem_type = find_data_type(ms, ip, op_loc, addr, var_name); if (mem_type) istat->good++; else istat->bad++; + if (mem_type && var_name) + op_loc->offset = var_offset; + if (symbol_conf.annotate_data_sample) { annotated_data_type__update_samples(mem_type, evsel, op_loc->offset, diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d0ff677b460c..94435607c958 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -491,4 +491,8 @@ struct annotated_item_stat { }; extern struct list_head ann_insn_stat; +/* Calculate PC-relative address */ +u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, + struct disasm_line *dl); + #endif /* __PERF_ANNOTATE_H */ -- cgit v1.2.3 From d3e7cad6f36d9e80307b05bf31959597f9b6cd62 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:12 -0800 Subject: perf annotate: Add a hashmap for symbol histogram Now symbol histogram uses an array to save per-offset sample counts. But it wastes a lot of memory if the symbol has a few samples only. Add a hashmap to save values only for actual samples. For now, it has duplicate histogram (one in the existing array and another in the new hash map). Once it can convert to use the hash in all places, we can get rid of the array later. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-2-namhyung@kernel.org --- tools/perf/util/annotate.c | 42 ++++++++++++++++++++++++++++++++++++++++-- tools/perf/util/annotate.h | 2 ++ 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 107b264fa41e..caaea9421235 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -38,6 +38,7 @@ #include "arch/common.h" #include "namespaces.h" #include "thread.h" +#include "hashmap.h" #include #include #include @@ -863,6 +864,17 @@ bool arch__is(struct arch *arch, const char *name) return !strcmp(arch->name, name); } +/* symbol histogram: key = offset << 16 | evsel->core.idx */ +static size_t sym_hist_hash(long key, void *ctx __maybe_unused) +{ + return (key >> 16) + (key & 0xffff); +} + +static bool sym_hist_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + static struct annotated_source *annotated_source__new(void) { struct annotated_source *src = zalloc(sizeof(*src)); @@ -877,6 +889,8 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src { if (src == NULL) return; + + hashmap__free(src->samples); zfree(&src->histograms); free(src); } @@ -909,6 +923,14 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, src->sizeof_sym_hist = sizeof_sym_hist; src->nr_histograms = nr_hists; src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + + if (src->histograms == NULL) + return -1; + + src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL); + if (src->samples == NULL) + zfree(&src->histograms); + return src->histograms ? 0 : -1; } @@ -920,6 +942,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + hashmap__clear(notes->src->samples); } if (notes->branch && notes->branch->cycles_hist) { memset(notes->branch->cycles_hist, 0, @@ -983,8 +1006,10 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, struct perf_sample *sample) { struct symbol *sym = ms->sym; - unsigned offset; + long hash_key; + u64 offset; struct sym_hist *h; + struct sym_hist_entry *entry; pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map__unmap_ip(ms->map, addr)); @@ -1002,15 +1027,28 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); return -ENOMEM; } + + hash_key = offset << 16 | evidx; + if (!hashmap__find(src->samples, hash_key, &entry)) { + entry = zalloc(sizeof(*entry)); + if (entry == NULL) + return -ENOMEM; + + if (hashmap__add(src->samples, hash_key, entry) < 0) + return -ENOMEM; + } + h->nr_samples++; h->addr[offset].nr_samples++; h->period += sample->period; h->addr[offset].period += sample->period; + entry->nr_samples++; + entry->period += sample->period; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n", sym->start, sym->name, addr, addr - sym->start, evidx, - h->addr[offset].nr_samples, h->addr[offset].period); + entry->nr_samples, entry->period); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 94435607c958..a2b0c8210740 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -12,6 +12,7 @@ #include "symbol_conf.h" #include "mutex.h" #include "spark.h" +#include "hashmap.h" struct hist_browser_timer; struct hist_entry; @@ -280,6 +281,7 @@ struct annotated_source { size_t sizeof_sym_hist; struct sym_hist *histograms; struct annotation_line **offsets; + struct hashmap *samples; int nr_histograms; int nr_entries; int nr_asm_entries; -- cgit v1.2.3 From 80154575849778e40d9d87aa7ab14491ac401948 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:13 -0800 Subject: perf annotate: Calculate instruction overhead using hashmap Use annotated_source.samples hashmap instead of addr array in the struct sym_hist. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-3-namhyung@kernel.org --- tools/perf/ui/gtk/annotate.c | 14 +++++++++++--- tools/perf/util/annotate.c | 44 ++++++++++++++++++++++++++++++-------------- tools/perf/util/annotate.h | 11 +++++++++++ 3 files changed, 52 insertions(+), 17 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 394861245fd3..93ce3d47e47e 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -28,21 +28,29 @@ static const char *const col_names[] = { static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, struct disasm_line *dl, int evidx) { + struct annotation *notes; struct sym_hist *symhist; + struct sym_hist_entry *entry; double percent = 0.0; const char *markup; int ret = 0; + u64 nr_samples = 0; strcpy(buf, ""); if (dl->al.offset == (s64) -1) return 0; - symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples) + notes = symbol__annotation(sym); + symhist = annotation__histogram(notes, evidx); + entry = annotated_source__hist_entry(notes->src, evidx, dl->al.offset); + if (entry) + nr_samples = entry->nr_samples; + + if (!symbol_conf.event_group && nr_samples == 0) return 0; - percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples; + percent = 100.0 * nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index caaea9421235..1451699d931d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2368,17 +2368,25 @@ out_remove_tmp: return err; } -static void calc_percent(struct sym_hist *sym_hist, - struct hists *hists, +static void calc_percent(struct annotation *notes, + struct evsel *evsel, struct annotation_data *data, s64 offset, s64 end) { + struct hists *hists = evsel__hists(evsel); + int evidx = evsel->core.idx; + struct sym_hist *sym_hist = annotation__histogram(notes, evidx); unsigned int hits = 0; u64 period = 0; while (offset < end) { - hits += sym_hist->addr[offset].nr_samples; - period += sym_hist->addr[offset].period; + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry) { + hits += entry->nr_samples; + period += entry->period; + } ++offset; } @@ -2415,16 +2423,13 @@ static void annotation__calc_percent(struct annotation *notes, end = next ? next->offset : len; for_each_group_evsel(evsel, leader) { - struct hists *hists = evsel__hists(evsel); struct annotation_data *data; - struct sym_hist *sym_hist; BUG_ON(i >= al->data_nr); - sym_hist = annotation__histogram(notes, evsel->core.idx); data = &al->data[i++]; - calc_percent(sym_hist, hists, data, al->offset, end); + calc_percent(notes, evsel, data, al->offset, end); } } } @@ -2619,14 +2624,19 @@ static void print_summary(struct rb_root *root, const char *filename) static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel) { + int evidx = evsel->core.idx; struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); + struct sym_hist *h = annotation__histogram(notes, evidx); u64 len = symbol__size(sym), offset; - for (offset = 0; offset < len; ++offset) - if (h->addr[offset].nr_samples != 0) + for (offset = 0; offset < len; ++offset) { + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry && entry->nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, - sym->start + offset, h->addr[offset].nr_samples); + sym->start + offset, entry->nr_samples); + } printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } @@ -2855,8 +2865,14 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) h->nr_samples = 0; for (offset = 0; offset < len; ++offset) { - h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8; - h->nr_samples += h->addr[offset].nr_samples; + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry == NULL) + continue; + + entry->nr_samples = entry->nr_samples * 7 / 8; + h->nr_samples += entry->nr_samples; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a2b0c8210740..3362980a5d3d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -356,6 +356,17 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i return annotated_source__histogram(notes->src, idx); } +static inline struct sym_hist_entry * +annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset) +{ + struct sym_hist_entry *entry; + long key = offset << 16 | idx; + + if (!hashmap__find(src->samples, key, &entry)) + return NULL; + return entry; +} + static inline struct annotation *symbol__annotation(struct symbol *sym) { return (void *)sym - symbol_conf.priv_size; -- cgit v1.2.3 From f59e3660cd84d94cfdddbced91200981d9c25218 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:14 -0800 Subject: perf annotate: Remove sym_hist.addr[] array It's not used anymore and the code is coverted to use a hash map. Now sym_hist has a static size, so no need to have sizeof_sym_hist in the struct annotated_source. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-4-namhyung@kernel.org --- tools/perf/util/annotate.c | 36 +++++------------------------------- tools/perf/util/annotate.h | 4 +--- 2 files changed, 6 insertions(+), 34 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1451699d931d..ac002d907d81 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -896,33 +896,10 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src } static int annotated_source__alloc_histograms(struct annotated_source *src, - size_t size, int nr_hists) + int nr_hists) { - size_t sizeof_sym_hist; - - /* - * Add buffer of one element for zero length symbol. - * When sample is taken from first instruction of - * zero length symbol, perf still resolves it and - * shows symbol name in perf report and allows to - * annotate it. - */ - if (size == 0) - size = 1; - - /* Check for overflow when calculating sizeof_sym_hist */ - if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) - return -1; - - sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); - - /* Check for overflow in zalloc argument */ - if (sizeof_sym_hist > SIZE_MAX / nr_hists) - return -1; - - src->sizeof_sym_hist = sizeof_sym_hist; src->nr_histograms = nr_hists; - src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + src->histograms = calloc(nr_hists, sizeof(*src->histograms)); if (src->histograms == NULL) return -1; @@ -941,7 +918,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, - notes->src->nr_histograms * notes->src->sizeof_sym_hist); + notes->src->nr_histograms * sizeof(*notes->src->histograms)); hashmap__clear(notes->src->samples); } if (notes->branch && notes->branch->cycles_hist) { @@ -1039,9 +1016,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, } h->nr_samples++; - h->addr[offset].nr_samples++; h->period += sample->period; - h->addr[offset].period += sample->period; entry->nr_samples++; entry->period += sample->period; @@ -1094,8 +1069,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) if (notes->src->histograms == NULL) { alloc_histograms: - annotated_source__alloc_histograms(notes->src, symbol__size(sym), - nr_hists); + annotated_source__alloc_histograms(notes->src, nr_hists); } return notes->src; @@ -2854,7 +2828,7 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); - memset(h, 0, notes->src->sizeof_sym_hist); + memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms); } void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 3362980a5d3d..4bdc70a9d376 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -242,7 +242,6 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); struct sym_hist { u64 nr_samples; u64 period; - struct sym_hist_entry addr[]; }; struct cyc_hist { @@ -278,7 +277,6 @@ struct cyc_hist { */ struct annotated_source { struct list_head source; - size_t sizeof_sym_hist; struct sym_hist *histograms; struct annotation_line **offsets; struct hashmap *samples; @@ -348,7 +346,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) { - return ((void *)src->histograms) + (src->sizeof_sym_hist * idx); + return &src->histograms[idx]; } static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) -- cgit v1.2.3 From 0f66dfe7b91d2743cc71dfff37af503215b204ef Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:15 -0800 Subject: perf annotate: Add comments in the data structures Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-5-namhyung@kernel.org --- tools/perf/util/annotate.h | 69 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 62 insertions(+), 7 deletions(-) (limited to 'tools/perf/util/annotate.h') diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 4bdc70a9d376..13cc659e508c 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -239,11 +239,42 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); +/** + * struct sym_hist - symbol histogram information for an event + * + * @nr_samples: Total number of samples. + * @period: Sum of sample periods. + */ struct sym_hist { u64 nr_samples; u64 period; }; +/** + * struct cyc_hist - (CPU) cycle histogram for a basic block + * + * @start: Start address of current block (if known). + * @cycles: Sum of cycles for the longest basic block. + * @cycles_aggr: Total cycles for this address. + * @cycles_max: Max cycles for this address. + * @cycles_min: Min cycles for this address. + * @cycles_spark: History of cycles for the longest basic block. + * @num: Number of samples for the longest basic block. + * @num_aggr: Total number of samples for this address. + * @have_start: Whether the current branch info has a start address. + * @reset: Number of resets due to a different start address. + * + * If sample has branch_stack and cycles info, it can construct basic blocks + * between two adjacent branches. It'd have start and end addresses but + * sometimes the start address may not be available. So the cycles are + * accounted at the end address. If multiple basic blocks end at the same + * address, it will take the longest one. + * + * The @start, @cycles, @cycles_spark and @num fields are used for the longest + * block only. Other fields are used for all cases. + * + * See __symbol__account_cycles(). + */ struct cyc_hist { u64 start; u64 cycles; @@ -258,18 +289,24 @@ struct cyc_hist { u16 reset; }; -/** struct annotated_source - symbols with hits have this attached as in sannotation +/** + * struct annotated_source - symbols with hits have this attached as in annotation * - * @histograms: Array of addr hit histograms per event being monitored - * nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if + * @source: List head for annotated_line (embeded in disasm_line). + * @histograms: Array of symbol histograms per event to maintain the total number + * of samples and period. + * @nr_histograms: This may not be the same as evsel->evlist->core.nr_entries if * we have more than a group in a evlist, where we will want * to see each group separately, that is why symbol__annotate2() * sets src->nr_histograms to evsel->nr_members. - * @lines: If 'print_lines' is specified, per source code line percentages - * @source: source parsed from a disassembler like objdump -dS - * @cyc_hist: Average cycles per basic block + * @offsets: Array of annotation_line to be accessed by offset. + * @samples: Hash map of sym_hist_entry. Keyed by event index and offset in symbol. + * @nr_entries: Number of annotated_line in the source list. + * @nr_asm_entries: Number of annotated_line with actual asm instruction in the + * source list. + * @max_line_len: Maximum length of objdump output in an annotated_line. * - * lines is allocated, percentages calculated and all sorted by percentage + * disasm_lines are allocated, percentages calculated and all sorted by percentage * when the annotation is about to be presented, so the percentages are for * one of the entries in the histogram array, i.e. for the event/counter being * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate @@ -286,6 +323,24 @@ struct annotated_source { u16 max_line_len; }; +/** + * struct annotated_branch - basic block and IPC information for a symbol. + * + * @hit_cycles: Total executed cycles. + * @hit_insn: Total number of instructions executed. + * @total_insn: Number of instructions in the function. + * @cover_insn: Number of distinct, actually executed instructions. + * @cycles_hist: Array of cyc_hist for each instruction. + * @max_coverage: Maximum number of covered basic block (used for block-range). + * + * This struct is used by two different codes when the sample has branch stack + * and cycles information. annotation__compute_ipc() calculates average IPC + * using @hit_insn / @hit_cycles. The actual coverage can be calculated using + * @cover_insn / @total_insn. The @cycles_hist can give IPC for each (longest) + * basic block ends at the given address. + * process_basic_block() calculates coverage of instructions (or basic blocks) + * in the function. + */ struct annotated_branch { u64 hit_cycles; u64 hit_insn; -- cgit v1.2.3