From a3397d69e4e7a1ea37af413ccbdf52dec208ce11 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:49 -0800 Subject: perf annotate-data: Parse 'lock' prefix from llvm-objdump For the performance reason, I prefer llvm-objdump over GNU's. But I found that llvm-objdump puts x86 lock prefix in a separate line like below. ffffffff81000695: f0 lock ffffffff81000696: ff 83 54 0b 00 00 incl 2900(%rbx) This should be parsed properly, but I just changed to find the insn with next offset for now. This improves the statistics as it can process more instructions. Annotate data type stats: total 294, ok 144 (49.0%), bad 150 (51.0%) ----------------------------------------------------------- 30 : no_sym 35 : no_mem_ops 71 : no_var 6 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9b70ab110ce7..8d761be1a102 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3660,8 +3660,17 @@ static struct disasm_line *find_disasm_line(struct symbol *sym, u64 ip) notes = symbol__annotation(sym); list_for_each_entry(dl, ¬es->src->source, al.node) { - if (sym->start + dl->al.offset == ip) + if (sym->start + dl->al.offset == ip) { + /* + * llvm-objdump places "lock" in a separate line and + * in that case, we want to get the next line. + */ + if (!strcmp(dl->ins.name, "lock") && *dl->ops.raw == '\0') { + ip++; + continue; + } return dl; + } } return NULL; } @@ -3758,6 +3767,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) if (!op_loc->mem_ref) continue; + /* Recalculate IP since it can be changed due to LOCK prefix */ + ip = ms->sym->start + dl->al.offset; + mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); if (mem_type) istat->good++; -- cgit v1.2.3 From 1cf4df0373eed561f543a9e0e0ac2498384d67a0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:50 -0800 Subject: perf annotate-data: Handle macro fusion on x86 When a sample was come from a conditional branch without a memory operand, it could be due to a macro fusion with a previous instruction. So it needs to check the memory operand in the previous one. This improves the stat like below: Annotate data type stats: total 294, ok 147 (50.0%), bad 147 (50.0%) ----------------------------------------------------------- 30 : no_sym 32 : no_mem_ops 71 : no_var 6 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 8d761be1a102..0ec42e85ca5c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3751,6 +3751,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) return NULL; } +retry: istat = annotate_data_stat(&ann_insn_stat, dl->ins.name); if (istat == NULL) { ann_data_stat.no_insn++; @@ -3767,7 +3768,7 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) if (!op_loc->mem_ref) continue; - /* Recalculate IP since it can be changed due to LOCK prefix */ + /* Recalculate IP because of LOCK prefix or insn fusion */ ip = ms->sym->start + dl->al.offset; mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); @@ -3786,6 +3787,20 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) return mem_type; } + /* + * Some instructions can be fused and the actual memory access came + * from the previous instruction. + */ + if (dl->al.offset > 0) { + struct disasm_line *prev_dl; + + prev_dl = list_prev_entry(dl, al.node); + if (ins__is_fused(arch, prev_dl->ins.name, dl->ins.name)) { + dl = prev_dl; + goto retry; + } + } + ann_data_stat.no_mem_ops++; istat->bad++; return NULL; -- cgit v1.2.3 From d3030191d3a6292408c5cf999ebcc1d10e00e9c2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:51 -0800 Subject: perf annotate-data: Handle array style accesses On x86, instructions for array access often looks like below. mov 0x1234(%rax,%rbx,8), %rcx Usually the first register holds the type information and the second one has the index. And the current code only looks up a variable for the first register. But it's possible to be in the other way around so it needs to check the second register if the first one failed. The stat changed like this. Annotate data type stats: total 294, ok 148 (50.3%), bad 146 (49.7%) ----------------------------------------------------------- 30 : no_sym 32 : no_mem_ops 66 : no_var 10 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate-data.c | 24 +++++++++++++++++------ tools/perf/util/annotate-data.h | 5 +++-- tools/perf/util/annotate.c | 43 ++++++++++++++++++++++++++++++++--------- tools/perf/util/annotate.h | 8 ++++++-- 4 files changed, 61 insertions(+), 19 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c index f22b4f18271c..d1ceac7e2441 100644 --- a/tools/perf/util/annotate-data.c +++ b/tools/perf/util/annotate-data.c @@ -9,6 +9,7 @@ #include #include +#include "annotate.h" #include "annotate-data.h" #include "debuginfo.h" #include "debug.h" @@ -207,7 +208,8 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) * It expects a pointer type for a memory access. * Convert to a real type it points to. */ - if (dwarf_tag(type_die) != DW_TAG_pointer_type || + if ((dwarf_tag(type_die) != DW_TAG_pointer_type && + dwarf_tag(type_die) != DW_TAG_array_type) || die_get_real_type(type_die, type_die) == NULL) { pr_debug("no pointer or no type\n"); ann_data_stat.no_typeinfo++; @@ -233,10 +235,11 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset) /* The result will be saved in @type_die */ static int find_data_type_die(struct debuginfo *di, u64 pc, - int reg, int offset, Dwarf_Die *type_die) + struct annotated_op_loc *loc, Dwarf_Die *type_die) { Dwarf_Die cu_die, var_die; Dwarf_Die *scopes = NULL; + int reg, offset; int ret = -1; int i, nr_scopes; @@ -250,6 +253,10 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ nr_scopes = die_get_scopes(&cu_die, pc, &scopes); + reg = loc->reg1; + offset = loc->offset; + +retry: /* Search from the inner-most scope to the outer */ for (i = nr_scopes - 1; i >= 0; i--) { /* Look up variables/parameters in this scope */ @@ -260,6 +267,12 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, ret = check_variable(&var_die, type_die, offset); goto out; } + + if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) { + reg = loc->reg2; + goto retry; + } + if (ret < 0) ann_data_stat.no_var++; @@ -272,15 +285,14 @@ out: * find_data_type - Return a data type at the location * @ms: map and symbol at the location * @ip: instruction address of the memory access - * @reg: register that holds the base address - * @offset: offset from the base address + * @loc: instruction operand location * * This functions searches the debug information of the binary to get the data * type it accesses. The exact location is expressed by (ip, reg, offset). * It return %NULL if not found. */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - int reg, int offset) + struct annotated_op_loc *loc) { struct annotated_data_type *result = NULL; struct dso *dso = map__dso(ms->map); @@ -300,7 +312,7 @@ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, * a file address for DWARF processing. */ pc = map__rip_2objdump(ms->map, ip); - if (find_data_type_die(di, pc, reg, offset, &type_die) < 0) + if (find_data_type_die(di, pc, loc, &type_die) < 0) goto out; result = dso__findnew_data_type(dso, &type_die); diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 8e73096c01d1..65ddd839850f 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -7,6 +7,7 @@ #include #include +struct annotated_op_loc; struct evsel; struct map_symbol; @@ -105,7 +106,7 @@ extern struct annotated_data_stat ann_data_stat; /* Returns data type at the location (ip, reg, offset) */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - int reg, int offset); + struct annotated_op_loc *loc); /* Update type access histogram at the given offset */ int annotated_data_type__update_samples(struct annotated_data_type *adt, @@ -119,7 +120,7 @@ void annotated_data_type__tree_delete(struct rb_root *root); static inline struct annotated_data_type * find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, - int reg __maybe_unused, int offset __maybe_unused) + struct annotated_op_loc *loc __maybe_unused) { return NULL; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0ec42e85ca5c..3cdcdd449c86 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3563,8 +3563,22 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg = get_dwarf_regnum(regname, 0); + op_loc->reg1 = get_dwarf_regnum(regname, 0); free(regname); + + /* Get the second register */ + if (op_loc->multi_regs) { + p = strchr(p + 1, arch->objdump.register_char); + if (p == NULL) + return -1; + + regname = strdup(p); + if (regname == NULL) + return -1; + + op_loc->reg2 = get_dwarf_regnum(regname, 0); + free(regname); + } return 0; } @@ -3577,14 +3591,20 @@ static int extract_reg_offset(struct arch *arch, const char *str, * Get detailed location info (register and offset) in the instruction. * It needs both source and target operand and whether it accesses a * memory location. The offset field is meaningful only when the - * corresponding mem flag is set. + * corresponding mem flag is set. The reg2 field is meaningful only + * when multi_regs flag is set. * * Some examples on x86: * - * mov (%rax), %rcx # src_reg = rax, src_mem = 1, src_offset = 0 - * # dst_reg = rcx, dst_mem = 0 + * mov (%rax), %rcx # src_reg1 = rax, src_mem = 1, src_offset = 0 + * # dst_reg1 = rcx, dst_mem = 0 * - * mov 0x18, %r8 # src_reg = -1, dst_reg = r8 + * mov 0x18, %r8 # src_reg1 = -1, src_mem = 0 + * # dst_reg1 = r8, dst_mem = 0 + * + * mov %rsi, 8(%rbx,%rcx,4) # src_reg1 = rsi, src_mem = 0, dst_multi_regs = 0 + * # dst_reg1 = rbx, dst_reg2 = rcx, dst_mem = 1 + * # dst_multi_regs = 1, dst_offset = 8 */ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, struct annotated_insn_loc *loc) @@ -3605,24 +3625,29 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, for_each_insn_op_loc(loc, i, op_loc) { const char *insn_str = ops->source.raw; + bool multi_regs = ops->source.multi_regs; - if (i == INSN_OP_TARGET) + if (i == INSN_OP_TARGET) { insn_str = ops->target.raw; + multi_regs = ops->target.multi_regs; + } /* Invalidate the register by default */ - op_loc->reg = -1; + op_loc->reg1 = -1; + op_loc->reg2 = -1; if (insn_str == NULL) continue; if (strchr(insn_str, arch->objdump.memory_ref_char)) { op_loc->mem_ref = true; + op_loc->multi_regs = multi_regs; extract_reg_offset(arch, insn_str, op_loc); } else { char *s = strdup(insn_str); if (s) { - op_loc->reg = get_dwarf_regnum(s, 0); + op_loc->reg1 = get_dwarf_regnum(s, 0); free(s); } } @@ -3771,7 +3796,7 @@ retry: /* Recalculate IP because of LOCK prefix or insn fusion */ ip = ms->sym->start + dl->al.offset; - mem_type = find_data_type(ms, ip, op_loc->reg, op_loc->offset); + mem_type = find_data_type(ms, ip, op_loc); if (mem_type) istat->good++; else diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index dba50762c6e8..d0ff677b460c 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -442,14 +442,18 @@ int annotate_check_args(void); /** * struct annotated_op_loc - Location info of instruction operand - * @reg: Register in the operand + * @reg1: First register in the operand + * @reg2: Second register in the operand * @offset: Memory access offset in the operand * @mem_ref: Whether the operand accesses memory + * @multi_regs: Whether the second register is used */ struct annotated_op_loc { - int reg; + int reg1; + int reg2; int offset; bool mem_ref; + bool multi_regs; }; enum annotated_insn_ops { -- cgit v1.2.3 From 7a54f1d83defa6dd6c25c0851191f6d3a0a42362 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:52 -0800 Subject: perf annotate-data: Add stack operation pseudo type A typical function prologue and epilogue include multiple stack operations to save and restore the current value of registers. On x86, it looks like below: push r15 push r14 push r13 push r12 ... pop r12 pop r13 pop r14 pop r15 ret As these all touches the stack memory region, chances are high that they appear in a memory profile data. But these are not used for any real purpose yet so it'd return no types. One of my profile type shows that non neglible portion of data came from the stack operations. It also seems GCC generates more stack operations than clang. Annotate Instruction stats total 264, ok 169 (64.0%), bad 95 (36.0%) Name : Good Bad ----------------------------------------------------------- movq : 49 27 movl : 24 9 popq : 0 19 <-- here cmpl : 17 2 addq : 14 1 cmpq : 12 2 cmpxchgl : 3 7 Instead of dealing them as unknown, let's create a seperate pseudo type to represent those stack operations separately. Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-5-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate-data.h | 1 + tools/perf/util/annotate.c | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 65ddd839850f..214c625e7bc9 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -70,6 +70,7 @@ struct annotated_data_type { }; extern struct annotated_data_type unknown_type; +extern struct annotated_data_type stackop_type; /** * struct annotated_data_stat - Debug statistics diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3cdcdd449c86..655bd9443f5e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -107,6 +107,14 @@ static struct ins_ops ret_ops; struct annotated_data_stat ann_data_stat; LIST_HEAD(ann_insn_stat); +/* Pseudo data types */ +struct annotated_data_type stackop_type = { + .self = { + .type_name = (char *)"(stack operation)", + .children = LIST_HEAD_INIT(stackop_type.self.children), + }, +}; + static int arch__grow_instructions(struct arch *arch) { struct ins *new_instructions; @@ -3724,6 +3732,18 @@ static struct annotated_item_stat *annotate_data_stat(struct list_head *head, return istat; } +static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) +{ + if (arch__is(arch, "x86")) { + if (!strncmp(dl->ins.name, "push", 4) || + !strncmp(dl->ins.name, "pop", 3) || + !strncmp(dl->ins.name, "ret", 3)) + return true; + } + + return false; +} + /** * hist_entry__get_data_type - find data type for given hist entry * @he: hist entry @@ -3789,6 +3809,12 @@ retry: return NULL; } + if (is_stack_operation(arch, dl)) { + istat->good++; + he->mem_type_off = 0; + return &stackop_type; + } + for_each_insn_op_loc(&loc, i, op_loc) { if (!op_loc->mem_ref) continue; -- cgit v1.2.3 From 5f7cdde843dd21c7228d9ae47d985086ce165985 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 16 Jan 2024 22:26:54 -0800 Subject: perf annotate-data: Support global variables Global variables are accessed using PC-relative address so it needs to be handled separately. The PC-rel addressing is detected by using DWARF_REG_PC. On x86, %rip register would be used. The address can be calculated using the ip and offset in the instruction. But it should start from the next instruction so add calculate_pcrel_addr() to do it properly. But global variables defined in a different file would only have a declaration which doesn't include a location list. So it first tries to get the type info using the address, and then looks up the variable declarations using name. The name of global variables should be get from the symbol table. The declaration would have the type info. So extend find_var_type() to take both address and name for global variables. The stat is now looks like: Annotate data type stats: total 294, ok 153 (52.0%), bad 141 (48.0%) ----------------------------------------------------------- 30 : no_sym 32 : no_mem_ops 61 : no_var 10 : no_typeinfo 8 : bad_offset Reviewed-by: Ian Rogers Cc: Stephane Eranian Cc: Masami Hiramatsu Link: https://lore.kernel.org/r/20240117062657.985479-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/annotate-data.c | 38 ++++++++++++++++++++------- tools/perf/util/annotate-data.h | 6 +++-- tools/perf/util/annotate.c | 57 +++++++++++++++++++++++++++++++++++++++-- tools/perf/util/annotate.h | 4 +++ 4 files changed, 92 insertions(+), 13 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c index 58c0fac42e9d..e375dd288f67 100644 --- a/tools/perf/util/annotate-data.c +++ b/tools/perf/util/annotate-data.c @@ -240,7 +240,8 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset, /* The result will be saved in @type_die */ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, - struct annotated_op_loc *loc, Dwarf_Die *type_die) + const char *var_name, struct annotated_op_loc *loc, + Dwarf_Die *type_die) { Dwarf_Die cu_die, var_die; Dwarf_Die *scopes = NULL; @@ -258,11 +259,21 @@ static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr, reg = loc->reg1; offset = loc->offset; - if (reg == DWARF_REG_PC && - die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { - ret = check_variable(&var_die, type_die, offset, - /*is_pointer=*/false); - goto out; + if (reg == DWARF_REG_PC) { + if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) { + ret = check_variable(&var_die, type_die, offset, + /*is_pointer=*/false); + loc->offset = offset; + goto out; + } + + if (var_name && die_find_variable_at(&cu_die, var_name, pc, + &var_die)) { + ret = check_variable(&var_die, type_die, 0, + /*is_pointer=*/false); + /* loc->offset will be updated by the caller */ + goto out; + } } /* Get a list of nested scopes - i.e. (inlined) functions and blocks. */ @@ -285,6 +296,7 @@ retry: /* Found a variable, see if it's correct */ ret = check_variable(&var_die, type_die, offset, reg != DWARF_REG_PC); + loc->offset = offset; goto out; } @@ -306,13 +318,21 @@ out: * @ms: map and symbol at the location * @ip: instruction address of the memory access * @loc: instruction operand location + * @addr: data address of the memory access + * @var_name: global variable name * * This functions searches the debug information of the binary to get the data - * type it accesses. The exact location is expressed by (ip, reg, offset). + * type it accesses. The exact location is expressed by (@ip, reg, offset) + * for pointer variables or (@ip, @addr) for global variables. Note that global + * variables might update the @loc->offset after finding the start of the variable. + * If it cannot find a global variable by address, it tried to fine a declaration + * of the variable using @var_name. In that case, @loc->offset won't be updated. + * * It return %NULL if not found. */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - struct annotated_op_loc *loc) + struct annotated_op_loc *loc, u64 addr, + const char *var_name) { struct annotated_data_type *result = NULL; struct dso *dso = map__dso(ms->map); @@ -332,7 +352,7 @@ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, * a file address for DWARF processing. */ pc = map__rip_2objdump(ms->map, ip); - if (find_data_type_die(di, pc, 0, loc, &type_die) < 0) + if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0) goto out; result = dso__findnew_data_type(dso, &type_die); diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 214c625e7bc9..1b0db8e8c40e 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -107,7 +107,8 @@ extern struct annotated_data_stat ann_data_stat; /* Returns data type at the location (ip, reg, offset) */ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip, - struct annotated_op_loc *loc); + struct annotated_op_loc *loc, u64 addr, + const char *var_name); /* Update type access histogram at the given offset */ int annotated_data_type__update_samples(struct annotated_data_type *adt, @@ -121,7 +122,8 @@ void annotated_data_type__tree_delete(struct rb_root *root); static inline struct annotated_data_type * find_data_type(struct map_symbol *ms __maybe_unused, u64 ip __maybe_unused, - struct annotated_op_loc *loc __maybe_unused) + struct annotated_op_loc *loc __maybe_unused, + u64 addr __maybe_unused, const char *var_name __maybe_unused) { return NULL; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 655bd9443f5e..107b264fa41e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -37,6 +37,7 @@ #include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" +#include "thread.h" #include #include #include @@ -3744,6 +3745,30 @@ static bool is_stack_operation(struct arch *arch, struct disasm_line *dl) return false; } +u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, + struct disasm_line *dl) +{ + struct annotation *notes; + struct disasm_line *next; + u64 addr; + + notes = symbol__annotation(ms->sym); + /* + * PC-relative addressing starts from the next instruction address + * But the IP is for the current instruction. Since disasm_line + * doesn't have the instruction size, calculate it using the next + * disasm_line. If it's the last one, we can use symbol's end + * address directly. + */ + if (&dl->al.node == notes->src->source.prev) + addr = ms->sym->end + offset; + else { + next = list_next_entry(dl, al.node); + addr = ip + (next->al.offset - dl->al.offset) + offset; + } + return map__rip_2objdump(ms->map, addr); +} + /** * hist_entry__get_data_type - find data type for given hist entry * @he: hist entry @@ -3763,7 +3788,9 @@ struct annotated_data_type *hist_entry__get_data_type(struct hist_entry *he) struct annotated_op_loc *op_loc; struct annotated_data_type *mem_type; struct annotated_item_stat *istat; - u64 ip = he->ip; + u64 ip = he->ip, addr = 0; + const char *var_name = NULL; + int var_offset; int i; ann_data_stat.total++; @@ -3822,12 +3849,38 @@ retry: /* Recalculate IP because of LOCK prefix or insn fusion */ ip = ms->sym->start + dl->al.offset; - mem_type = find_data_type(ms, ip, op_loc); + var_offset = op_loc->offset; + + /* PC-relative addressing */ + if (op_loc->reg1 == DWARF_REG_PC) { + struct addr_location al; + struct symbol *var; + u64 map_addr; + + addr = annotate_calc_pcrel(ms, ip, op_loc->offset, dl); + /* Kernel symbols might be relocated */ + map_addr = addr + map__reloc(ms->map); + + addr_location__init(&al); + var = thread__find_symbol_fb(he->thread, he->cpumode, + map_addr, &al); + if (var) { + var_name = var->name; + /* Calculate type offset from the start of variable */ + var_offset = map_addr - map__unmap_ip(al.map, var->start); + } + addr_location__exit(&al); + } + + mem_type = find_data_type(ms, ip, op_loc, addr, var_name); if (mem_type) istat->good++; else istat->bad++; + if (mem_type && var_name) + op_loc->offset = var_offset; + if (symbol_conf.annotate_data_sample) { annotated_data_type__update_samples(mem_type, evsel, op_loc->offset, diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d0ff677b460c..94435607c958 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -491,4 +491,8 @@ struct annotated_item_stat { }; extern struct list_head ann_insn_stat; +/* Calculate PC-relative address */ +u64 annotate_calc_pcrel(struct map_symbol *ms, u64 ip, int offset, + struct disasm_line *dl); + #endif /* __PERF_ANNOTATE_H */ -- cgit v1.2.3 From d3e7cad6f36d9e80307b05bf31959597f9b6cd62 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:12 -0800 Subject: perf annotate: Add a hashmap for symbol histogram Now symbol histogram uses an array to save per-offset sample counts. But it wastes a lot of memory if the symbol has a few samples only. Add a hashmap to save values only for actual samples. For now, it has duplicate histogram (one in the existing array and another in the new hash map). Once it can convert to use the hash in all places, we can get rid of the array later. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-2-namhyung@kernel.org --- tools/perf/util/annotate.c | 42 ++++++++++++++++++++++++++++++++++++++++-- tools/perf/util/annotate.h | 2 ++ 2 files changed, 42 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 107b264fa41e..caaea9421235 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -38,6 +38,7 @@ #include "arch/common.h" #include "namespaces.h" #include "thread.h" +#include "hashmap.h" #include #include #include @@ -863,6 +864,17 @@ bool arch__is(struct arch *arch, const char *name) return !strcmp(arch->name, name); } +/* symbol histogram: key = offset << 16 | evsel->core.idx */ +static size_t sym_hist_hash(long key, void *ctx __maybe_unused) +{ + return (key >> 16) + (key & 0xffff); +} + +static bool sym_hist_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + static struct annotated_source *annotated_source__new(void) { struct annotated_source *src = zalloc(sizeof(*src)); @@ -877,6 +889,8 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src { if (src == NULL) return; + + hashmap__free(src->samples); zfree(&src->histograms); free(src); } @@ -909,6 +923,14 @@ static int annotated_source__alloc_histograms(struct annotated_source *src, src->sizeof_sym_hist = sizeof_sym_hist; src->nr_histograms = nr_hists; src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + + if (src->histograms == NULL) + return -1; + + src->samples = hashmap__new(sym_hist_hash, sym_hist_equal, NULL); + if (src->samples == NULL) + zfree(&src->histograms); + return src->histograms ? 0 : -1; } @@ -920,6 +942,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); + hashmap__clear(notes->src->samples); } if (notes->branch && notes->branch->cycles_hist) { memset(notes->branch->cycles_hist, 0, @@ -983,8 +1006,10 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, struct perf_sample *sample) { struct symbol *sym = ms->sym; - unsigned offset; + long hash_key; + u64 offset; struct sym_hist *h; + struct sym_hist_entry *entry; pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map__unmap_ip(ms->map, addr)); @@ -1002,15 +1027,28 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); return -ENOMEM; } + + hash_key = offset << 16 | evidx; + if (!hashmap__find(src->samples, hash_key, &entry)) { + entry = zalloc(sizeof(*entry)); + if (entry == NULL) + return -ENOMEM; + + if (hashmap__add(src->samples, hash_key, entry) < 0) + return -ENOMEM; + } + h->nr_samples++; h->addr[offset].nr_samples++; h->period += sample->period; h->addr[offset].period += sample->period; + entry->nr_samples++; + entry->period += sample->period; pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64 ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n", sym->start, sym->name, addr, addr - sym->start, evidx, - h->addr[offset].nr_samples, h->addr[offset].period); + entry->nr_samples, entry->period); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 94435607c958..a2b0c8210740 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -12,6 +12,7 @@ #include "symbol_conf.h" #include "mutex.h" #include "spark.h" +#include "hashmap.h" struct hist_browser_timer; struct hist_entry; @@ -280,6 +281,7 @@ struct annotated_source { size_t sizeof_sym_hist; struct sym_hist *histograms; struct annotation_line **offsets; + struct hashmap *samples; int nr_histograms; int nr_entries; int nr_asm_entries; -- cgit v1.2.3 From 80154575849778e40d9d87aa7ab14491ac401948 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:13 -0800 Subject: perf annotate: Calculate instruction overhead using hashmap Use annotated_source.samples hashmap instead of addr array in the struct sym_hist. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-3-namhyung@kernel.org --- tools/perf/ui/gtk/annotate.c | 14 +++++++++++--- tools/perf/util/annotate.c | 44 ++++++++++++++++++++++++++++++-------------- tools/perf/util/annotate.h | 11 +++++++++++ 3 files changed, 52 insertions(+), 17 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 394861245fd3..93ce3d47e47e 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -28,21 +28,29 @@ static const char *const col_names[] = { static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, struct disasm_line *dl, int evidx) { + struct annotation *notes; struct sym_hist *symhist; + struct sym_hist_entry *entry; double percent = 0.0; const char *markup; int ret = 0; + u64 nr_samples = 0; strcpy(buf, ""); if (dl->al.offset == (s64) -1) return 0; - symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples) + notes = symbol__annotation(sym); + symhist = annotation__histogram(notes, evidx); + entry = annotated_source__hist_entry(notes->src, evidx, dl->al.offset); + if (entry) + nr_samples = entry->nr_samples; + + if (!symbol_conf.event_group && nr_samples == 0) return 0; - percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples; + percent = 100.0 * nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index caaea9421235..1451699d931d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2368,17 +2368,25 @@ out_remove_tmp: return err; } -static void calc_percent(struct sym_hist *sym_hist, - struct hists *hists, +static void calc_percent(struct annotation *notes, + struct evsel *evsel, struct annotation_data *data, s64 offset, s64 end) { + struct hists *hists = evsel__hists(evsel); + int evidx = evsel->core.idx; + struct sym_hist *sym_hist = annotation__histogram(notes, evidx); unsigned int hits = 0; u64 period = 0; while (offset < end) { - hits += sym_hist->addr[offset].nr_samples; - period += sym_hist->addr[offset].period; + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry) { + hits += entry->nr_samples; + period += entry->period; + } ++offset; } @@ -2415,16 +2423,13 @@ static void annotation__calc_percent(struct annotation *notes, end = next ? next->offset : len; for_each_group_evsel(evsel, leader) { - struct hists *hists = evsel__hists(evsel); struct annotation_data *data; - struct sym_hist *sym_hist; BUG_ON(i >= al->data_nr); - sym_hist = annotation__histogram(notes, evsel->core.idx); data = &al->data[i++]; - calc_percent(sym_hist, hists, data, al->offset, end); + calc_percent(notes, evsel, data, al->offset, end); } } } @@ -2619,14 +2624,19 @@ static void print_summary(struct rb_root *root, const char *filename) static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel) { + int evidx = evsel->core.idx; struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evsel->core.idx); + struct sym_hist *h = annotation__histogram(notes, evidx); u64 len = symbol__size(sym), offset; - for (offset = 0; offset < len; ++offset) - if (h->addr[offset].nr_samples != 0) + for (offset = 0; offset < len; ++offset) { + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry && entry->nr_samples != 0) printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2, - sym->start + offset, h->addr[offset].nr_samples); + sym->start + offset, entry->nr_samples); + } printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } @@ -2855,8 +2865,14 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) h->nr_samples = 0; for (offset = 0; offset < len; ++offset) { - h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8; - h->nr_samples += h->addr[offset].nr_samples; + struct sym_hist_entry *entry; + + entry = annotated_source__hist_entry(notes->src, evidx, offset); + if (entry == NULL) + continue; + + entry->nr_samples = entry->nr_samples * 7 / 8; + h->nr_samples += entry->nr_samples; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a2b0c8210740..3362980a5d3d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -356,6 +356,17 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i return annotated_source__histogram(notes->src, idx); } +static inline struct sym_hist_entry * +annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset) +{ + struct sym_hist_entry *entry; + long key = offset << 16 | idx; + + if (!hashmap__find(src->samples, key, &entry)) + return NULL; + return entry; +} + static inline struct annotation *symbol__annotation(struct symbol *sym) { return (void *)sym - symbol_conf.priv_size; -- cgit v1.2.3 From f59e3660cd84d94cfdddbced91200981d9c25218 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 4 Mar 2024 15:08:14 -0800 Subject: perf annotate: Remove sym_hist.addr[] array It's not used anymore and the code is coverted to use a hash map. Now sym_hist has a static size, so no need to have sizeof_sym_hist in the struct annotated_source. Reviewed-by: Ian Rogers Reviewed-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240304230815.1440583-4-namhyung@kernel.org --- tools/perf/util/annotate.c | 36 +++++------------------------------- tools/perf/util/annotate.h | 4 +--- 2 files changed, 6 insertions(+), 34 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1451699d931d..ac002d907d81 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -896,33 +896,10 @@ static __maybe_unused void annotated_source__delete(struct annotated_source *src } static int annotated_source__alloc_histograms(struct annotated_source *src, - size_t size, int nr_hists) + int nr_hists) { - size_t sizeof_sym_hist; - - /* - * Add buffer of one element for zero length symbol. - * When sample is taken from first instruction of - * zero length symbol, perf still resolves it and - * shows symbol name in perf report and allows to - * annotate it. - */ - if (size == 0) - size = 1; - - /* Check for overflow when calculating sizeof_sym_hist */ - if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry)) - return -1; - - sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); - - /* Check for overflow in zalloc argument */ - if (sizeof_sym_hist > SIZE_MAX / nr_hists) - return -1; - - src->sizeof_sym_hist = sizeof_sym_hist; src->nr_histograms = nr_hists; - src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + src->histograms = calloc(nr_hists, sizeof(*src->histograms)); if (src->histograms == NULL) return -1; @@ -941,7 +918,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, - notes->src->nr_histograms * notes->src->sizeof_sym_hist); + notes->src->nr_histograms * sizeof(*notes->src->histograms)); hashmap__clear(notes->src->samples); } if (notes->branch && notes->branch->cycles_hist) { @@ -1039,9 +1016,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms, } h->nr_samples++; - h->addr[offset].nr_samples++; h->period += sample->period; - h->addr[offset].period += sample->period; entry->nr_samples++; entry->period += sample->period; @@ -1094,8 +1069,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) if (notes->src->histograms == NULL) { alloc_histograms: - annotated_source__alloc_histograms(notes->src, symbol__size(sym), - nr_hists); + annotated_source__alloc_histograms(notes->src, nr_hists); } return notes->src; @@ -2854,7 +2828,7 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx) struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evidx); - memset(h, 0, notes->src->sizeof_sym_hist); + memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms); } void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 3362980a5d3d..4bdc70a9d376 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -242,7 +242,6 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel); struct sym_hist { u64 nr_samples; u64 period; - struct sym_hist_entry addr[]; }; struct cyc_hist { @@ -278,7 +277,6 @@ struct cyc_hist { */ struct annotated_source { struct list_head source; - size_t sizeof_sym_hist; struct sym_hist *histograms; struct annotation_line **offsets; struct hashmap *samples; @@ -348,7 +346,7 @@ void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *m static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) { - return ((void *)src->histograms) + (src->sizeof_sym_hist * idx); + return &src->histograms[idx]; } static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) -- cgit v1.2.3