diff options
Diffstat (limited to 'tools/perf/builtin-kmem.c')
-rw-r--r-- | tools/perf/builtin-kmem.c | 354 |
1 files changed, 286 insertions, 68 deletions
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 256d18fa0471..35722fafc4d1 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,26 +26,28 @@ static u64 sample_type; static int alloc_flag; static int caller_flag; -sort_fn_t alloc_sort_fn; -sort_fn_t caller_sort_fn; - static int alloc_lines = -1; static int caller_lines = -1; +static bool raw_ip; + +static char default_sort_order[] = "frag,hit,bytes"; + static char *cwd; static int cwdlen; +static int *cpunode_map; +static int max_cpu_num; + struct alloc_stat { - union { - struct { - char *name; - u64 call_site; - }; - u64 ptr; - }; + u64 call_site; + u64 ptr; u64 bytes_req; u64 bytes_alloc; u32 hit; + u32 pingpong; + + short alloc_cpu; struct rb_node node; }; @@ -56,12 +58,74 @@ static struct rb_root root_caller_stat; static struct rb_root root_caller_sorted; static unsigned long total_requested, total_allocated; +static unsigned long nr_allocs, nr_cross_allocs; struct raw_event_sample { u32 size; char data[0]; }; +#define PATH_SYS_NODE "/sys/devices/system/node" + +static void init_cpunode_map(void) +{ + FILE *fp; + int i; + + fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); + if (!fp) { + max_cpu_num = 4096; + return; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) + die("Failed to read 'kernel_max' from sysfs"); + max_cpu_num++; + + cpunode_map = calloc(max_cpu_num, sizeof(int)); + if (!cpunode_map) + die("calloc"); + for (i = 0; i < max_cpu_num; i++) + cpunode_map[i] = -1; + fclose(fp); +} + +static void setup_cpunode_map(void) +{ + struct dirent *dent1, *dent2; + DIR *dir1, *dir2; + unsigned int cpu, mem; + char buf[PATH_MAX]; + + init_cpunode_map(); + + dir1 = opendir(PATH_SYS_NODE); + if (!dir1) + return; + + while (true) { + dent1 = readdir(dir1); + if (!dent1) + break; + + if (sscanf(dent1->d_name, "node%u", &mem) < 1) + continue; + + snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); + dir2 = opendir(buf); + if (!dir2) + continue; + while (true) { + dent2 = readdir(dir2); + if (!dent2) + break; + if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + continue; + cpunode_map[cpu] = mem; + } + } +} + static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { @@ -81,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static void insert_alloc_stat(unsigned long ptr, - int bytes_req, int bytes_alloc) +static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) { struct rb_node **node = &root_alloc_stat.rb_node; struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!alloc_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -109,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->ptr = ptr; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -117,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr, rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &root_alloc_stat); } + data->call_site = call_site; + data->alloc_cpu = cpu; } static void insert_caller_stat(unsigned long call_site, @@ -126,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site, struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!caller_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -147,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->call_site = call_site; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -159,34 +225,89 @@ static void insert_caller_stat(unsigned long call_site, static void process_alloc_event(struct raw_event_sample *raw, struct event *event, - int cpu __used, + int cpu, u64 timestamp __used, struct thread *thread __used, - int node __used) + int node) { unsigned long call_site; unsigned long ptr; int bytes_req; int bytes_alloc; + int node1, node2; ptr = raw_field_value(event, "ptr", raw->data); call_site = raw_field_value(event, "call_site", raw->data); bytes_req = raw_field_value(event, "bytes_req", raw->data); bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); - insert_alloc_stat(ptr, bytes_req, bytes_alloc); + insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); insert_caller_stat(call_site, bytes_req, bytes_alloc); total_requested += bytes_req; total_allocated += bytes_alloc; + + if (node) { + node1 = cpunode_map[cpu]; + node2 = raw_field_value(event, "node", raw->data); + if (node1 != node2) + nr_cross_allocs++; + } + nr_allocs++; +} + +static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); +static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); + +static struct alloc_stat *search_alloc_stat(unsigned long ptr, + unsigned long call_site, + struct rb_root *root, + sort_fn_t sort_fn) +{ + struct rb_node *node = root->rb_node; + struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; + + while (node) { + struct alloc_stat *data; + int cmp; + + data = rb_entry(node, struct alloc_stat, node); + + cmp = sort_fn(&key, data); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else + return data; + } + return NULL; } -static void process_free_event(struct raw_event_sample *raw __used, - struct event *event __used, - int cpu __used, +static void process_free_event(struct raw_event_sample *raw, + struct event *event, + int cpu, u64 timestamp __used, struct thread *thread __used) { + unsigned long ptr; + struct alloc_stat *s_alloc, *s_caller; + + ptr = raw_field_value(event, "ptr", raw->data); + + s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); + if (!s_alloc) + return; + + if (cpu != s_alloc->alloc_cpu) { + s_alloc->pingpong++; + + s_caller = search_alloc_stat(0, s_alloc->call_site, + &root_caller_stat, callsite_cmp); + assert(s_caller); + s_caller->pingpong++; + } + s_alloc->alloc_cpu = -1; } static void @@ -291,7 +412,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } @@ -307,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) { struct rb_node *next; - printf("%.78s\n", graph_dotted_line); - printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); - printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); - printf("%.78s\n", graph_dotted_line); + printf("%.102s\n", graph_dotted_line); + printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); + printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); + printf("%.102s\n", graph_dotted_line); next = rb_first(root); @@ -318,36 +439,39 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) struct alloc_stat *data = rb_entry(next, struct alloc_stat, node); struct symbol *sym = NULL; - char bf[BUFSIZ]; + char buf[BUFSIZ]; u64 addr; if (is_caller) { addr = data->call_site; - sym = kernel_maps__find_symbol(addr, NULL, NULL); + if (!raw_ip) + sym = kernel_maps__find_function(addr, NULL, NULL); } else addr = data->ptr; if (sym != NULL) - snprintf(bf, sizeof(bf), "%s/%Lx", sym->name, + snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, addr - sym->start); else - snprintf(bf, sizeof(bf), "%#Lx", addr); + snprintf(buf, sizeof(buf), "%#Lx", addr); + printf(" %-34s |", buf); - printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", - bf, (unsigned long long)data->bytes_alloc, + printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", + (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, (unsigned long)data->bytes_req / data->hit, (unsigned long)data->hit, + (unsigned long)data->pingpong, fragmentation(data->bytes_req, data->bytes_alloc)); next = rb_next(next); } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf(" ------------------------------------------------------------------------------\n"); + printf("%.102s\n", graph_dotted_line); } static void print_summary(void) @@ -359,6 +483,7 @@ static void print_summary(void) total_allocated - total_requested); printf("Internal fragmentation: %f%%\n", fragmentation(total_requested, total_allocated)); + printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); } static void print_result(void) @@ -370,20 +495,34 @@ static void print_result(void) print_summary(); } +struct sort_dimension { + const char name[20]; + sort_fn_t cmp; + struct list_head list; +}; + +static LIST_HEAD(caller_sort); +static LIST_HEAD(alloc_sort); + static void sort_insert(struct rb_root *root, struct alloc_stat *data, - sort_fn_t sort_fn) + struct list_head *sort_list) { struct rb_node **new = &(root->rb_node); struct rb_node *parent = NULL; + struct sort_dimension *sort; while (*new) { struct alloc_stat *this; - int cmp; + int cmp = 0; this = rb_entry(*new, struct alloc_stat, node); parent = *new; - cmp = sort_fn(data, this); + list_for_each_entry(sort, sort_list, list) { + cmp = sort->cmp(data, this); + if (cmp) + break; + } if (cmp > 0) new = &((*new)->rb_left); @@ -396,7 +535,7 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, } static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, - sort_fn_t sort_fn) + struct list_head *sort_list) { struct rb_node *node; struct alloc_stat *data; @@ -408,14 +547,14 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, rb_erase(node, root); data = rb_entry(node, struct alloc_stat, node); - sort_insert(root_sorted, data, sort_fn); + sort_insert(root_sorted, data, sort_list); } } static void sort_result(void) { - __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn); - __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn); + __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); + __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); } static int __cmd_kmem(void) @@ -433,7 +572,6 @@ static const char * const kmem_usage[] = { NULL }; - static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->ptr < r->ptr) @@ -443,6 +581,11 @@ static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension ptr_sort_dimension = { + .name = "ptr", + .cmp = ptr_cmp, +}; + static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->call_site < r->call_site) @@ -452,6 +595,11 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension callsite_sort_dimension = { + .name = "callsite", + .cmp = callsite_cmp, +}; + static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->hit < r->hit) @@ -461,6 +609,11 @@ static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension hit_sort_dimension = { + .name = "hit", + .cmp = hit_cmp, +}; + static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->bytes_alloc < r->bytes_alloc) @@ -470,6 +623,11 @@ static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension bytes_sort_dimension = { + .name = "bytes", + .cmp = bytes_cmp, +}; + static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) { double x, y; @@ -484,31 +642,88 @@ static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension frag_sort_dimension = { + .name = "frag", + .cmp = frag_cmp, +}; + +static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->pingpong < r->pingpong) + return -1; + else if (l->pingpong > r->pingpong) + return 1; + return 0; +} + +static struct sort_dimension pingpong_sort_dimension = { + .name = "pingpong", + .cmp = pingpong_cmp, +}; + +static struct sort_dimension *avail_sorts[] = { + &ptr_sort_dimension, + &callsite_sort_dimension, + &hit_sort_dimension, + &bytes_sort_dimension, + &frag_sort_dimension, + &pingpong_sort_dimension, +}; + +#define NUM_AVAIL_SORTS \ + (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) + +static int sort_dimension__add(const char *tok, struct list_head *list) +{ + struct sort_dimension *sort; + int i; + + for (i = 0; i < NUM_AVAIL_SORTS; i++) { + if (!strcmp(avail_sorts[i]->name, tok)) { + sort = malloc(sizeof(*sort)); + if (!sort) + die("malloc"); + memcpy(sort, avail_sorts[i], sizeof(*sort)); + list_add_tail(&sort->list, list); + return 0; + } + } + + return -1; +} + +static int setup_sorting(struct list_head *sort_list, const char *arg) +{ + char *tok; + char *str = strdup(arg); + + if (!str) + die("strdup"); + + while (true) { + tok = strsep(&str, ","); + if (!tok) + break; + if (sort_dimension__add(tok, sort_list) < 0) { + error("Unknown --sort key: '%s'", tok); + return -1; + } + } + + free(str); + return 0; +} + static int parse_sort_opt(const struct option *opt __used, const char *arg, int unset __used) { - sort_fn_t sort_fn; - if (!arg) return -1; - if (strcmp(arg, "ptr") == 0) - sort_fn = ptr_cmp; - else if (strcmp(arg, "call_site") == 0) - sort_fn = callsite_cmp; - else if (strcmp(arg, "hit") == 0) - sort_fn = hit_cmp; - else if (strcmp(arg, "bytes") == 0) - sort_fn = bytes_cmp; - else if (strcmp(arg, "frag") == 0) - sort_fn = frag_cmp; - else - return -1; - if (caller_flag > alloc_flag) - caller_sort_fn = sort_fn; + return setup_sorting(&caller_sort, arg); else - alloc_sort_fn = sort_fn; + return setup_sorting(&alloc_sort, arg); return 0; } @@ -552,12 +767,13 @@ static const struct option kmem_options[] = { OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>", "stat selector, Pass 'alloc' or 'caller'.", parse_stat_opt), - OPT_CALLBACK('s', "sort", NULL, "key", - "sort by key: ptr, call_site, hit, bytes, frag", + OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", + "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lins", parse_line_opt), + OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), OPT_END() }; @@ -604,10 +820,12 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) else if (argc) usage_with_options(kmem_usage, kmem_options); - if (!alloc_sort_fn) - alloc_sort_fn = bytes_cmp; - if (!caller_sort_fn) - caller_sort_fn = bytes_cmp; + if (list_empty(&caller_sort)) + setup_sorting(&caller_sort, default_sort_order); + if (list_empty(&alloc_sort)) + setup_sorting(&alloc_sort, default_sort_order); + + setup_cpunode_map(); return __cmd_kmem(); } |