From 6d6be5eb45b423a37d746d3ee0fd0c78f76ead9f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 9 Feb 2024 12:49:47 -0800 Subject: perf metric: Don't remove scale from counts Counts were switched from the scaled saved value form to the aggregated count to avoid double accounting. When this happened the removing of scaling for a count should have been removed, however, it wasn't and this wasn't observed as it normally doesn't matter because a counter's scale is 1. A problem was observed with RAPL events that are scaled. Fixes: 37cc8ad77cf8 ("perf metric: Directly use counts rather than saved_value") Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Cc: K Prateek Nayak Cc: James Clark Cc: Kaige Ye Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240209204947.3873294-5-irogers@google.com --- tools/perf/util/stat-shadow.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'tools/perf/util/stat-shadow.c') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index e31426167852..cf573ff3fa84 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -414,12 +414,7 @@ static int prepare_metric(struct evsel **metric_events, val = NAN; source_count = 0; } else { - /* - * If an event was scaled during stat gathering, - * reverse the scale before computing the - * metric. - */ - val = aggr->counts.val * (1.0 / metric_events[i]->scale); + val = aggr->counts.val; source_count = evsel__source_count(metric_events[i]); } } -- cgit v1.2.3 From eee41e6b287e2adfefbe3b6fc80c66097c076f89 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 20 Feb 2024 23:07:52 -0800 Subject: perf stat: Pass fewer metric arguments Pass metric_expr and evsel rather than specific variables from the struct, thereby reducing the number of arguments. This will enable later fixes. To reduce the size of the diff, local variables are added to match the previous parameter names. This isn't done in the case of "name" as evsel->name is more intention revealing. A whitespace issue is also addressed. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: K Prateek Nayak Cc: Stephane Eranian Cc: Kaige Ye Cc: Kajol Jain Cc: Kan Liang Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240221070754.4163916-1-irogers@google.com --- tools/perf/util/stat-shadow.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'tools/perf/util/stat-shadow.c') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index cf573ff3fa84..10b452792037 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -355,11 +355,12 @@ static void print_nsecs(struct perf_stat_config *config, print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } -static int prepare_metric(struct evsel **metric_events, - struct metric_ref *metric_refs, +static int prepare_metric(const struct metric_expr *mexp, struct expr_parse_ctx *pctx, int aggr_idx) { + struct evsel * const *metric_events = mexp->metric_events; + struct metric_ref *metric_refs = mexp->metric_refs; int i; for (i = 0; metric_events[i]; i++) { @@ -403,7 +404,7 @@ static int prepare_metric(struct evsel **metric_events, if (!aggr) break; - if (!metric_events[i]->supported) { + if (!metric_events[i]->supported) { /* * Not supported events will have a count of 0, * which can be confusing in a @@ -436,18 +437,18 @@ static int prepare_metric(struct evsel **metric_events, } static void generic_metric(struct perf_stat_config *config, - const char *metric_expr, - const char *metric_threshold, - struct evsel **metric_events, - struct metric_ref *metric_refs, - char *name, - const char *metric_name, - const char *metric_unit, - int runtime, + struct metric_expr *mexp, + struct evsel *evsel, int aggr_idx, struct perf_stat_output_ctx *out) { print_metric_t print_metric = out->print_metric; + const char *metric_name = mexp->metric_name; + const char *metric_expr = mexp->metric_expr; + const char *metric_threshold = mexp->metric_threshold; + const char *metric_unit = mexp->metric_unit; + struct evsel * const *metric_events = mexp->metric_events; + int runtime = mexp->runtime; struct expr_parse_ctx *pctx; double ratio, scale, threshold; int i; @@ -462,7 +463,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(metric_events, metric_refs, pctx, aggr_idx); + i = prepare_metric(mexp, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -497,18 +498,18 @@ static void generic_metric(struct perf_stat_config *config, print_metric(config, ctxp, color, "%8.2f", metric_name ? metric_name : - out->force_header ? name : "", + out->force_header ? evsel->name : "", ratio); } } else { print_metric(config, ctxp, color, /*unit=*/NULL, out->force_header ? - (metric_name ? metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } } else { print_metric(config, ctxp, color, /*unit=*/NULL, out->force_header ? - (metric_name ? metric_name : name) : "", 0); + (metric_name ?: evsel->name) : "", 0); } expr__ctx_free(pctx); @@ -523,7 +524,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, aggr_idx) < 0) + if (prepare_metric(mexp, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -625,10 +626,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if ((*num)++ > 0) out->new_line(config, ctxp); - generic_metric(config, mexp->metric_expr, mexp->metric_threshold, - mexp->metric_events, mexp->metric_refs, evsel->name, - mexp->metric_name, mexp->metric_unit, mexp->runtime, - aggr_idx, out); + generic_metric(config, mexp, evsel, aggr_idx, out); } return NULL; -- cgit v1.2.3 From a59fb796a36bb6c2b7e6e256a9e5f9ba18109937 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 20 Feb 2024 23:07:53 -0800 Subject: perf metrics: Compute unmerged uncore metrics individually When merging counts from multiple uncore PMUs the metric is only computed for the metric leader. When merging/aggregation is disabled, prior to this patch just the leader's metric would be computed. Fix this by computing the metric for each PMU. On a SkylakeX: Before: ``` $ perf stat -A -M memory_bandwidth_total -a sleep 1 Performance counter stats for 'system wide': CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2] CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2] CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3] CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3] CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5] CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5] CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU0 1,003,440,851 ns duration_time 1.003440851 seconds time elapsed ``` After: ``` $ perf stat -A -M memory_bandwidth_total -a sleep 1 Performance counter stats for 'system wide': CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU0 1,003,353,416 ns duration_time ``` Signed-off-by: Ian Rogers | Acked-by: Namhyung Kim Cc: K Prateek Nayak Cc: Stephane Eranian Cc: Kaige Ye Cc: Kajol Jain Cc: Kan Liang Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240221070754.4163916-2-irogers@google.com --- tools/perf/util/metricgroup.c | 2 ++ tools/perf/util/stat-shadow.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/stat-shadow.c') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 966cca5a3e88..b24a1c177a80 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, if (!metric_events) return NULL; + if (evsel->metric_leader) + me.evsel = evsel->metric_leader; nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 10b452792037..3466aa952442 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config, } static int prepare_metric(const struct metric_expr *mexp, + const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) { @@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp, source_count = 1; } else { struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx]; + struct perf_stat_aggr *aggr; + /* + * If there are multiple uncore PMUs and we're not + * reading the leader's stats, determine the stats for + * the appropriate uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; + break; + } + } + aggr = &ps->aggr[aggr_idx]; if (!aggr) break; @@ -416,7 +438,8 @@ static int prepare_metric(const struct metric_expr *mexp, source_count = 0; } else { val = aggr->counts.val; - source_count = evsel__source_count(metric_events[i]); + if (!source_count) + source_count = evsel__source_count(metric_events[i]); } } n = strdup(evsel__metric_id(metric_events[i])); @@ -463,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(mexp, pctx, aggr_idx); + i = prepare_metric(mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -524,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp, pctx, aggr_idx) < 0) + if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) -- cgit v1.2.3