From fa48c892645dfd3159e5aa6eb9cefd00d5cb347a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 9 Nov 2017 16:04:26 -0300
Subject: perf script: Fix --per-event-dump for auxtrace synth evsels

When processing PERF_RECORD_AUXTRACE_INFO several perf_evsel entries
will be synthesized and inserted into session->evlist, eventually ending
in perf_script.tool.sample(), which ends up calling builtin-script.c's
process_event(), that expects evsel->priv to be a perf_evsel_script
object with a valid FILE pointer in fp.

So we need to intercept the processing of PERF_RECORD_AUXTRACE_INFO and
then setup evsel->priv for these newly created perf_evsel instances, do
it to fix the segfault in process_event() trying to use a NULL for that
FILE pointer.

Reported-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: yuzhoujian <yuzhoujian@didichuxing.com>
Fixes: a14390fde64e ("perf script: Allow creating per-event dump files")
Link: http://lkml.kernel.org/n/tip-bthnur8r8de01gxvn2qayx6e@git.kernel.org
[ Merge fix by Ravi Bangoria before pushing upstream to preserve bisectability ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 68f36dc0344f..9b43bda45a41 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script)
 	struct perf_evsel *evsel;
 
 	evlist__for_each_entry(script->session->evlist, evsel) {
+		/*
+		 * Already setup? I.e. we may be called twice in cases like
+		 * Intel PT, once for the intel_pt// and dummy events, then
+		 * for the evsels synthesized from the auxtrace info.
+		 *
+		 * See perf_script__process_auxtrace_info.
+		 */
+		if (evsel->priv != NULL)
+			continue;
+
 		evsel->priv = perf_evsel_script__new(evsel, script->session->data);
 		if (evsel->priv == NULL)
 			goto out_err_fclose;
@@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
 	return set_maps(script);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+static int perf_script__process_auxtrace_info(struct perf_tool *tool,
+					      union perf_event *event,
+					      struct perf_session *session)
+{
+	int ret = perf_event__process_auxtrace_info(tool, event, session);
+
+	if (ret == 0) {
+		struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+		ret = perf_script__setup_per_event_dump(script);
+	}
+
+	return ret;
+}
+#else
+#define perf_script__process_auxtrace_info 0
+#endif
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv)
 			.feature	 = perf_event__process_feature,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
-			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace_info	 = perf_script__process_auxtrace_info,
 			.auxtrace	 = perf_event__process_auxtrace,
 			.auxtrace_error	 = perf_event__process_auxtrace_error,
 			.stat		 = perf_event__process_stat_event,
-- 
cgit v1.2.3


From 5039c8a28fa97b8dce7b363a5ecd4bee2b87bf03 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 9 Nov 2017 06:55:26 -0800
Subject: perf script: Allow printing period for non freq mode groups

When using leader sampling the values of the not sampled but counted
events are shown by perf script in "period".

Currently printing period is only allowed when the main event has a
period, that is it is in frequency mode.

This implies that we cannot dump the values of counted events when the
leader event is not in frequency mode.

Just remove the check that the period must be set on all events. It will
just be printed as 0 instead if it's not available.

This fixes the following:

  $ perf record -c 100000 -e '{cycles,branches}:S'
  $ perf script -F event,period

Further commentary by Jiri Olsa:

The period will be the value of configured period, not 0:

int perf_evsel__parse_sample(struct ...
  ...
  data->period = evsel->attr.sample_period;

  $ perf record -c 100000
  $ perf script -F event,period | head -3
  Failed to open /tmp/perf-2048.map, continuing without symbols
      100000 cycles:ppp:
      100000 cycles:ppp:

other than that I think we can remove that check, because we will have
always sane number in period

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20171109145528.23371-4-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9b43bda45a41..ee7c7aaaae72 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -423,11 +423,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					   PERF_OUTPUT_CPU, allow_user_set))
 		return -EINVAL;
 
-	if (PRINT_FIELD(PERIOD) &&
-		perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD",
-					PERF_OUTPUT_PERIOD))
-		return -EINVAL;
-
 	if (PRINT_FIELD(IREGS) &&
 		perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
 					PERF_OUTPUT_IREGS))
-- 
cgit v1.2.3


From 4bd1bef8bba2f99ff472ae3617864dda301f81bd Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Fri, 17 Nov 2017 13:43:00 -0800
Subject: perf script: Allow computing 'perf stat' style metrics

Add support for computing 'perf stat' style metrics in 'perf script'.

When using leader sampling we can get metrics for each sampling period
by computing formulas over the values of the different group members.

This allows things like fine grained IPC tracking through sampling, much
more fine grained than with 'perf stat'.

The metric is still averaged over the sampling period, it is not just
for the sampling point.

This patch adds a new metric output field for 'perf script' that uses
the existing 'perf stat' metrics infrastructure to compute any metrics
supported by 'perf stat'.

For example to sample IPC:

  $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1
  $ perf script -F metric,ip,sym,time,cpu,comm
  ...
   alsa-sink-ALC32 [000] 42815.856074:      7fd65937d6cc [unknown]
   alsa-sink-ALC32 [000] 42815.856074:      7fd65937d6cc [unknown]
   alsa-sink-ALC32 [000] 42815.856074:      7fd65937d6cc [unknown]
   alsa-sink-ALC32 [000] 42815.856074:    metric:    0.13  insn per cycle
           swapper [000] 42815.857961:  ffffffff81655df0 __schedule
           swapper [000] 42815.857961:  ffffffff81655df0 __schedule
           swapper [000] 42815.857961:  ffffffff81655df0 __schedule
           swapper [000] 42815.857961:    metric:    0.23  insn per cycle
   qemu-system-x86 [000] 42815.858130:  ffffffff8165ad0e _raw_spin_unlock_irqrestore
   qemu-system-x86 [000] 42815.858130:  ffffffff8165ad0e _raw_spin_unlock_irqrestore
   qemu-system-x86 [000] 42815.858130:  ffffffff8165ad0e _raw_spin_unlock_irqrestore
   qemu-system-x86 [000] 42815.858130:    metric:    0.46  insn per cycle
             :4972 [000] 42815.858312:  ffffffffa080e5f2 vmx_vcpu_run
             :4972 [000] 42815.858312:  ffffffffa080e5f2 vmx_vcpu_run
             :4972 [000] 42815.858312:  ffffffffa080e5f2 vmx_vcpu_run
             :4972 [000] 42815.858312:    metric:    0.45  insn per cycle

TopDown:

This requires disabling SMT if you have it enabled, because SMT would
require sampling per core, which is not supported.

  $ perf record -e '{ref-cycles,topdown-fetch-bubbles,\
                     topdown-recovery-bubbles,\
                     topdown-slots-retired,topdown-total-slots,\
                     topdown-slots-issued}:S' -a sleep 1
  $ perf script --header -I -F cpu,ip,sym,event,metric,period
  ...
  [000]     121108               ref-cycles:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]     190350    topdown-fetch-bubbles:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]       2055 topdown-recovery-bubbles:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]     148729    topdown-slots-retired:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]     144324      topdown-total-slots:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]     160852     topdown-slots-issued:  ffffffff8165222e copy_user_enhanced_fast_string
  [000]   metric:     33.0% frontend bound
  [000]   metric:      3.5% bad speculation
  [000]   metric:     25.8% retiring
  [000]   metric:     37.7% backend bound
  [000]     112112               ref-cycles:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]     357222    topdown-fetch-bubbles:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]       3325 topdown-recovery-bubbles:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]     323553    topdown-slots-retired:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]     270507      topdown-total-slots:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]     341226     topdown-slots-issued:  ffffffff8165aec8 _raw_spin_lock_irqsave
  [000]   metric:     33.0% frontend bound
  [000]   metric:      2.9% bad speculation
  [000]   metric:     29.9% retiring
  [000]   metric:     34.2% backend bound
...

v2:
Use evsel->priv for new fields
Port to new base line, support fp output.
Handle stats in ->stats, not ->priv
Minor cleanups

Extra explanation about the use of the term 'averaging', from Andi in the
thread in the Link: tag below:

<quote Andi>
The current samples contains the sum of event counts for a sampling period.

EventA-1           EventA-2                EventA-3      EventA-4
EventB-1     EventB-2                             EventC-3

                         gap with no events                overflow
|-----------------------------------------------------------------|
period-start                                             period-end
^                                                                 ^
|                                                                 |
previous sample                                      current sample

So EventA = 4 and EventB = 3 at the sample point

I generate a metric, let's say EventA / EventB. It applies to the whole period.

But the metric is over a longer time which does not have the same behavior. For
example the gap above doesn't have any events, while they are clustered at the
beginning and end of the sample period.

But we're summing everything together. The metric doesn't know that the gap is
different than the busy period.

That's what I'm trying to express with averaging.
</quote>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 10 +++-
 tools/perf/builtin-script.c              | 97 +++++++++++++++++++++++++++++++-
 tools/perf/util/metricgroup.c            |  4 ++
 3 files changed, 108 insertions(+), 3 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2811fcf684cb..974ceb12c7f3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-        brstackoff, callindent, insn, insnlen, synth, phys_addr.
+	brstackoff, callindent, insn, insnlen, synth, phys_addr, metric.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -217,6 +217,14 @@ OPTIONS
 
 	The brstackoff field will print an offset into a specific dso/binary.
 
+	With the metric option perf script can compute metrics for
+	sampling periods, similar to perf stat. This requires
+	specifying a group with multiple events defining metrics with
+	the :S option for perf record. perf will sample on the first
+	event, and compute metrics for all the events in the group.
+	Please note that the metric computed is averaged over the whole
+	sampling period, not just for the sample point.
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ee7c7aaaae72..39d8b55f0db3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -22,6 +22,7 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/stat.h"
+#include "util/color.h"
 #include "util/string2.h"
 #include "util/thread-stack.h"
 #include "util/time-utils.h"
@@ -90,6 +91,7 @@ enum perf_output_field {
 	PERF_OUTPUT_SYNTH           = 1U << 25,
 	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
 	PERF_OUTPUT_UREGS	    = 1U << 27,
+	PERF_OUTPUT_METRIC	    = 1U << 28,
 };
 
 struct output_option {
@@ -124,6 +126,7 @@ struct output_option {
 	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
 	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
 	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+	{.str = "metric", .field = PERF_OUTPUT_METRIC},
 };
 
 enum {
@@ -215,12 +218,20 @@ struct perf_evsel_script {
        char *filename;
        FILE *fp;
        u64  samples;
+       /* For metric output */
+       u64  val;
+       int  gnum;
 };
 
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+	return (struct perf_evsel_script *)evsel->priv;
+}
+
 static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
 							struct perf_data *data)
 {
-	struct perf_evsel_script *es = malloc(sizeof(*es));
+	struct perf_evsel_script *es = zalloc(sizeof(*es));
 
 	if (es != NULL) {
 		if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
@@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel
 		es->fp = fopen(es->filename, "w");
 		if (es->fp == NULL)
 			goto out_free_filename;
-		es->samples = 0;
 	}
 
 	return es;
@@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp)
 	return fprintf(fp, "%-*s", maxlen, out);
 }
 
+struct metric_ctx {
+	struct perf_sample	*sample;
+	struct thread		*thread;
+	struct perf_evsel	*evsel;
+	FILE 			*fp;
+};
+
+static void script_print_metric(void *ctx, const char *color,
+			        const char *fmt,
+			        const char *unit, double val)
+{
+	struct metric_ctx *mctx = ctx;
+
+	if (!fmt)
+		return;
+	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+				   mctx->fp);
+	fputs("\tmetric: ", mctx->fp);
+	if (color)
+		color_fprintf(mctx->fp, color, fmt, val);
+	else
+		fprintf(mctx->fp, fmt, val); /* fp may be a dump file */
+	fprintf(mctx->fp, " %s\n", unit);
+}
+
+static void script_new_line(void *ctx)
+{
+	struct metric_ctx *mctx = ctx;
+
+	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+				   mctx->fp);
+	fputs("\tmetric: ", mctx->fp);
+}
+
+static void perf_sample__fprint_metric(struct perf_script *script,
+				       struct thread *thread,
+				       struct perf_evsel *evsel,
+				       struct perf_sample *sample,
+				       FILE *fp)
+{
+	struct perf_stat_output_ctx ctx = {
+		.print_metric = script_print_metric,
+		.new_line = script_new_line,
+		.ctx = &(struct metric_ctx) {
+				.sample = sample,
+				.thread = thread,
+				.evsel  = evsel,
+				.fp     = fp,
+			 },
+		.force_header = false,
+	};
+	struct perf_evsel *ev2;
+	static bool init;
+	u64 val;
+
+	if (!init) {
+		perf_stat__init_shadow_stats();
+		init = true;
+	}
+	if (!evsel->stats)
+		perf_evlist__alloc_stats(script->session->evlist, false);
+	if (evsel_script(evsel->leader)->gnum++ == 0)
+		perf_stat__reset_shadow_stats();
+	val = sample->period * evsel->scale;
+	perf_stat__update_shadow_stats(evsel,
+				       val,
+				       sample->cpu);
+	evsel_script(evsel)->val = val;
+	if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
+		for_each_group_member (ev2, evsel->leader) {
+			perf_stat__print_shadow_stats(ev2,
+						      evsel_script(ev2)->val,
+						      sample->cpu,
+						      &ctx,
+						      NULL);
+		}
+		evsel_script(evsel->leader)->gnum = 0;
+	}
+}
+
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al,
@@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(PHYS_ADDR))
 		fprintf(fp, "%16" PRIx64, sample->phys_addr);
 	fprintf(fp, "\n");
+
+	if (PRINT_FIELD(METRIC))
+		perf_sample__fprint_metric(script, thread, evsel, sample, fp);
 }
 
 static struct scripting_ops	*scripting_ops;
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 0ddd9c199227..6fd709017bbc 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 	struct metric_event me = {
 		.evsel = evsel
 	};
+
+	if (!metric_events)
+		return NULL;
+
 	nd = rblist__find(metric_events, &me);
 	if (nd)
 		return container_of(nd, struct metric_event, nd);
-- 
cgit v1.2.3


From 1fcd03946b52b8a57a6692fedd4406b45baedfe6 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Tue, 5 Dec 2017 22:03:04 +0800
Subject: perf stat: Update per-thread shadow stats

The function perf_stat__update_shadow_stats() is called to update the
shadow stats, which are kept in a set of static variables.

But these static variables are a limitation that prevents extending the
code to support per-thread shadow stats.

This patch makes perf_stat__update_shadow_stats() update the shadow
stats in an input parameter 'st', using update_runtime_stat() to update
the stats. It no longer updates the static variables directly as
before.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com
[ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c   |  3 +-
 tools/perf/builtin-stat.c     |  3 +-
 tools/perf/util/stat-shadow.c | 86 +++++++++++++++++++++++++++++--------------
 tools/perf/util/stat.c        |  8 ++--
 tools/perf/util/stat.h        |  2 +-
 5 files changed, 68 insertions(+), 34 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 39d8b55f0db3..81b395040298 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	val = sample->period * evsel->scale;
 	perf_stat__update_shadow_stats(evsel,
 				       val,
-				       sample->cpu);
+				       sample->cpu,
+				       &rt_stat);
 	evsel_script(evsel)->val = val;
 	if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
 		for_each_group_member (ev2, evsel->leader) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a027b4712e48..3f4a2c21b824 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1214,7 +1214,8 @@ static void aggr_update_shadow(void)
 				val += perf_counts(counter->counts, cpu, 0)->val;
 			}
 			perf_stat__update_shadow_stats(counter, val,
-						       first_shadow_cpu(counter, id));
+					first_shadow_cpu(counter, id),
+					&rt_stat);
 		}
 	}
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 07cfbf613bdc..4b28c40de927 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -116,19 +116,29 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused,
 
 static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
 					      int cpu,
-					      bool create)
+					      bool create,
+					      enum stat_type type,
+					      int ctx,
+					      struct runtime_stat *st)
 {
+	struct rblist *rblist;
 	struct rb_node *nd;
 	struct saved_value dm = {
 		.cpu = cpu,
 		.evsel = evsel,
+		.type = type,
+		.ctx = ctx,
+		.stat = st,
 	};
-	nd = rblist__find(&runtime_saved_values, &dm);
+
+	rblist = &st->value_list;
+
+	nd = rblist__find(rblist, &dm);
 	if (nd)
 		return container_of(nd, struct saved_value, rb_node);
 	if (create) {
-		rblist__add_node(&runtime_saved_values, &dm);
-		nd = rblist__find(&runtime_saved_values, &dm);
+		rblist__add_node(rblist, &dm);
+		nd = rblist__find(rblist, &dm);
 		if (nd)
 			return container_of(nd, struct saved_value, rb_node);
 	}
@@ -217,13 +227,24 @@ void perf_stat__reset_shadow_stats(void)
 	}
 }
 
+static void update_runtime_stat(struct runtime_stat *st,
+				enum stat_type type,
+				int ctx, int cpu, u64 count)
+{
+	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
+						   type, ctx, st);
+
+	if (v)
+		update_stats(&v->stats, count);
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
  * instruction rates, etc:
  */
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
-				    int cpu)
+				    int cpu, struct runtime_stat *st)
 {
 	int ctx = evsel_context(counter);
 
@@ -231,50 +252,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
 
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
 	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
-		update_stats(&runtime_nsecs_stats[cpu], count);
+		update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, ELISION_START))
-		update_stats(&runtime_elision_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
-		update_stats(&runtime_topdown_total_slots[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
+				    ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
-		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
+				    ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
-		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
+				    ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
-		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
+				    ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
-		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count);
+		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
+				    ctx, cpu, count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
+				    ctx, cpu, count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
+				    ctx, cpu, count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, SMI_NUM))
-		update_stats(&runtime_smi_num_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
 	else if (perf_stat_evsel__is(counter, APERF))
-		update_stats(&runtime_aperf_stats[ctx][cpu], count);
+		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
 
 	if (counter->collect_stat) {
-		struct saved_value *v = saved_value_lookup(counter, cpu, true);
+		struct saved_value *v = saved_value_lookup(counter, cpu, true,
+							   STAT_NONE, 0, st);
 		update_stats(&v->stats, count);
 	}
 }
@@ -694,7 +723,8 @@ static void generic_metric(const char *metric_expr,
 			stats = &walltime_nsecs_stats;
 			scale = 1e-9;
 		} else {
-			v = saved_value_lookup(metric_events[i], cpu, false);
+			v = saved_value_lookup(metric_events[i], cpu, false,
+					       STAT_NONE, 0, &rt_stat);
 			if (!v)
 				break;
 			stats = &v->stats;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 151e9efd7286..78abfd40b135 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -278,9 +278,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
 			perf_evsel__compute_deltas(evsel, cpu, thread, count);
 		perf_counts_values__scale(count, config->scale, NULL);
 		if (config->aggr_mode == AGGR_NONE)
-			perf_stat__update_shadow_stats(evsel, count->val, cpu);
+			perf_stat__update_shadow_stats(evsel, count->val, cpu,
+						       &rt_stat);
 		if (config->aggr_mode == AGGR_THREAD)
-			perf_stat__update_shadow_stats(evsel, count->val, 0);
+			perf_stat__update_shadow_stats(evsel, count->val, 0,
+						       &rt_stat);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
@@ -362,7 +364,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	perf_stat__update_shadow_stats(counter, *count, 0);
+	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
 
 	return 0;
 }
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f20240037377..bb9902ad3a79 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -129,7 +129,7 @@ void runtime_stat__exit(struct runtime_stat *st);
 void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
-				    int cpu);
+				    int cpu, struct runtime_stat *st);
 struct perf_stat_output_ctx {
 	void *ctx;
 	print_metric_t print_metric;
-- 
cgit v1.2.3


From e0128b30dbfb2884530251b4accdffdbf55a6b72 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Tue, 5 Dec 2017 22:03:05 +0800
Subject: perf stat: Print per-thread shadow stats

The function perf_stat__print_shadow_stats() is called to print the
shadow stats, which are kept in a set of static variables.

But these static variables are a limitation that prevents supporting
per-thread shadow stats.

This patch makes perf_stat__print_shadow_stats() print the shadow
stats from an input parameter 'st'.

It no longer reads the values directly from the static variables.
Instead, it now uses runtime_stat_avg() and runtime_stat_n() to get and
compute the values.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512482591-4646-6-git-send-email-yao.jin@linux.intel.com
[ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c   |   3 +-
 tools/perf/builtin-stat.c     |  23 +++--
 tools/perf/util/stat-shadow.c | 209 ++++++++++++++++++++++++++----------------
 tools/perf/util/stat.h        |   3 +-
 4 files changed, 151 insertions(+), 87 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 81b395040298..fac6f053e4da 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1557,7 +1557,8 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 						      evsel_script(ev2)->val,
 						      sample->cpu,
 						      &ctx,
-						      NULL);
+						      NULL,
+						      &rt_stat);
 		}
 		evsel_script(evsel->leader)->gnum = 0;
 	}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3f4a2c21b824..097a694d16f2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1097,7 +1097,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 }
 
 static void printout(int id, int nr, struct perf_evsel *counter, double uval,
-		     char *prefix, u64 run, u64 ena, double noise)
+		     char *prefix, u64 run, u64 ena, double noise,
+		     struct runtime_stat *st)
 {
 	struct perf_stat_output_ctx out;
 	struct outstate os = {
@@ -1190,7 +1191,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 
 	perf_stat__print_shadow_stats(counter, uval,
 				first_shadow_cpu(counter, id),
-				&out, &metric_events);
+				&out, &metric_events, st);
 	if (!csv_output && !metric_only) {
 		print_noise(counter, noise);
 		print_running(run, ena);
@@ -1335,7 +1336,8 @@ static void print_aggr(char *prefix)
 				fprintf(output, "%s", prefix);
 
 			uval = val * counter->scale;
-			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
+			printout(id, nr, counter, uval, prefix, run, ena, 1.0,
+				 &rt_stat);
 			if (!metric_only)
 				fputc('\n', output);
 		}
@@ -1365,7 +1367,8 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-		printout(thread, 0, counter, uval, prefix, run, ena, 1.0);
+		printout(thread, 0, counter, uval, prefix, run, ena, 1.0,
+			 &rt_stat);
 		fputc('\n', output);
 	}
 }
@@ -1402,7 +1405,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 		fprintf(output, "%s", prefix);
 
 	uval = cd.avg * counter->scale;
-	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg);
+	printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+		 cd.avg, &rt_stat);
 	if (!metric_only)
 		fprintf(output, "\n");
 }
@@ -1441,7 +1445,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 			fprintf(output, "%s", prefix);
 
 		uval = val * counter->scale;
-		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+			 &rt_stat);
 
 		fputc('\n', output);
 	}
@@ -1473,7 +1478,8 @@ static void print_no_aggr_metric(char *prefix)
 			run = perf_counts(counter->counts, cpu, 0)->run;
 
 			uval = val * counter->scale;
-			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
+			printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+				 &rt_stat);
 		}
 		fputc('\n', stat_config.output);
 	}
@@ -1529,7 +1535,8 @@ static void print_metric_headers(const char *prefix, bool no_indent)
 		perf_stat__print_shadow_stats(counter, 0,
 					      0,
 					      &out,
-					      &metric_events);
+					      &metric_events,
+					      &rt_stat);
 	}
 	fputc('\n', stat_config.output);
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 4b28c40de927..a95c4fe991aa 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -424,15 +424,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
 	}
 }
 
+static double runtime_stat_avg(struct runtime_stat *st,
+			       enum stat_type type, int ctx, int cpu)
+{
+	struct saved_value *v;
+
+	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+	if (!v)
+		return 0.0;
+
+	return avg_stats(&v->stats);
+}
+
+static double runtime_stat_n(struct runtime_stat *st,
+			     enum stat_type type, int ctx, int cpu)
+{
+	struct saved_value *v;
+
+	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+	if (!v)
+		return 0.0;
+
+	return v->stats.n;
+}
+
 static void print_stalled_cycles_frontend(int cpu,
 					  struct perf_evsel *evsel, double avg,
-					  struct perf_stat_output_ctx *out)
+					  struct perf_stat_output_ctx *out,
+					  struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -448,13 +473,14 @@ static void print_stalled_cycles_frontend(int cpu,
 
 static void print_stalled_cycles_backend(int cpu,
 					 struct perf_evsel *evsel, double avg,
-					 struct perf_stat_output_ctx *out)
+					 struct perf_stat_output_ctx *out,
+					 struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -467,13 +493,14 @@ static void print_stalled_cycles_backend(int cpu,
 static void print_branch_misses(int cpu,
 				struct perf_evsel *evsel,
 				double avg,
-				struct perf_stat_output_ctx *out)
+				struct perf_stat_output_ctx *out,
+				struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -486,13 +513,15 @@ static void print_branch_misses(int cpu,
 static void print_l1_dcache_misses(int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
-				   struct perf_stat_output_ctx *out)
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st)
+
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -505,13 +534,15 @@ static void print_l1_dcache_misses(int cpu,
 static void print_l1_icache_misses(int cpu,
 				   struct perf_evsel *evsel,
 				   double avg,
-				   struct perf_stat_output_ctx *out)
+				   struct perf_stat_output_ctx *out,
+				   struct runtime_stat *st)
+
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -523,13 +554,14 @@ static void print_l1_icache_misses(int cpu,
 static void print_dtlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
-				    struct perf_stat_output_ctx *out)
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -541,13 +573,14 @@ static void print_dtlb_cache_misses(int cpu,
 static void print_itlb_cache_misses(int cpu,
 				    struct perf_evsel *evsel,
 				    double avg,
-				    struct perf_stat_output_ctx *out)
+				    struct perf_stat_output_ctx *out,
+				    struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -559,13 +592,14 @@ static void print_itlb_cache_misses(int cpu,
 static void print_ll_cache_misses(int cpu,
 				  struct perf_evsel *evsel,
 				  double avg,
-				  struct perf_stat_output_ctx *out)
+				  struct perf_stat_output_ctx *out,
+				  struct runtime_stat *st)
 {
 	double total, ratio = 0.0;
 	const char *color;
 	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -623,68 +657,72 @@ static double sanitize_val(double x)
 	return x;
 }
 
-static double td_total_slots(int ctx, int cpu)
+static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
 {
-	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
 }
 
-static double td_bad_spec(int ctx, int cpu)
+static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
 {
 	double bad_spec = 0;
 	double total_slots;
 	double total;
 
-	total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
-		avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
-		avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
-	total_slots = td_total_slots(ctx, cpu);
+	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
+		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
+		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+
+	total_slots = td_total_slots(ctx, cpu, st);
 	if (total_slots)
 		bad_spec = total / total_slots;
 	return sanitize_val(bad_spec);
 }
 
-static double td_retiring(int ctx, int cpu)
+static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
 {
 	double retiring = 0;
-	double total_slots = td_total_slots(ctx, cpu);
-	double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+	double total_slots = td_total_slots(ctx, cpu, st);
+	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
+					    ctx, cpu);
 
 	if (total_slots)
 		retiring = ret_slots / total_slots;
 	return retiring;
 }
 
-static double td_fe_bound(int ctx, int cpu)
+static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
 {
 	double fe_bound = 0;
-	double total_slots = td_total_slots(ctx, cpu);
-	double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+	double total_slots = td_total_slots(ctx, cpu, st);
+	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
+					    ctx, cpu);
 
 	if (total_slots)
 		fe_bound = fetch_bub / total_slots;
 	return fe_bound;
 }
 
-static double td_be_bound(int ctx, int cpu)
+static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
 {
-	double sum = (td_fe_bound(ctx, cpu) +
-		      td_bad_spec(ctx, cpu) +
-		      td_retiring(ctx, cpu));
+	double sum = (td_fe_bound(ctx, cpu, st) +
+		      td_bad_spec(ctx, cpu, st) +
+		      td_retiring(ctx, cpu, st));
 	if (sum == 0)
 		return 0;
 	return sanitize_val(1.0 - sum);
 }
 
 static void print_smi_cost(int cpu, struct perf_evsel *evsel,
-			   struct perf_stat_output_ctx *out)
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
 {
 	double smi_num, aperf, cycles, cost = 0.0;
 	int ctx = evsel_context(evsel);
 	const char *color = NULL;
 
-	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
-	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
-	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
+	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
+	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
 
 	if ((cycles == 0) || (aperf == 0))
 		return;
@@ -704,7 +742,8 @@ static void generic_metric(const char *metric_expr,
 			   const char *metric_name,
 			   double avg,
 			   int cpu,
-			   struct perf_stat_output_ctx *out)
+			   struct perf_stat_output_ctx *out,
+			   struct runtime_stat *st)
 {
 	print_metric_t print_metric = out->print_metric;
 	struct parse_ctx pctx;
@@ -724,7 +763,7 @@ static void generic_metric(const char *metric_expr,
 			scale = 1e-9;
 		} else {
 			v = saved_value_lookup(metric_events[i], cpu, false,
-					       STAT_NONE, 0, &rt_stat);
+					       STAT_NONE, 0, st);
 			if (!v)
 				break;
 			stats = &v->stats;
@@ -752,7 +791,8 @@ static void generic_metric(const char *metric_expr,
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
-				   struct rblist *metric_events)
+				   struct rblist *metric_events,
+				   struct runtime_stat *st)
 {
 	void *ctxp = out->ctx;
 	print_metric_t print_metric = out->print_metric;
@@ -763,7 +803,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 	int num = 1;
 
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
 		if (total) {
 			ratio = avg / total;
 			print_metric(ctxp, NULL, "%7.2f ",
@@ -771,8 +812,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		} else {
 			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
 		}
-		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
-		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
+					 ctx, cpu);
+
+		total = max(total, runtime_stat_avg(st,
+						    STAT_STALLED_CYCLES_BACK,
+						    ctx, cpu));
 
 		if (total && avg) {
 			out->new_line(ctxp);
@@ -785,8 +831,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				     "stalled cycles per insn", 0);
 		}
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
-		if (runtime_branches_stats[ctx][cpu].n != 0)
-			print_branch_misses(cpu, evsel, avg, out);
+		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
+			print_branch_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all branches", 0);
 	} else if (
@@ -794,8 +840,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_l1_dcache_stats[ctx][cpu].n != 0)
-			print_l1_dcache_misses(cpu, evsel, avg, out);
+
+		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
+			print_l1_dcache_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
 	} else if (
@@ -803,8 +850,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_l1_icache_stats[ctx][cpu].n != 0)
-			print_l1_icache_misses(cpu, evsel, avg, out);
+
+		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
+			print_l1_icache_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
 	} else if (
@@ -812,8 +860,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0)
-			print_dtlb_cache_misses(cpu, evsel, avg, out);
+
+		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
+			print_dtlb_cache_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
 	} else if (
@@ -821,8 +870,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_itlb_cache_stats[ctx][cpu].n != 0)
-			print_itlb_cache_misses(cpu, evsel, avg, out);
+
+		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
+			print_itlb_cache_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
 	} else if (
@@ -830,27 +880,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
-		if (runtime_ll_cache_stats[ctx][cpu].n != 0)
-			print_ll_cache_misses(cpu, evsel, avg, out);
+
+		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
+			print_ll_cache_misses(cpu, evsel, avg, out, st);
 		else
 			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
-		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
 
 		if (total)
 			ratio = avg * 100 / total;
 
-		if (runtime_cacherefs_stats[ctx][cpu].n != 0)
+		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
 			print_metric(ctxp, NULL, "%8.3f %%",
 				     "of all cache refs", ratio);
 		else
 			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(cpu, evsel, avg, out);
+		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(cpu, evsel, avg, out);
+		print_stalled_cycles_backend(cpu, evsel, avg, out, st);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
+		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
 		if (total) {
 			ratio = avg / total;
@@ -859,7 +910,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			print_metric(ctxp, NULL, NULL, "Ghz", 0);
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
 		if (total)
 			print_metric(ctxp, NULL,
 					"%7.2f%%", "transactional cycles",
@@ -868,8 +920,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 			print_metric(ctxp, NULL, NULL, "transactional cycles",
 				     0);
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
@@ -878,19 +931,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		else
 			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+					 ctx, cpu);
 
 		if (avg)
 			ratio = total / avg;
 
-		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0)
+		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
 			print_metric(ctxp, NULL, "%8.0f",
 				     "cycles / transaction", ratio);
 		else
 			print_metric(ctxp, NULL, NULL, "cycles / transaction",
-				     0);
+				      0);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+					 ctx, cpu);
 
 		if (avg)
 			ratio = total / avg;
@@ -904,28 +959,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		else
 			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
-		double fe_bound = td_fe_bound(ctx, cpu);
+		double fe_bound = td_fe_bound(ctx, cpu, st);
 
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
 		print_metric(ctxp, color, "%8.1f%%", "frontend bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
-		double retiring = td_retiring(ctx, cpu);
+		double retiring = td_retiring(ctx, cpu, st);
 
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
 		print_metric(ctxp, color, "%8.1f%%", "retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
-		double bad_spec = td_bad_spec(ctx, cpu);
+		double bad_spec = td_bad_spec(ctx, cpu, st);
 
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
 		print_metric(ctxp, color, "%8.1f%%", "bad speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
-		double be_bound = td_be_bound(ctx, cpu);
+		double be_bound = td_be_bound(ctx, cpu, st);
 		const char *name = "backend bound";
 		static int have_recovery_bubbles = -1;
 
@@ -938,19 +993,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
-		if (td_total_slots(ctx, cpu) > 0)
+		if (td_total_slots(ctx, cpu, st) > 0)
 			print_metric(ctxp, color, "%8.1f%%", name,
 					be_bound * 100.);
 		else
 			print_metric(ctxp, NULL, NULL, name, 0);
 	} else if (evsel->metric_expr) {
 		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
-				evsel->metric_name, avg, cpu, out);
-	} else if (runtime_nsecs_stats[cpu].n != 0) {
+				evsel->metric_name, avg, cpu, out, st);
+	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
 		char unit = 'M';
 		char unit_buf[10];
 
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
+		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
 
 		if (total)
 			ratio = 1000.0 * avg / total;
@@ -961,7 +1016,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
 	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
-		print_smi_cost(cpu, evsel, out);
+		print_smi_cost(cpu, evsel, out, st);
 	} else {
 		num = 0;
 	}
@@ -974,7 +1029,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				out->new_line(ctxp);
 			generic_metric(mexp->metric_expr, mexp->metric_events,
 					evsel->name, mexp->metric_name,
-					avg, cpu, out);
+					avg, cpu, out, st);
 		}
 	}
 	if (num == 0)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index bb9902ad3a79..76b322a2d293 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -140,7 +140,8 @@ struct perf_stat_output_ctx {
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out,
-				   struct rblist *metric_events);
+				   struct rblist *metric_events,
+				   struct runtime_stat *st);
 void perf_stat__collect_metric_expr(struct perf_evlist *);
 
 int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
-- 
cgit v1.2.3


From 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 6 Dec 2017 18:45:35 +0100
Subject: perf utils: Move is_directory() to path.h

So that it can be used more widely, like in the next patch, when it will
be used to fix a bug in 'perf test' handling of dirent.d_type ==
DT_UNKNOWN.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org
[ Split from a larger patch, removed needless includes in path.h ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 14 +-------------
 tools/perf/util/path.c      | 14 ++++++++++++++
 tools/perf/util/path.h      |  3 +++
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index fac6f053e4da..77e47cf39f2c 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -26,6 +26,7 @@
 #include "util/string2.h"
 #include "util/thread-stack.h"
 #include "util/time-utils.h"
+#include "util/path.h"
 #include "print_binary.h"
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
@@ -2401,19 +2402,6 @@ out:
 	return rc;
 }
 
-/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
-static int is_directory(const char *base_path, const struct dirent *dent)
-{
-	char path[PATH_MAX];
-	struct stat st;
-
-	sprintf(path, "%s/%s", base_path, dent->d_name);
-	if (stat(path, &st))
-		return 0;
-
-	return S_ISDIR(st.st_mode);
-}
-
 #define for_each_lang(scripts_path, scripts_dir, lang_dirent)		\
 	while ((lang_dirent = readdir(scripts_dir)) != NULL)		\
 		if ((lang_dirent->d_type == DT_DIR ||			\
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 933f5c6bffb4..ca56ba2dd3da 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <dirent.h>
 #include <unistd.h>
 
 static char bad_path[] = "/bad-path/";
@@ -77,3 +78,16 @@ bool is_regular_file(const char *file)
 
 	return S_ISREG(st.st_mode);
 }
+
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+bool is_directory(const char *base_path, const struct dirent *dent)
+{
+	char path[PATH_MAX];
+	struct stat st;
+
+	sprintf(path, "%s/%s", base_path, dent->d_name);
+	if (stat(path, &st))
+		return false;
+
+	return S_ISDIR(st.st_mode);
+}
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
index 14a254ada7eb..f014f905df50 100644
--- a/tools/perf/util/path.h
+++ b/tools/perf/util/path.h
@@ -2,9 +2,12 @@
 #ifndef _PERF_PATH_H
 #define _PERF_PATH_H
 
+struct dirent;
+
 int path__join(char *bf, size_t size, const char *path1, const char *path2);
 int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);
 
 bool is_regular_file(const char *file);
+bool is_directory(const char *base_path, const struct dirent *dent);
 
 #endif /* _PERF_PATH_H */
-- 
cgit v1.2.3


From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Fri, 8 Dec 2017 21:13:46 +0800
Subject: perf script: Support time percent and multiple time ranges

perf script has a --time option to limit the time range of output.  It
only supports absolute time.

Now this option is extended to support multiple time ranges and time
ranges given as a percentage of the total time.

For example:

1. Select the first and second 10% time slices:

   perf script --time 10%/1,10%/2

2. Select from 0% to 10% and 30% to 40% slices:

   perf script --time 0%-10%,30%-40%

Changelog:

v6: Fix the merge issue with latest perf/core branch.
    No functional changes.

v5: Check the first/last sample time to detect whether it is recorded
    in perf.data. If it is not recorded, return an error message to the user.

v4: Remove perf_time__skip_sample, only use perf_time__ranges_skip_sample

v3: Since the definitions of first_sample_time/last_sample_time
    are moved from perf_session to perf_evlist so change the
    related code.

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 16 +++++++++++++++
 tools/perf/builtin-script.c              | 34 ++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 974ceb12c7f3..7b622a812a72 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -329,6 +329,22 @@ include::itrace.txt[]
 	stop time is not given (i.e, time string is 'x.y,') then analysis goes
 	to end of file.
 
+	Also support time percent with multiple time ranges. Time string is
+	'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'. The maximum number of slices is 10.
+
+	For example:
+	Select the second 10% time slice
+	perf script --time 10%/2
+
+	Select from 0% to 10% time slice
+	perf script --time 0%-10%
+
+	Select the first and second 10% time slices
+	perf script --time 10%/1,10%/2
+
+	Select from 0% to 10% and 30% to 40% slices
+	perf script --time 0%-10%,30%-40%
+
 --max-blocks::
 	Set the maximum number of program blocks to print with brstackasm for
 	each sample.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 77e47cf39f2c..330dcd9b9b8f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
 	return 0;
 }
 
+#define PTIME_RANGE_MAX	10
+
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1449,7 +1451,8 @@ struct perf_script {
 	struct thread_map	*threads;
 	int			name_width;
 	const char              *time_str;
-	struct perf_time_interval ptime;
+	struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
+	int			range_num;
 };
 
 static int perf_evlist__max_name_len(struct perf_evlist *evlist)
@@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool,
 	struct perf_script *scr = container_of(tool, struct perf_script, tool);
 	struct addr_location al;
 
-	if (perf_time__skip_sample(&scr->ptime, sample->time))
+	if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+					  sample->time)) {
 		return 0;
+	}
 
 	if (debug_mode) {
 		if (sample->time < last_timestamp) {
@@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv)
 		goto out_delete;
 
 	/* needs to be parsed after looking up reference time */
-	if (perf_time__parse_str(&script.ptime, script.time_str) != 0) {
-		pr_err("Invalid time string\n");
-		err = -EINVAL;
-		goto out_delete;
+	if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
+		if (session->evlist->first_sample_time == 0 &&
+		    session->evlist->last_sample_time == 0) {
+			pr_err("No first/last sample time in perf data\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+
+		script.range_num = perf_time__percent_parse_str(
+					script.ptime_range, PTIME_RANGE_MAX,
+					script.time_str,
+					session->evlist->first_sample_time,
+					session->evlist->last_sample_time);
+
+		if (script.range_num < 0) {
+			pr_err("Invalid time string\n");
+			err = -EINVAL;
+			goto out_delete;
+		}
+	} else {
+		script.range_num = 1;
 	}
 
 	err = __cmd_script(&script);
-- 
cgit v1.2.3


From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:52 +0100
Subject: perf script: Add support to display sample misc field

Add support to display the sample misc field in the form
of a letter for each bit:

  # perf script -F +misc ...
   sched-messaging  1414 K     28690.636582:       4590 cycles ...
   sched-messaging  1407 U     28690.636600:     325620 cycles ...
   sched-messaging  1414 K     28690.636608:      19473 cycles ...
  misc field  __________/

The misc bits are assigned to the following letters:

  PERF_RECORD_MISC_KERNEL        K
  PERF_RECORD_MISC_USER          U
  PERF_RECORD_MISC_HYPERVISOR    H
  PERF_RECORD_MISC_GUEST_KERNEL  G
  PERF_RECORD_MISC_GUEST_USER    g
  PERF_RECORD_MISC_MMAP_DATA*    M
  PERF_RECORD_MISC_COMM_EXEC     E
  PERF_RECORD_MISC_SWITCH_OUT    S

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 20 ++++++++-
 tools/perf/builtin-script.c              | 74 +++++++++++++++++++++++++++-----
 tools/perf/util/event.h                  |  1 +
 tools/perf/util/evsel.c                  |  1 +
 4 files changed, 84 insertions(+), 12 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 7b622a812a72..93ae8d60e3d3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
-	brstackoff, callindent, insn, insnlen, synth, phys_addr, metric.
+        brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
@@ -225,6 +225,24 @@ OPTIONS
 	that the metric computed is averaged over the whole sampling
 	period, not just for the sample point.
 
+	For sample events it's possible to display misc field with -F +misc option,
+	following letters are displayed for each bit:
+
+	  PERF_RECORD_MISC_KERNEL        K
+	  PERF_RECORD_MISC_USER          U
+	  PERF_RECORD_MISC_HYPERVISOR    H
+	  PERF_RECORD_MISC_GUEST_KERNEL  G
+	  PERF_RECORD_MISC_GUEST_USER    g
+	  PERF_RECORD_MISC_MMAP_DATA*    M
+	  PERF_RECORD_MISC_COMM_EXEC     E
+	  PERF_RECORD_MISC_SWITCH_OUT    S
+
+	  $ perf script -F +misc ...
+	   sched-messaging  1414 K     28690.636582:       4590 cycles ...
+	   sched-messaging  1407 U     28690.636600:     325620 cycles ...
+	   sched-messaging  1414 K     28690.636608:      19473 cycles ...
+	  misc field ___________/
+
 -k::
 --vmlinux=<file>::
         vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 330dcd9b9b8f..bb603495cf4a 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -93,6 +93,7 @@ enum perf_output_field {
 	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
 	PERF_OUTPUT_UREGS	    = 1U << 27,
 	PERF_OUTPUT_METRIC	    = 1U << 28,
+	PERF_OUTPUT_MISC            = 1U << 29,
 };
 
 struct output_option {
@@ -128,6 +129,7 @@ struct output_option {
 	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
 	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
 	{.str = "metric", .field = PERF_OUTPUT_METRIC},
+	{.str = "misc", .field = PERF_OUTPUT_MISC},
 };
 
 enum {
@@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
 
 static int perf_sample__fprintf_start(struct perf_sample *sample,
 				      struct thread *thread,
-				      struct perf_evsel *evsel, FILE *fp)
+				      struct perf_evsel *evsel,
+				      u32 type, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	unsigned long secs;
@@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
 			printed += fprintf(fp, "[%03d] ", sample->cpu);
 	}
 
+	if (PRINT_FIELD(MISC)) {
+		int ret = 0;
+
+		#define has(m) \
+			(sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m
+
+		if (has(KERNEL))
+			ret += fprintf(fp, "K");
+		if (has(USER))
+			ret += fprintf(fp, "U");
+		if (has(HYPERVISOR))
+			ret += fprintf(fp, "H");
+		if (has(GUEST_KERNEL))
+			ret += fprintf(fp, "G");
+		if (has(GUEST_USER))
+			ret += fprintf(fp, "g");
+
+		switch (type) {
+		case PERF_RECORD_MMAP:
+		case PERF_RECORD_MMAP2:
+			if (has(MMAP_DATA))
+				ret += fprintf(fp, "M");
+			break;
+		case PERF_RECORD_COMM:
+			if (has(COMM_EXEC))
+				ret += fprintf(fp, "E");
+			break;
+		case PERF_RECORD_SWITCH:
+		case PERF_RECORD_SWITCH_CPU_WIDE:
+			if (has(SWITCH_OUT))
+				ret += fprintf(fp, "S");
+		default:
+			break;
+		}
+
+		#undef has
+
+		ret += fprintf(fp, "%*s", 6 - ret, " ");
+		printed += ret;
+	}
+
 	if (PRINT_FIELD(TIME)) {
 		nsecs = sample->time;
 		secs = nsecs / NSEC_PER_SEC;
@@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color,
 	if (!fmt)
 		return;
 	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
-				   mctx->fp);
+				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 	if (color)
 		color_fprintf(mctx->fp, color, fmt, val);
@@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx)
 	struct metric_ctx *mctx = ctx;
 
 	perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
-				   mctx->fp);
+				   PERF_RECORD_SAMPLE, mctx->fp);
 	fputs("\tmetric: ", mctx->fp);
 }
 
@@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script,
 
 	++es->samples;
 
-	perf_sample__fprintf_start(sample, thread, evsel, fp);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SAMPLE, fp);
 
 	if (PRINT_FIELD(PERIOD))
 		fprintf(fp, "%10" PRIu64 " ", sample->period);
@@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool,
 		sample->tid = event->comm.tid;
 		sample->pid = event->comm.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_COMM, stdout);
 	perf_event__fprintf(event, stdout);
 	ret = 0;
 out:
@@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool,
 		sample->tid = event->namespaces.tid;
 		sample->pid = event->namespaces.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_NAMESPACES, stdout);
 	perf_event__fprintf(event, stdout);
 	ret = 0;
 out:
@@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool,
 		sample->tid = event->fork.tid;
 		sample->pid = event->fork.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_FORK, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 
@@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool,
 		sample->tid = event->fork.tid;
 		sample->pid = event->fork.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_EXIT, stdout);
 	perf_event__fprintf(event, stdout);
 
 	if (perf_event__process_exit(tool, event, sample, machine) < 0)
@@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool,
 		sample->tid = event->mmap.tid;
 		sample->pid = event->mmap.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
@@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool,
 		sample->tid = event->mmap2.tid;
 		sample->pid = event->mmap2.pid;
 	}
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_MMAP2, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
@@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool,
 		return -1;
 	}
 
-	perf_sample__fprintf_start(sample, thread, evsel, stdout);
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_SWITCH, stdout);
 	perf_event__fprintf(event, stdout);
 	thread__put(thread);
 	return 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1ae95efbfb95..e5fbd6dd1b01 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -205,6 +205,7 @@ struct perf_sample {
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
+	u16 misc;
 	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c435b2444153..d934f04e3110 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2042,6 +2042,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 	data->stream_id = data->id = data->time = -1ULL;
 	data->period = evsel->attr.sample_period;
 	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	data->misc    = event->header.misc;
 	data->id = -1ULL;
 	data->data_src = PERF_MEM_DATA_SRC_NONE;
 
-- 
cgit v1.2.3


From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 7 Jan 2018 17:03:53 +0100
Subject: perf script: Add support to display lost events

Adding option to display lost events:

  $ perf script --show-lost-events ...
   mplayer 13810 [002] 468011.402396:        100 cycles:ppp:  ff..
   mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880
   mplayer 13810 [002] 468011.402397:        100 cycles:ppp:  ff..

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org
[ Use PRIu64 when printing u64 values, fixing the build in some arches ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  3 +++
 tools/perf/builtin-script.c              | 28 ++++++++++++++++++++++++++++
 tools/perf/util/event.c                  |  8 ++++++++
 3 files changed, 39 insertions(+)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 93ae8d60e3d3..806ec6391fd6 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -300,6 +300,9 @@ OPTIONS
 	Display context switch events i.e. events of type PERF_RECORD_SWITCH or
 	PERF_RECORD_SWITCH_CPU_WIDE.
 
+--show-lost-events
+	Display lost events i.e. events of type PERF_RECORD_LOST.
+
 --demangle::
 	Demangle symbol names to human readable form. It's enabled by default,
 	disable with --no-demangle.
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb603495cf4a..c1cce474c0f1 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1489,6 +1489,7 @@ struct perf_script {
 	bool			show_mmap_events;
 	bool			show_switch_events;
 	bool			show_namespace_events;
+	bool			show_lost_events;
 	bool			allocated;
 	bool			per_event_dump;
 	struct cpu_map		*cpus;
@@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool,
 	return 0;
 }
 
+static int
+process_lost_event(struct perf_tool *tool,
+		   union perf_event *event,
+		   struct perf_sample *sample,
+		   struct machine *machine)
+{
+	struct perf_script *script = container_of(tool, struct perf_script, tool);
+	struct perf_session *session = script->session;
+	struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+	struct thread *thread;
+
+	thread = machine__findnew_thread(machine, sample->pid,
+					 sample->tid);
+	if (thread == NULL)
+		return -1;
+
+	perf_sample__fprintf_start(sample, thread, evsel,
+				   PERF_RECORD_LOST, stdout);
+	perf_event__fprintf(event, stdout);
+	thread__put(thread);
+	return 0;
+}
+
 static void sig_handler(int sig __maybe_unused)
 {
 	session_done = 1;
@@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script)
 		script->tool.context_switch = process_switch_event;
 	if (script->show_namespace_events)
 		script->tool.namespaces = process_namespaces_event;
+	if (script->show_lost_events)
+		script->tool.lost = process_lost_event;
 
 	if (perf_script__setup_per_event_dump(script)) {
 		pr_err("Couldn't create the per event dump files\n");
@@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv)
 		    "Show context switch events (if recorded)"),
 	OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
 		    "Show namespace events (if recorded)"),
+	OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
+		    "Show lost events (if recorded)"),
 	OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
 		    "Dump trace output to files named by the monitored events"),
 	OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 97a8ef9980db..44e603c27944 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
 		       event->context_switch.next_prev_tid);
 }
 
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_SWITCH_CPU_WIDE:
 		ret += perf_event__fprintf_switch(event, fp);
 		break;
+	case PERF_RECORD_LOST:
+		ret += perf_event__fprintf_lost(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
-- 
cgit v1.2.3


From eabad8c6856f185f876b54c426c2cc69fe0f0a7d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 15 Jan 2018 16:48:46 -0300
Subject: perf unwind: Do not look just at the global
 callchain_param.record_mode

When setting up DWARF callchains on specific events, without using
'record' or 'trace' --call-graph, but instead doing it like:

	perf trace -e cycles/call-graph=dwarf/

The unwind__prepare_access() call in thread__insert_map(), made when we
process PERF_RECORD_MMAP(2) metadata events, was not being performed,
precluding us from using per-event DWARF callchains; they were handled only
when we asked for all events to be DWARF, using "--call-graph dwarf".

We do it when processing PERF_RECORD_MMAP because we have to look at one of
the executable maps to figure out the executable type (64-bit, 32-bit) of
the DSO laid out in that mmap, and also at the architecture where
the perf.data file was recorded.

All this probably should be deferred to when we process a sample for
some thread that has callchains, so that we do this processing only for
the threads with samples, not for all of them.

For now, fix using DWARF on specific events.

Before:

  # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms
     0.000 probe_libc:inet_pton:(7fe9597bb350))
  Problem processing probe_libc:inet_pton callchain, skipping...
  #

After:

  # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms
       0.000 probe_libc:inet_pton:(7fd4aa930350))
                                         __inet_pton (inlined)
                                         gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
                                         __GI_getaddrinfo (inlined)
                                         [0xffffaa804e51af3f] (/usr/bin/ping)
                                         __libc_start_main (/usr/lib64/libc-2.26.so)
                                         [0xffffaa804e51b379] (/usr/bin/ping)
  #
  # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms
       0.000 probe_libc:inet_pton:(7f9363b9e350))
                                         __inet_pton (inlined)
                                         gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
                                         __GI_getaddrinfo (inlined)
                                         [0xffffa9e8a14e0f3f] (/usr/bin/ping)
                                         __libc_start_main (/usr/lib64/libc-2.26.so)
                                         [0xffffa9e8a14e1379] (/usr/bin/ping)
  #
  # perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms
       0.000 probe_libc:inet_pton:(7f4947e1c350))
                                         __inet_pton (inlined)
                                         gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so)
                                         __GI_getaddrinfo (inlined)
                                         [0xffffaa716d88ef3f] (/usr/bin/ping)
                                         __libc_start_main (/usr/lib64/libc-2.26.so)
                                         [0xffffaa716d88f379] (/usr/bin/ping)
  #
  # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1
  PING ::1(::1) 56 data bytes
  64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms

  --- ::1 ping statistics ---
  1 packets transmitted, 1 received, 0% packet loss, time 0ms
  rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms
       0.000 probe_libc:inet_pton:(7fa157696350))
                                         __GI___inet_pton (/usr/lib64/libc-2.26.so)
                                         getaddrinfo (/usr/lib64/libc-2.26.so)
                                         [0xffffa9ba39c74f40] (/usr/bin/ping)
  #

Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hendrick Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/r/20180116182650.GE16107@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c                 |  5 +++--
 tools/perf/builtin-report.c              |  5 +++--
 tools/perf/builtin-script.c              |  5 +++--
 tools/perf/tests/dwarf-unwind.c          |  1 +
 tools/perf/util/callchain.c              | 10 ++++++++++
 tools/perf/util/callchain.h              |  2 ++
 tools/perf/util/unwind-libunwind-local.c |  9 +++------
 7 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index c0debc3f79b6..c0815a37fdb5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist)
 	enum perf_call_graph_mode mode = CALLCHAIN_NONE;
 
 	if ((sample_type & PERF_SAMPLE_REGS_USER) &&
-	    (sample_type & PERF_SAMPLE_STACK_USER))
+	    (sample_type & PERF_SAMPLE_STACK_USER)) {
 		mode = CALLCHAIN_DWARF;
-	else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+		dwarf_callchain_users = true;
+	} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
 		mode = CALLCHAIN_LBR;
 	else if (sample_type & PERF_SAMPLE_CALLCHAIN)
 		mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index dd4df9a5cd06..6593779224d5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep)
 
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
-		    (sample_type & PERF_SAMPLE_STACK_USER))
+		    (sample_type & PERF_SAMPLE_STACK_USER)) {
 			callchain_param.record_mode = CALLCHAIN_DWARF;
-		else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			dwarf_callchain_users = true;
+		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
 			callchain_param.record_mode = CALLCHAIN_LBR;
 		else
 			callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c1cce474c0f1..08bc818f371b 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script)
 
 	if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
 		if ((sample_type & PERF_SAMPLE_REGS_USER) &&
-		    (sample_type & PERF_SAMPLE_STACK_USER))
+		    (sample_type & PERF_SAMPLE_STACK_USER)) {
 			callchain_param.record_mode = CALLCHAIN_DWARF;
-		else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+			dwarf_callchain_users = true;
+		} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
 			callchain_param.record_mode = CALLCHAIN_LBR;
 		else
 			callchain_param.record_mode = CALLCHAIN_FP;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index ac40e05bcab4..260418969120 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
 	}
 
 	callchain_param.record_mode = CALLCHAIN_DWARF;
+	dwarf_callchain_users = true;
 
 	if (init_live_machine(machine)) {
 		pr_err("Could not init machine\n");
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 082505d08d72..32ef7bdca1cf 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -37,6 +37,15 @@ struct callchain_param callchain_param = {
 	CALLCHAIN_PARAM_DEFAULT
 };
 
+/*
+ * Are there any events using DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
 struct callchain_param callchain_param_default = {
 	CALLCHAIN_PARAM_DEFAULT
 };
@@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 			ret = 0;
 			param->record_mode = CALLCHAIN_DWARF;
 			param->dump_size = default_stack_dump_size;
+			dwarf_callchain_users = true;
 
 			tok = strtok_r(NULL, ",", &saveptr);
 			if (tok) {
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index b79ef2478a57..154560b1eb65 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -89,6 +89,8 @@ enum chain_value {
 	CCVAL_COUNT,
 };
 
+extern bool dwarf_callchain_users;
+
 struct callchain_param {
 	bool			enabled;
 	enum perf_call_graph_mode record_mode;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 7a42f703e858..af873044d33a 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -631,9 +631,8 @@ static unw_accessors_t accessors = {
 
 static int _unwind__prepare_access(struct thread *thread)
 {
-	if (callchain_param.record_mode != CALLCHAIN_DWARF)
+	if (!dwarf_callchain_users)
 		return 0;
-
 	thread->addr_space = unw_create_addr_space(&accessors, 0);
 	if (!thread->addr_space) {
 		pr_err("unwind: Can't create unwind address space.\n");
@@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread)
 
 static void _unwind__flush_access(struct thread *thread)
 {
-	if (callchain_param.record_mode != CALLCHAIN_DWARF)
+	if (!dwarf_callchain_users)
 		return;
-
 	unw_flush_cache(thread->addr_space, 0, 0);
 }
 
 static void _unwind__finish_access(struct thread *thread)
 {
-	if (callchain_param.record_mode != CALLCHAIN_DWARF)
+	if (!dwarf_callchain_users)
 		return;
-
 	unw_destroy_addr_space(thread->addr_space);
 }
 
-- 
cgit v1.2.3


From 1e2778e91616086177a255f3fc8c72ecaa564ae6 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Wed, 10 Jan 2018 23:00:27 +0800
Subject: perf script: Improve error msg when no first/last sample time found

The following message will be shown to the user when executing 'perf
script --time' if the perf data file doesn't contain the first/last sample
time.

"HINT: no first/last sample time found in perf data.
 Please use latest perf binary to execute 'perf record'
 (if '--buildid-all' is enabled, please set '--timestamp-boundary')."

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1515596433-24653-3-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 08bc818f371b..ac781916e51e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3449,7 +3449,9 @@ int cmd_script(int argc, const char **argv)
 	if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
 		if (session->evlist->first_sample_time == 0 &&
 		    session->evlist->last_sample_time == 0) {
-			pr_err("No first/last sample time in perf data\n");
+			pr_err("HINT: no first/last sample time found in perf data.\n"
+			       "Please use latest perf binary to execute 'perf record'\n"
+			       "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
 			err = -EINVAL;
 			goto out_delete;
 		}
-- 
cgit v1.2.3


From cc2ef584a863b7c8033b78723cd253ca47e9a589 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Wed, 10 Jan 2018 23:00:33 +0800
Subject: perf script: Remove the time slices number limitation

Previously at most 10 time slices could be used in 'perf
script --time'.

This patch removes this limitation.
For example, the following command line is now OK (12 time slices):

perf script --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Suggested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Reviewed-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1515596433-24653-9-git-send-email-yao.jin@linux.intel.com
[ No need to check for NULL to call free, use zfree ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 10 +++++-----
 tools/perf/builtin-script.c              | 16 ++++++++++++----
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 806ec6391fd6..7730c1d2b5d3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -351,19 +351,19 @@ include::itrace.txt[]
 	to end of file.
 
 	Also support time percent with multipe time range. Time string is
-	'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10.
+	'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
 
 	For example:
-	Select the second 10% time slice
+	Select the second 10% time slice:
 	perf script --time 10%/2
 
-	Select from 0% to 10% time slice
+	Select from 0% to 10% time slice:
 	perf script --time 0%-10%
 
-	Select the first and second 10% time slices
+	Select the first and second 10% time slices:
 	perf script --time 10%/1,10%/2
 
-	Select from 0% to 10% and 30% to 40% slices
+	Select from 0% to 10% and 30% to 40% slices:
 	perf script --time 0%-10%,30%-40%
 
 --max-blocks::
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ac781916e51e..3499d68e1d70 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
 	return 0;
 }
 
-#define PTIME_RANGE_MAX	10
-
 struct perf_script {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1496,7 +1494,8 @@ struct perf_script {
 	struct thread_map	*threads;
 	int			name_width;
 	const char              *time_str;
-	struct perf_time_interval ptime_range[PTIME_RANGE_MAX];
+	struct perf_time_interval *ptime_range;
+	int			range_size;
 	int			range_num;
 };
 
@@ -3445,6 +3444,13 @@ int cmd_script(int argc, const char **argv)
 	if (err < 0)
 		goto out_delete;
 
+	script.ptime_range = perf_time__range_alloc(script.time_str,
+						    &script.range_size);
+	if (!script.ptime_range) {
+		err = -ENOMEM;
+		goto out_delete;
+	}
+
 	/* needs to be parsed after looking up reference time */
 	if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
 		if (session->evlist->first_sample_time == 0 &&
@@ -3457,7 +3463,7 @@ int cmd_script(int argc, const char **argv)
 		}
 
 		script.range_num = perf_time__percent_parse_str(
-					script.ptime_range, PTIME_RANGE_MAX,
+					script.ptime_range, script.range_size,
 					script.time_str,
 					session->evlist->first_sample_time,
 					session->evlist->last_sample_time);
@@ -3476,6 +3482,8 @@ int cmd_script(int argc, const char **argv)
 	flush_scripting();
 
 out_delete:
+	zfree(&script.ptime_range);
+
 	perf_evlist__free_stats(session->evlist);
 	perf_session__delete(session);
 
-- 
cgit v1.2.3


From bafae98e7a95df74ce4529ae96251cb12c86fdf3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 22 Jan 2018 16:42:16 -0300
Subject: perf evlist: Remove fcntl.h from evlist.h

Not needed there; fix up the places where it is needed and was being
obtained only by luck via evlist.h.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-yxjpetn64z8vjuguu84gr6x6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kvm.c                    | 3 +++
 tools/perf/builtin-script.c                 | 1 +
 tools/perf/builtin-trace.c                  | 1 +
 tools/perf/tests/bpf.c                      | 1 +
 tools/perf/tests/openat-syscall-tp-fields.c | 3 +++
 tools/perf/util/cgroup.c                    | 3 +++
 tools/perf/util/evlist.c                    | 1 +
 tools/perf/util/evlist.h                    | 1 -
 tools/perf/util/parse-events.c              | 3 +++
 9 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'tools/perf/builtin-script.c')

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 98853162eae9..55d919dc5bc6 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -26,6 +26,9 @@
 #include <sys/timerfd.h>
 #endif
 #include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 #include <linux/kernel.h>
 #include <linux/time64.h>
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3499d68e1d70..ab19a6ee4093 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -42,6 +42,7 @@
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <fcntl.h>
 #include <unistd.h>
 
 #include "sane_ctype.h"
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 46d3ff09440c..868306ccd8b8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -56,6 +56,7 @@
 #include <linux/random.h>
 #include <linux/stringify.h>
 #include <linux/time64.h>
+#include <fcntl.h>
 
 #include "sane_ctype.h"
 
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 8e709c9d512c..e8399beca62b 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -3,6 +3,7 @@
 #include <sys/epoll.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <fcntl.h>
 #include <util/util.h>
 #include <util/bpf-loader.h>
 #include <util/evlist.h>
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index 97c9407d02a0..43519267b93b 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -1,5 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include "perf.h"
 #include "evlist.h"
 #include "evsel.h"
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index d9ffc1e6eb39..984f69144f87 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -6,6 +6,9 @@
 #include "cgroup.h"
 #include "evlist.h"
 #include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 
 int nr_cgroups;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 120efd85f2c8..ac35cd214feb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -25,6 +25,7 @@
 #include "parse-events.h"
 #include <subcmd/parse-options.h>
 
+#include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e7fbca69cbac..75f8e0ad5d76 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -7,7 +7,6 @@
 #include <linux/refcount.h>
 #include <linux/list.h>
 #include <api/fd/array.h>
-#include <fcntl.h>
 #include <stdio.h>
 #include "../perf.h"
 #include "event.h"
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 170316795a18..34589c427e52 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -4,6 +4,9 @@
 #include <dirent.h>
 #include <errno.h>
 #include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include <sys/param.h>
 #include "term.h"
 #include "../perf.h"
-- 
cgit v1.2.3