summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/perf/core-book3s.c1
-rw-r--r--arch/x86/events/amd/uncore.c44
-rw-r--r--arch/x86/events/intel/core.c25
-rw-r--r--arch/x86/events/intel/lbr.c9
-rw-r--r--arch/x86/events/intel/uncore.c12
-rw-r--r--arch/x86/events/intel/uncore.h2
-rw-r--r--arch/x86/events/intel/uncore_snb.c159
-rw-r--r--arch/x86/include/asm/perf_event.h15
-rw-r--r--include/linux/min_heap.h134
-rw-r--r--include/linux/perf_event.h19
-rw-r--r--include/uapi/linux/perf_event.h8
-rw-r--r--kernel/events/core.c337
-rw-r--r--lib/Kconfig.debug10
-rw-r--r--lib/Makefile1
-rw-r--r--lib/test_min_heap.c194
-rw-r--r--tools/include/uapi/linux/perf_event.h8
-rw-r--r--tools/lib/api/fs/Build1
-rw-r--r--tools/lib/api/fs/cgroup.c67
-rw-r--r--tools/lib/api/fs/fs.h2
-rw-r--r--tools/lib/perf/Documentation/examples/counting.c83
-rw-r--r--tools/lib/traceevent/event-parse.c2
-rw-r--r--tools/perf/Documentation/perf-stat.txt9
-rw-r--r--tools/perf/builtin-diff.c21
-rw-r--r--tools/perf/builtin-report.c21
-rw-r--r--tools/perf/builtin-script.c70
-rw-r--r--tools/perf/builtin-stat.c4
-rw-r--r--tools/perf/tests/expr.c10
-rw-r--r--tools/perf/tests/sample-parsing.c7
-rw-r--r--tools/perf/util/Build11
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/annotate.h1
-rw-r--r--tools/perf/util/block-info.c106
-rw-r--r--tools/perf/util/block-info.h9
-rw-r--r--tools/perf/util/branch.h22
-rw-r--r--tools/perf/util/cgroup.c63
-rw-r--r--tools/perf/util/cs-etm.c2
-rw-r--r--tools/perf/util/event.h1
-rw-r--r--tools/perf/util/evsel.c20
-rw-r--r--tools/perf/util/evsel.h6
-rw-r--r--tools/perf/util/expr.c112
-rw-r--r--tools/perf/util/expr.h8
-rw-r--r--tools/perf/util/expr.l114
-rw-r--r--tools/perf/util/expr.y185
-rw-r--r--tools/perf/util/header.c37
-rw-r--r--tools/perf/util/hist.c3
-rw-r--r--tools/perf/util/intel-pt.c2
-rw-r--r--tools/perf/util/llvm-utils.c2
-rw-r--r--tools/perf/util/machine.c35
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c30
-rw-r--r--tools/perf/util/session.c8
-rw-r--r--tools/perf/util/stat-display.c33
-rw-r--r--tools/perf/util/stat-shadow.c4
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/synthetic-events.c6
55 files changed, 1611 insertions, 488 deletions
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3086055bf681..3dcfecf858f3 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -518,6 +518,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
}
}
cpuhw->bhrb_stack.nr = u_index;
+ cpuhw->bhrb_stack.hw_idx = -1ULL;
return;
}
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 4d867a752f0e..76400c052b0e 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -180,6 +180,31 @@ static void amd_uncore_del(struct perf_event *event, int flags)
hwc->idx = -1;
}
+/*
+ * Convert logical CPU number to L3 PMC Config ThreadMask format
+ */
+static u64 l3_thread_slice_mask(int cpu)
+{
+ u64 thread_mask, core = topology_core_id(cpu);
+ unsigned int shift, thread = 0;
+
+ if (topology_smt_supported() && !topology_is_primary_thread(cpu))
+ thread = 1;
+
+ if (boot_cpu_data.x86 <= 0x18) {
+ shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
+ thread_mask = BIT_ULL(shift);
+
+ return AMD64_L3_SLICE_MASK | thread_mask;
+ }
+
+ core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
+ shift = AMD64_L3_THREAD_SHIFT + thread;
+ thread_mask = BIT_ULL(shift);
+
+ return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
+}
+
static int amd_uncore_event_init(struct perf_event *event)
{
struct amd_uncore *uncore;
@@ -203,18 +228,11 @@ static int amd_uncore_event_init(struct perf_event *event)
return -EINVAL;
/*
- * SliceMask and ThreadMask need to be set for certain L3 events in
- * Family 17h. For other events, the two fields do not affect the count.
+ * SliceMask and ThreadMask need to be set for certain L3 events.
+ * For other events, the two fields do not affect the count.
*/
- if (l3_mask && is_llc_event(event)) {
- int thread = 2 * (cpu_data(event->cpu).cpu_core_id % 4);
-
- if (smp_num_siblings > 1)
- thread += cpu_data(event->cpu).apicid & 1;
-
- hwc->config |= (1ULL << (AMD64_L3_THREAD_SHIFT + thread) &
- AMD64_L3_THREAD_MASK) | AMD64_L3_SLICE_MASK;
- }
+ if (l3_mask && is_llc_event(event))
+ hwc->config |= l3_thread_slice_mask(event->cpu);
uncore = event_to_amd_uncore(event);
if (!uncore)
@@ -520,9 +538,9 @@ static int __init amd_uncore_init(void)
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
return -ENODEV;
- if (boot_cpu_data.x86 == 0x17 || boot_cpu_data.x86 == 0x18) {
+ if (boot_cpu_data.x86 >= 0x17) {
/*
- * For F17h or F18h, the Northbridge counters are
+ * For F17h and above, the Northbridge counters are
* repurposed as Data Fabric counters. Also, L3
* counters are supported too. The PMUs are exported
* based on family as either L2 or L3 and NB or DF.
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dff6623804c2..332954cccece 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1945,6 +1945,14 @@ static __initconst const u64 knl_hw_cache_extra_regs
* intel_bts events don't coexist with intel PMU's BTS events because of
* x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
* disabled around intel PMU's event batching etc, only inside the PMI handler.
+ *
+ * Avoid PEBS_ENABLE MSR access in PMIs.
+ * The GLOBAL_CTRL has been disabled. All the counters do not count anymore.
+ * It doesn't matter if the PEBS is enabled or not.
+ * Usually, the PEBS status are not changed in PMIs. It's unnecessary to
+ * access PEBS_ENABLE MSR in disable_all()/enable_all().
+ * However, there are some cases which may change PEBS status, e.g. PMI
+ * throttle. The PEBS_ENABLE should be updated where the status changes.
*/
static void __intel_pmu_disable_all(void)
{
@@ -1954,13 +1962,12 @@ static void __intel_pmu_disable_all(void)
if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
intel_pmu_disable_bts();
-
- intel_pmu_pebs_disable_all();
}
static void intel_pmu_disable_all(void)
{
__intel_pmu_disable_all();
+ intel_pmu_pebs_disable_all();
intel_pmu_lbr_disable_all();
}
@@ -1968,7 +1975,6 @@ static void __intel_pmu_enable_all(int added, bool pmi)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all(pmi);
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
@@ -1986,6 +1992,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
static void intel_pmu_enable_all(int added)
{
+ intel_pmu_pebs_enable_all();
__intel_pmu_enable_all(added, false);
}
@@ -2374,9 +2381,21 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
* PEBS overflow sets bit 62 in the global status register
*/
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
+ u64 pebs_enabled = cpuc->pebs_enabled;
+
handled++;
x86_pmu.drain_pebs(regs);
status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
+
+ /*
+ * PMI throttle may be triggered, which stops the PEBS event.
+ * Although cpuc->pebs_enabled is updated accordingly, the
+ * MSR_IA32_PEBS_ENABLE is not updated. Because the
+ * cpuc->enabled has been forced to 0 in PMI.
+ * Update the MSR if pebs_enabled is changed.
+ */
+ if (pebs_enabled != cpuc->pebs_enabled)
+ wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
}
/*
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 534c76606049..65113b16804a 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -585,6 +585,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
+ cpuc->lbr_stack.hw_idx = tos;
}
/*
@@ -680,6 +681,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
out++;
}
cpuc->lbr_stack.nr = out;
+ cpuc->lbr_stack.hw_idx = tos;
}
void intel_pmu_lbr_read(void)
@@ -1120,6 +1122,13 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
int i;
cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+
+ /* Cannot get TOS for large PEBS */
+ if (cpuc->n_pebs == cpuc->n_large_pebs)
+ cpuc->lbr_stack.hw_idx = -1ULL;
+ else
+ cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
+
for (i = 0; i < x86_pmu.lbr_nr; i++) {
u64 info = lbr->lbr[i].info;
struct perf_branch_entry *e = &cpuc->lbr_entries[i];
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 86467f85c383..63922e3a34f5 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1470,6 +1470,16 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
.pci_init = skl_uncore_pci_init,
};
+static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .mmio_init = tgl_uncore_mmio_init,
+};
+
+static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .mmio_init = tgl_l_uncore_mmio_init,
+};
+
static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
.cpu_init = snr_uncore_cpu_init,
.pci_init = snr_uncore_pci_init,
@@ -1505,6 +1515,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_L, icl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_NNPI, icl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE, icl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE_L, tgl_l_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_TIGERLAKE, tgl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ATOM_TREMONT_D, snr_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index bbfdaa720b45..1204dcc9fe9b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -527,6 +527,8 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
+void tgl_uncore_mmio_init(void);
+void tgl_l_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index c37cb12d0ef6..3de1065eefc4 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -44,6 +44,11 @@
#define PCI_DEVICE_ID_INTEL_WHL_UD_IMC 0x3e35
#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
+#define PCI_DEVICE_ID_INTEL_TGL_U1_IMC 0x9a02
+#define PCI_DEVICE_ID_INTEL_TGL_U2_IMC 0x9a04
+#define PCI_DEVICE_ID_INTEL_TGL_U3_IMC 0x9a12
+#define PCI_DEVICE_ID_INTEL_TGL_U4_IMC 0x9a14
+#define PCI_DEVICE_ID_INTEL_TGL_H_IMC 0x9a36
/* SNB event control */
@@ -1002,3 +1007,157 @@ void nhm_uncore_cpu_init(void)
}
/* end of Nehalem uncore support */
+
+/* Tiger Lake MMIO uncore support */
+
+static const struct pci_device_id tgl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U1_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U2_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U3_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_U4_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGL_H_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ }
+};
+
+enum perf_tgl_uncore_imc_freerunning_types {
+ TGL_MMIO_UNCORE_IMC_DATA_TOTAL,
+ TGL_MMIO_UNCORE_IMC_DATA_READ,
+ TGL_MMIO_UNCORE_IMC_DATA_WRITE,
+ TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX
+};
+
+static struct freerunning_counters tgl_l_uncore_imc_freerunning[] = {
+ [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x5040, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0x5058, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0x50A0, 0x0, 0x0, 1, 64 },
+};
+
+static struct freerunning_counters tgl_uncore_imc_freerunning[] = {
+ [TGL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0xd840, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_READ] = { 0xd858, 0x0, 0x0, 1, 64 },
+ [TGL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xd8A0, 0x0, 0x0, 1, 64 },
+};
+
+static struct uncore_event_desc tgl_uncore_imc_events[] = {
+ INTEL_UNCORE_EVENT_DESC(data_total, "event=0xff,umask=0x10"),
+ INTEL_UNCORE_EVENT_DESC(data_total.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_total.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_read, "event=0xff,umask=0x20"),
+ INTEL_UNCORE_EVENT_DESC(data_read.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_read.unit, "MiB"),
+
+ INTEL_UNCORE_EVENT_DESC(data_write, "event=0xff,umask=0x30"),
+ INTEL_UNCORE_EVENT_DESC(data_write.scale, "6.103515625e-5"),
+ INTEL_UNCORE_EVENT_DESC(data_write.unit, "MiB"),
+
+ { /* end: all zeroes */ }
+};
+
+static struct pci_dev *tgl_uncore_get_mc_dev(void)
+{
+ const struct pci_device_id *ids = tgl_uncore_pci_ids;
+ struct pci_dev *mc_dev = NULL;
+
+ while (ids && ids->vendor) {
+ mc_dev = pci_get_device(PCI_VENDOR_ID_INTEL, ids->device, NULL);
+ if (mc_dev)
+ return mc_dev;
+ ids++;
+ }
+
+ return mc_dev;
+}
+
+#define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000
+
+static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
+{
+ struct pci_dev *pdev = tgl_uncore_get_mc_dev();
+ struct intel_uncore_pmu *pmu = box->pmu;
+ resource_size_t addr;
+ u32 mch_bar;
+
+ if (!pdev) {
+ pr_warn("perf uncore: Cannot find matched IMC device.\n");
+ return;
+ }
+
+ pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &mch_bar);
+ /* MCHBAR is disabled */
+ if (!(mch_bar & BIT(0))) {
+ pr_warn("perf uncore: MCHBAR is disabled. Failed to map IMC free-running counters.\n");
+ return;
+ }
+ mch_bar &= ~BIT(0);
+ addr = (resource_size_t)(mch_bar + TGL_UNCORE_MMIO_IMC_MEM_OFFSET * pmu->pmu_idx);
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET + 4, &mch_bar);
+ addr |= ((resource_size_t)mch_bar << 32);
+#endif
+
+ box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE);
+}
+
+static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = {
+ .init_box = tgl_uncore_imc_freerunning_init_box,
+ .exit_box = uncore_mmio_exit_box,
+ .read_counter = uncore_mmio_read_counter,
+ .hw_config = uncore_freerunning_hw_config,
+};
+
+static struct attribute *tgl_uncore_imc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ NULL
+};
+
+static const struct attribute_group tgl_uncore_imc_format_group = {
+ .name = "format",
+ .attrs = tgl_uncore_imc_formats_attr,
+};
+
+static struct intel_uncore_type tgl_uncore_imc_free_running = {
+ .name = "imc_free_running",
+ .num_counters = 3,
+ .num_boxes = 2,
+ .num_freerunning_types = TGL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX,
+ .freerunning = tgl_uncore_imc_freerunning,
+ .ops = &tgl_uncore_imc_freerunning_ops,
+ .event_descs = tgl_uncore_imc_events,
+ .format_group = &tgl_uncore_imc_format_group,
+};
+
+static struct intel_uncore_type *tgl_mmio_uncores[] = {
+ &tgl_uncore_imc_free_running,
+ NULL
+};
+
+void tgl_l_uncore_mmio_init(void)
+{
+ tgl_uncore_imc_free_running.freerunning = tgl_l_uncore_imc_freerunning;
+ uncore_mmio_uncores = tgl_mmio_uncores;
+}
+
+void tgl_uncore_mmio_init(void)
+{
+ uncore_mmio_uncores = tgl_mmio_uncores;
+}
+
+/* end of Tiger Lake MMIO uncore support */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 29964b0e1075..e855e9cf2c37 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -50,11 +50,22 @@
#define AMD64_L3_SLICE_SHIFT 48
#define AMD64_L3_SLICE_MASK \
- ((0xFULL) << AMD64_L3_SLICE_SHIFT)
+ (0xFULL << AMD64_L3_SLICE_SHIFT)
+#define AMD64_L3_SLICEID_MASK \
+ (0x7ULL << AMD64_L3_SLICE_SHIFT)
#define AMD64_L3_THREAD_SHIFT 56
#define AMD64_L3_THREAD_MASK \
- ((0xFFULL) << AMD64_L3_THREAD_SHIFT)
+ (0xFFULL << AMD64_L3_THREAD_SHIFT)
+#define AMD64_L3_F19H_THREAD_MASK \
+ (0x3ULL << AMD64_L3_THREAD_SHIFT)
+
+#define AMD64_L3_EN_ALL_CORES BIT_ULL(47)
+#define AMD64_L3_EN_ALL_SLICES BIT_ULL(46)
+
+#define AMD64_L3_COREID_SHIFT 42
+#define AMD64_L3_COREID_MASK \
+ (0x7ULL << AMD64_L3_COREID_SHIFT)
#define X86_RAW_EVENT_MASK \
(ARCH_PERFMON_EVENTSEL_EVENT | \
diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h
new file mode 100644
index 000000000000..44077837385f
--- /dev/null
+++ b/include/linux/min_heap.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MIN_HEAP_H
+#define _LINUX_MIN_HEAP_H
+
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+/**
+ * struct min_heap - Data structure to hold a min-heap.
+ * @data: Start of array holding the heap elements.
+ * @nr: Number of elements currently in the heap.
+ * @size: Maximum number of elements that can be held in current storage.
+ */
+struct min_heap {
+ void *data;
+ int nr;
+ int size;
+};
+
+/**
+ * struct min_heap_callbacks - Data/functions to customise the min_heap.
+ * @elem_size: The nr of each element in bytes.
+ * @less: Partial order function for this heap.
+ * @swp: Swap elements function.
+ */
+struct min_heap_callbacks {
+ int elem_size;
+ bool (*less)(const void *lhs, const void *rhs);
+ void (*swp)(void *lhs, void *rhs);
+};
+
+/* Sift the element at pos down the heap. */
+static __always_inline
+void min_heapify(struct min_heap *heap, int pos,
+ const struct min_heap_callbacks *func)
+{
+ void *left, *right, *parent, *smallest;
+ void *data = heap->data;
+
+ for (;;) {
+ if (pos * 2 + 1 >= heap->nr)
+ break;
+
+ left = data + ((pos * 2 + 1) * func->elem_size);
+ parent = data + (pos * func->elem_size);
+ smallest = parent;
+ if (func->less(left, smallest))
+ smallest = left;
+
+ if (pos * 2 + 2 < heap->nr) {
+ right = data + ((pos * 2 + 2) * func->elem_size);
+ if (func->less(right, smallest))
+ smallest = right;
+ }
+ if (smallest == parent)
+ break;
+ func->swp(smallest, parent);
+ if (smallest == left)
+ pos = (pos * 2) + 1;
+ else
+ pos = (pos * 2) + 2;
+ }
+}
+
+/* Floyd's approach to heapification that is O(nr). */
+static __always_inline
+void min_heapify_all(struct min_heap *heap,
+ const struct min_heap_callbacks *func)
+{
+ int i;
+
+ for (i = heap->nr / 2; i >= 0; i--)
+ min_heapify(heap, i, func);
+}
+
+/* Remove minimum element from the heap, O(log2(nr)). */
+static __always_inline
+void min_heap_pop(struct min_heap *heap,
+ const struct min_heap_callbacks *func)
+{
+ void *data = heap->data;
+
+ if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap"))
+ return;
+
+ /* Place last element at the root (position 0) and then sift down. */
+ heap->nr--;
+ memcpy(data, data + (heap->nr * func->elem_size), func->elem_size);
+ min_heapify(heap, 0, func);
+}
+
+/*
+ * Remove the minimum element and then push the given element. The
+ * implementation performs 1 sift (O(log2(nr))) and is therefore more
+ * efficient than a pop followed by a push that does 2.
+ */
+static __always_inline
+void min_heap_pop_push(struct min_heap *heap,
+ const void *element,
+ const struct min_heap_callbacks *func)
+{
+ memcpy(heap->data, element, func->elem_size);
+ min_heapify(heap, 0, func);
+}
+
+/* Push an element on to the heap, O(log2(nr)). */
+static __always_inline
+void min_heap_push(struct min_heap *heap, const void *element,
+ const struct min_heap_callbacks *func)
+{
+ void *data = heap->data;
+ void *child, *parent;
+ int pos;
+
+ if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap"))
+ return;
+
+ /* Place at the end of data. */
+ pos = heap->nr;
+ memcpy(data + (pos * func->elem_size), element, func->elem_size);
+ heap->nr++;
+
+ /* Sift child at pos up. */
+ for (; pos > 0; pos = (pos - 1) / 2) {
+ child = data + (pos * func->elem_size);
+ parent = data + ((pos - 1) / 2) * func->elem_size;
+ if (func->less(parent, child))
+ break;
+ func->swp(parent, child);
+ }
+}
+
+#endif /* _LINUX_MIN_HEAP_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 547773f5894e..8768a39b5258 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -93,14 +93,26 @@ struct perf_raw_record {
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
+ * hw_idx: The low level index of raw branch records
+ * for the most recent branch.
+ * -1ULL means invalid/unknown.
*
* Note that nr can vary from sample to sample
* branches (to, from) are stored from most recent
* to least recent, i.e., entries[0] contains the most
* recent branch.
+ * The entries[] is an abstraction of raw branch records,
+ * which may not be stored in age order in HW, e.g. Intel LBR.
+ * The hw_idx is to expose the low level index of raw
+ * branch record for the most recent branch aka entries[0].
+ * The hw_idx index is between -1 (unknown) and max depth,
+ * which can be retrieved in /sys/devices/cpu/caps/branches.
+ * For the architectures whose raw branch records are
+ * already stored in age order, the hw_idx should be 0.
*/
struct perf_branch_stack {
__u64 nr;
+ __u64 hw_idx;
struct perf_branch_entry entries[0];
};
@@ -850,6 +862,13 @@ struct perf_cpu_context {
int sched_cb_usage;
int online;
+ /*
+ * Per-CPU storage for iterators used in visit_groups_merge. The default
+ * storage is of size 2 to hold the CPU and any CPU event iterators.
+ */
+ int heap_size;
+ struct perf_event **heap;
+ struct perf_event *heap_default[2];
};
struct perf_output_handle {
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 377d794d3105..397cfd65b3fe 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -208,6 +210,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -853,7 +857,9 @@ enum perf_event_type {
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 nr;
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+ * { u64 from, to, flags } lbr[nr];
+ * } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e453589da97c..ccf8d4fc6374 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,6 +49,7 @@
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>
+#include <linux/min_heap.h>
#include "internal.h"
@@ -891,6 +892,47 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
rcu_read_unlock();
}
+static int perf_cgroup_ensure_storage(struct perf_event *event,
+ struct cgroup_subsys_state *css)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event **storage;
+ int cpu, heap_size, ret = 0;
+
+ /*
+ * Allow storage to have sufficent space for an iterator for each
+ * possibly nested cgroup plus an iterator for events with no cgroup.
+ */
+ for (heap_size = 1; css; css = css->parent)
+ heap_size++;
+
+ for_each_possible_cpu(cpu) {
+ cpuctx = per_cpu_ptr(event->pmu->pmu_cpu_context, cpu);
+ if (heap_size <= cpuctx->heap_size)
+ continue;
+
+ storage = kmalloc_node(heap_size * sizeof(struct perf_event *),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!storage) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ raw_spin_lock_irq(&cpuctx->ctx.lock);
+ if (cpuctx->heap_size < heap_size) {
+ swap(cpuctx->heap, storage);
+ if (storage == cpuctx->heap_default)
+ storage = NULL;
+ cpuctx->heap_size = heap_size;
+ }
+ raw_spin_unlock_irq(&cpuctx->ctx.lock);
+
+ kfree(storage);
+ }
+
+ return ret;
+}
+
static inline int perf_cgroup_connect(int fd, struct perf_event *event,
struct perf_event_attr *attr,
struct perf_event *group_leader)
@@ -910,6 +952,10 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
goto out;
}
+ ret = perf_cgroup_ensure_storage(event, css);
+ if (ret)
+ goto out;
+
cgrp = container_of(css, struct perf_cgroup, css);
event->cgrp = cgrp;
@@ -1531,6 +1577,30 @@ perf_event_groups_less(struct perf_event *left, struct perf_event *right)
if (left->cpu > right->cpu)
return false;
+#ifdef CONFIG_CGROUP_PERF
+ if (left->cgrp != right->cgrp) {
+ if (!left->cgrp || !left->cgrp->css.cgroup) {
+ /*
+ * Left has no cgroup but right does, no cgroups come
+ * first.
+ */
+ return true;
+ }
+ if (!right->cgrp || right->cgrp->css.cgroup) {
+ /*
+ * Right has no cgroup but left does, no cgroups come
+ * first.
+ */
+ return false;
+ }
+ /* Two dissimilar cgroups, order by id. */
+ if (left->cgrp->css.cgroup->kn->id < right->cgrp->css.cgroup->kn->id)
+ return true;
+
+ return false;
+ }
+#endif
+
if (left->group_index < right->group_index)
return true;
if (left->group_index > right->group_index)
@@ -1610,25 +1680,48 @@ del_event_from_groups(struct perf_event *event, struct perf_event_context *ctx)
}
/*
- * Get the leftmost event in the @cpu subtree.
+ * Get the leftmost event in the cpu/cgroup subtree.
*/
static struct perf_event *
-perf_event_groups_first(struct perf_event_groups *groups, int cpu)
+perf_event_groups_first(struct perf_event_groups *groups, int cpu,
+ struct cgroup *cgrp)
{
struct perf_event *node_event = NULL, *match = NULL;
struct rb_node *node = groups->tree.rb_node;
+#ifdef CONFIG_CGROUP_PERF
+ u64 node_cgrp_id, cgrp_id = 0;
+
+ if (cgrp)
+ cgrp_id = cgrp->kn->id;
+#endif
while (node) {
node_event = container_of(node, struct perf_event, group_node);
if (cpu < node_event->cpu) {
node = node->rb_left;
- } else if (cpu > node_event->cpu) {
+ continue;
+ }
+ if (cpu > node_event->cpu) {
node = node->rb_right;
- } else {
- match = node_event;
+ continue;
+ }
+#ifdef CONFIG_CGROUP_PERF
+ node_cgrp_id = 0;
+ if (node_event->cgrp && node_event->cgrp->css.cgroup)
+ node_cgrp_id = node_event->cgrp->css.cgroup->kn->id;
+
+ if (cgrp_id < node_cgrp_id) {
node = node->rb_left;
+ continue;
}
+ if (cgrp_id > node_cgrp_id) {
+ node = node->rb_right;
+ continue;
+ }
+#endif
+ match = node_event;
+ node = node->rb_left;
}
return match;
@@ -1641,12 +1734,26 @@ static struct perf_event *
perf_event_groups_next(struct perf_event *event)
{
struct perf_event *next;
+#ifdef CONFIG_CGROUP_PERF
+ u64 curr_cgrp_id = 0;
+ u64 next_cgrp_id = 0;
+#endif
next = rb_entry_safe(rb_next(&event->group_node), typeof(*event), group_node);
- if (next && next->cpu == event->cpu)
- return next;
+ if (next == NULL || next->cpu != event->cpu)
+ return NULL;
- return NULL;
+#ifdef CONFIG_CGROUP_PERF
+ if (event->cgrp && event->cgrp->css.cgroup)
+ curr_cgrp_id = event->cgrp->css.cgroup->kn->id;
+
+ if (next->cgrp && next->cgrp->css.cgroup)
+ next_cgrp_id = next->cgrp->css.cgroup->kn->id;
+
+ if (curr_cgrp_id != next_cgrp_id)
+ return NULL;
+#endif
+ return next;
}
/*
@@ -1986,6 +2093,12 @@ static int perf_get_aux_event(struct perf_event *event,
return 1;
}
+static inline struct list_head *get_event_list(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
+}
+
static void perf_group_detach(struct perf_event *event)
{
struct perf_event *sibling, *tmp;
@@ -2028,12 +2141,8 @@ static void perf_group_detach(struct perf_event *event)
if (!RB_EMPTY_NODE(&event->group_node)) {
add_event_to_groups(sibling, event->ctx);
- if (sibling->state == PERF_EVENT_STATE_ACTIVE) {
- struct list_head *list = sibling->attr.pinned ?
- &ctx->pinned_active : &ctx->flexible_active;
-
- list_add_tail(&sibling->active_list, list);
- }
+ if (sibling->state == PERF_EVENT_STATE_ACTIVE)
+ list_add_tail(&sibling->active_list, get_event_list(sibling));
}
WARN_ON_ONCE(sibling->ctx != event->ctx);
@@ -2350,6 +2459,8 @@ event_sched_in(struct perf_event *event,
{
int ret = 0;
+ WARN_ON_ONCE(event->ctx != ctx);
+
lockdep_assert_held(&ctx->lock);
if (event->state <= PERF_EVENT_STATE_OFF)
@@ -3388,71 +3499,103 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
}
-static int visit_groups_merge(struct perf_event_groups *groups, int cpu,
- int (*func)(struct perf_event *, void *), void *data)
+static bool perf_less_group_idx(const void *l, const void *r)
{
- struct perf_event **evt, *evt1, *evt2;
- int ret;
-
- evt1 = perf_event_groups_first(groups, -1);
- evt2 = perf_event_groups_first(groups, cpu);
-
- while (evt1 || evt2) {
- if (evt1 && evt2) {
- if (evt1->group_index < evt2->group_index)
- evt = &evt1;
- else
- evt = &evt2;
- } else if (evt1) {
- evt = &evt1;
- } else {
- evt = &evt2;
- }
+ const struct perf_event *le = l, *re = r;
- ret = func(*evt, data);
- if (ret)
- return ret;
+ return le->group_index < re->group_index;
+}
- *evt = perf_event_groups_next(*evt);
- }
+static void swap_ptr(void *l, void *r)
+{
+ void **lp = l, **rp = r;
- return 0;
+ swap(*lp, *rp);
}
-struct sched_in_data {
- struct perf_event_context *ctx;
- struct perf_cpu_context *cpuctx;
- int can_add_hw;
+static const struct min_heap_callbacks perf_min_heap = {
+ .elem_size = sizeof(struct perf_event *),
+ .less = perf_less_group_idx,
+ .swp = swap_ptr,
};
-static int pinned_sched_in(struct perf_event *event, void *data)
+static void __heap_add(struct min_heap *heap, struct perf_event *event)
{
- struct sched_in_data *sid = data;
+ struct perf_event **itrs = heap->data;
- if (event->state <= PERF_EVENT_STATE_OFF)
- return 0;
+ if (event) {
+ itrs[heap->nr] = event;
+ heap->nr++;
+ }
+}
- if (!event_filter_match(event))
- return 0;
+static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
+ struct perf_event_groups *groups, int cpu,
+ int (*func)(struct perf_event *, void *),
+ void *data)
+{
+#ifdef CONFIG_CGROUP_PERF
+ struct cgroup_subsys_state *css = NULL;
+#endif
+ /* Space for per CPU and/or any CPU event iterators. */
+ struct perf_event *itrs[2];
+ struct min_heap event_heap;
+ struct perf_event **evt;
+ int ret;
+
+ if (cpuctx) {
+ event_heap = (struct min_heap){
+ .data = cpuctx->heap,
+ .nr = 0,
+ .size = cpuctx->heap_size,
+ };
- if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
- if (!group_sched_in(event, sid->cpuctx, sid->ctx))
- list_add_tail(&event->active_list, &sid->ctx->pinned_active);
+ lockdep_assert_held(&cpuctx->ctx.lock);
+
+#ifdef CONFIG_CGROUP_PERF
+ if (cpuctx->cgrp)
+ css = &cpuctx->cgrp->css;
+#endif
+ } else {
+ event_heap = (struct min_heap){
+ .data = itrs,
+ .nr = 0,
+ .size = ARRAY_SIZE(itrs),
+ };
+ /* Events not within a CPU context may be on any CPU. */
+ __heap_add(&event_heap, perf_event_groups_first(groups, -1, NULL));
}
+ evt = event_heap.data;
- /*
- * If this pinned group hasn't been scheduled,
- * put it in error state.
- */
- if (event->state == PERF_EVENT_STATE_INACTIVE)
- perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+ __heap_add(&event_heap, perf_event_groups_first(groups, cpu, NULL));
+
+#ifdef CONFIG_CGROUP_PERF
+ for (; css; css = css->parent)
+ __heap_add(&event_heap, perf_event_groups_first(groups, cpu, css->cgroup));
+#endif
+
+ min_heapify_all(&event_heap, &perf_min_heap);
+
+ while (event_heap.nr) {
+ ret = func(*evt, data);
+ if (ret)
+ return ret;
+
+ *evt = perf_event_groups_next(*evt);
+ if (*evt)
+ min_heapify(&event_heap, 0, &perf_min_heap);
+ else
+ min_heap_pop(&event_heap, &perf_min_heap);
+ }
return 0;
}
-static int flexible_sched_in(struct perf_event *event, void *data)
+static int merge_sched_in(struct perf_event *event, void *data)
{
- struct sched_in_data *sid = data;
+ struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+ int *can_add_hw = data;
if (event->state <= PERF_EVENT_STATE_OFF)
return 0;
@@ -3460,14 +3603,17 @@ static int flexible_sched_in(struct perf_event *event, void *data)
if (!event_filter_match(event))
return 0;
- if (group_can_go_on(event, sid->cpuctx, sid->can_add_hw)) {
- int ret = group_sched_in(event, sid->cpuctx, sid->ctx);
- if (ret) {
- sid->can_add_hw = 0;
- sid->ctx->rotate_necessary = 1;
- return 0;
- }
- list_add_tail(&event->active_list, &sid->ctx->flexible_active);
+ if (group_can_go_on(event, cpuctx, *can_add_hw)) {
+ if (!group_sched_in(event, cpuctx, ctx))
+ list_add_tail(&event->active_list, get_event_list(event));
+ }
+
+ if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ if (event->attr.pinned)
+ perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+
+ *can_add_hw = 0;
+ ctx->rotate_necessary = 1;
}
return 0;
@@ -3477,30 +3623,28 @@ static void
ctx_pinned_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx)
{
- struct sched_in_data sid = {
- .ctx = ctx,
- .cpuctx = cpuctx,
- .can_add_hw = 1,
- };
+ int can_add_hw = 1;
- visit_groups_merge(&ctx->pinned_groups,
+ if (ctx != &cpuctx->ctx)
+ cpuctx = NULL;
+
+ visit_groups_merge(cpuctx, &ctx->pinned_groups,
smp_processor_id(),
- pinned_sched_in, &sid);
+ merge_sched_in, &can_add_hw);
}
static void
ctx_flexible_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx)
{
- struct sched_in_data sid = {
- .ctx = ctx,
- .cpuctx = cpuctx,
- .can_add_hw = 1,
- };
+ int can_add_hw = 1;
- visit_groups_merge(&ctx->flexible_groups,
+ if (ctx != &cpuctx->ctx)
+ cpuctx = NULL;
+
+ visit_groups_merge(cpuctx, &ctx->flexible_groups,
smp_processor_id(),
- flexible_sched_in, &sid);
+ merge_sched_in, &can_add_hw);
}
static void
@@ -6555,6 +6699,11 @@ static void perf_output_read(struct perf_output_handle *handle,
perf_output_read_one(handle, event, enabled, running);
}
+static inline bool perf_sample_save_hw_index(struct perf_event *event)
+{
+ return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
@@ -6643,6 +6792,8 @@ void perf_output_sample(struct perf_output_handle *handle,
* sizeof(struct perf_branch_entry);
perf_output_put(handle, data->br_stack->nr);
+ if (perf_sample_save_hw_index(event))
+ perf_output_put(handle, data->br_stack->hw_idx);
perf_output_copy(handle, data->br_stack->entries, size);
} else {
/*
@@ -6836,6 +6987,9 @@ void perf_prepare_sample(struct perf_event_header *header,
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
if (data->br_stack) {
+ if (perf_sample_save_hw_index(event))
+ size += sizeof(u64);
+
size += data->br_stack->nr
* sizeof(struct perf_branch_entry);
}
@@ -10349,6 +10503,9 @@ skip_type:
cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
+
+ cpuctx->heap_size = ARRAY_SIZE(cpuctx->heap_default);
+ cpuctx->heap = cpuctx->heap_default;
}
got_cpu_context:
@@ -10794,12 +10951,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!has_branch_stack(event))
event->attr.branch_sample_type = 0;
- if (cgroup_fd != -1) {
- err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
- if (err)
- goto err_ns;
- }
-
pmu = perf_init_event(event);
if (IS_ERR(pmu)) {
err = PTR_ERR(pmu);
@@ -10821,6 +10972,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_pmu;
}
+ if (cgroup_fd != -1) {
+ err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
+ if (err)
+ goto err_pmu;
+ }
+
err = exclusive_event_init(event);
if (err)
goto err_pmu;
@@ -10881,12 +11038,12 @@ err_per_task:
exclusive_event_destroy(event);
err_pmu:
+ if (is_cgroup_event(event))
+ perf_detach_cgroup(event);
if (event->destroy)
event->destroy(event);
module_put(pmu->module);
err_ns:
- if (is_cgroup_event(event))
- perf_detach_cgroup(event);
if (event->ns)
put_pid_ns(event->ns);
if (event->hw.target)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 69def4a9df00..f04b61c1a1cc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1769,6 +1769,16 @@ config TEST_LIST_SORT
If unsure, say N.
+config TEST_MIN_HEAP
+ tristate "Min heap test"
+ depends on DEBUG_KERNEL || m
+ help
+ Enable this to turn on min heap function tests. This test is
+ executed only once during system boot (so affects only boot time),
+ or at module load time.
+
+ If unsure, say N.
+
config TEST_SORT
tristate "Array-based sort test"
depends on DEBUG_KERNEL || m
diff --git a/lib/Makefile b/lib/Makefile
index 611872c06926..09a8acb0cf92 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -67,6 +67,7 @@ CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
UBSAN_SANITIZE_test_ubsan.o := y
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o
+obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o
obj-$(CONFIG_TEST_LKM) += test_module.o
obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o
obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o
diff --git a/lib/test_min_heap.c b/lib/test_min_heap.c
new file mode 100644
index 000000000000..d19c8080fd4d
--- /dev/null
+++ b/lib/test_min_heap.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define pr_fmt(fmt) "min_heap_test: " fmt
+
+/*
+ * Test cases for the min max heap.
+ */
+
+#include <linux/log2.h>
+#include <linux/min_heap.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+
+static __init bool less_than(const void *lhs, const void *rhs)
+{
+ return *(int *)lhs < *(int *)rhs;
+}
+
+static __init bool greater_than(const void *lhs, const void *rhs)
+{
+ return *(int *)lhs > *(int *)rhs;
+}
+
+static __init void swap_ints(void *lhs, void *rhs)
+{
+ int temp = *(int *)lhs;
+
+ *(int *)lhs = *(int *)rhs;
+ *(int *)rhs = temp;
+}
+
+static __init int pop_verify_heap(bool min_heap,
+ struct min_heap *heap,
+ const struct min_heap_callbacks *funcs)
+{
+ int *values = heap->data;
+ int err = 0;
+ int last;
+
+ last = values[0];
+ min_heap_pop(heap, funcs);
+ while (heap->nr > 0) {
+ if (min_heap) {
+ if (last > values[0]) {
+ pr_err("error: expected %d <= %d\n", last,
+ values[0]);
+ err++;
+ }
+ } else {
+ if (last < values[0]) {
+ pr_err("error: expected %d >= %d\n", last,
+ values[0]);
+ err++;
+ }
+ }
+ last = values[0];
+ min_heap_pop(heap, funcs);
+ }
+ return err;
+}
+
+static __init int test_heapify_all(bool min_heap)
+{
+ int values[] = { 3, 1, 2, 4, 0x8000000, 0x7FFFFFF, 0,
+ -3, -1, -2, -4, 0x8000000, 0x7FFFFFF };
+ struct min_heap heap = {
+ .data = values,
+ .nr = ARRAY_SIZE(values),
+ .size = ARRAY_SIZE(values),
+ };
+ struct min_heap_callbacks funcs = {
+ .elem_size = sizeof(int),
+ .less = min_heap ? less_than : greater_than,
+ .swp = swap_ints,
+ };
+ int i, err;
+
+ /* Test with known set of values. */
+ min_heapify_all(&heap, &funcs);
+ err = pop_verify_heap(min_heap, &heap, &funcs);
+
+
+ /* Test with randomly generated values. */
+ heap.nr = ARRAY_SIZE(values);
+ for (i = 0; i < heap.nr; i++)
+ values[i] = get_random_int();
+
+ min_heapify_all(&heap, &funcs);
+ err += pop_verify_heap(min_heap, &heap, &funcs);
+
+ return err;
+}
+
+static __init int test_heap_push(bool min_heap)
+{
+ const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0,
+ -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF };
+ int values[ARRAY_SIZE(data)];
+ struct min_heap heap = {
+ .data = values,
+ .nr = 0,
+ .size = ARRAY_SIZE(values),
+ };
+ struct min_heap_callbacks funcs = {
+ .elem_size = sizeof(int),
+ .less = min_heap ? less_than : greater_than,
+ .swp = swap_ints,
+ };
+ int i, temp, err;
+
+ /* Test with known set of values copied from data. */
+ for (i = 0; i < ARRAY_SIZE(data); i++)
+ min_heap_push(&heap, &data[i], &funcs);
+
+ err = pop_verify_heap(min_heap, &heap, &funcs);
+
+ /* Test with randomly generated values. */
+ while (heap.nr < heap.size) {
+ temp = get_random_int();
+ min_heap_push(&heap, &temp, &funcs);
+ }
+ err += pop_verify_heap(min_heap, &heap, &funcs);
+
+ return err;
+}
+
+static __init int test_heap_pop_push(bool min_heap)
+{
+ const int data[] = { 3, 1, 2, 4, 0x80000000, 0x7FFFFFFF, 0,
+ -3, -1, -2, -4, 0x80000000, 0x7FFFFFFF };
+ int values[ARRAY_SIZE(data)];
+ struct min_heap heap = {
+ .data = values,
+ .nr = 0,
+ .size = ARRAY_SIZE(values),
+ };
+ struct min_heap_callbacks funcs = {
+ .elem_size = sizeof(int),
+ .less = min_heap ? less_than : greater_than,
+ .swp = swap_ints,
+ };
+ int i, temp, err;
+
+ /* Fill values with data to pop and replace. */
+ temp = min_heap ? 0x80000000 : 0x7FFFFFFF;
+ for (i = 0; i < ARRAY_SIZE(data); i++)
+ min_heap_push(&heap, &temp, &funcs);
+
+ /* Test with known set of values copied from data. */
+ for (i = 0; i < ARRAY_SIZE(data); i++)
+ min_heap_pop_push(&heap, &data[i], &funcs);
+
+ err = pop_verify_heap(min_heap, &heap, &funcs);
+
+ heap.nr = 0;
+ for (i = 0; i < ARRAY_SIZE(data); i++)
+ min_heap_push(&heap, &temp, &funcs);
+
+ /* Test with randomly generated values. */
+ for (i = 0; i < ARRAY_SIZE(data); i++) {
+ temp = get_random_int();
+ min_heap_pop_push(&heap, &temp, &funcs);
+ }
+ err += pop_verify_heap(min_heap, &heap, &funcs);
+
+ return err;
+}
+
+static int __init test_min_heap_init(void)
+{
+ int err = 0;
+
+ err += test_heapify_all(true);
+ err += test_heapify_all(false);
+ err += test_heap_push(true);
+ err += test_heap_push(false);
+ err += test_heap_pop_push(true);
+ err += test_heap_pop_push(false);
+ if (err) {
+ pr_err("test failed with %d errors\n", err);
+ return -EINVAL;
+ }
+ pr_info("test passed\n");
+ return 0;
+}
+module_init(test_min_heap_init);
+
+static void __exit test_min_heap_exit(void)
+{
+ /* do nothing */
+}
+module_exit(test_min_heap_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 377d794d3105..397cfd65b3fe 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -181,6 +181,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -208,6 +210,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -853,7 +857,9 @@ enum perf_event_type {
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 nr;
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+ * { u64 from, to, flags } lbr[nr];
+ * } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
index f4ed9629ae85..0f75b28654de 100644
--- a/tools/lib/api/fs/Build
+++ b/tools/lib/api/fs/Build
@@ -1,2 +1,3 @@
libapi-y += fs.o
libapi-y += tracing_path.o
+libapi-y += cgroup.o
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
new file mode 100644
index 000000000000..889a6eb4aaca
--- /dev/null
+++ b/tools/lib/api/fs/cgroup.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fs.h"
+
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
+{
+ FILE *fp;
+ char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+ char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+ char *token, *saved_ptr = NULL;
+
+ fp = fopen("/proc/mounts", "r");
+ if (!fp)
+ return -1;
+
+ /*
+ * in order to handle split hierarchy, we need to scan /proc/mounts
+ * and inspect every cgroupfs mount point to find one that has
+ * perf_event subsystem
+ */
+ path_v1[0] = '\0';
+ path_v2[0] = '\0';
+
+ while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+ __stringify(PATH_MAX)"s %*d %*d\n",
+ mountpoint, type, tokens) == 3) {
+
+ if (!path_v1[0] && !strcmp(type, "cgroup")) {
+
+ token = strtok_r(tokens, ",", &saved_ptr);
+
+ while (token != NULL) {
+ if (subsys && !strcmp(token, subsys)) {
+ strcpy(path_v1, mountpoint);
+ break;
+ }
+ token = strtok_r(NULL, ",", &saved_ptr);
+ }
+ }
+
+ if (!path_v2[0] && !strcmp(type, "cgroup2"))
+ strcpy(path_v2, mountpoint);
+
+ if (path_v1[0] && path_v2[0])
+ break;
+ }
+ fclose(fp);
+
+ if (path_v1[0])
+ path = path_v1;
+ else if (path_v2[0])
+ path = path_v2;
+ else
+ return -1;
+
+ if (strlen(path) < maxlen) {
+ strcpy(buf, path);
+ return 0;
+ }
+ return -1;
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 92d03b8396b1..936edb95e1f3 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -28,6 +28,8 @@ FS(bpf_fs)
#undef FS
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys);
+
int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_xll(const char *filename, unsigned long long *value);
diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c
new file mode 100644
index 000000000000..6085693571ef
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/counting.c
@@ -0,0 +1,83 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+ const char *fmt, va_list ap)
+{
+ return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+ int count = 100000, err = 0;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_thread_map *threads;
+ struct perf_counts_values counts;
+
+ struct perf_event_attr attr1 = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ struct perf_event_attr attr2 = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_TASK_CLOCK,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+
+ libperf_init(libperf_print);
+ threads = perf_thread_map__new_dummy();
+ if (!threads) {
+ fprintf(stderr, "failed to create threads\n");
+ return -1;
+ }
+ perf_thread_map__set_pid(threads, 0, 0);
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ fprintf(stderr, "failed to create evlist\n");
+ goto out_threads;
+ }
+ evsel = perf_evsel__new(&attr1);
+ if (!evsel) {
+ fprintf(stderr, "failed to create evsel1\n");
+ goto out_evlist;
+ }
+ perf_evlist__add(evlist, evsel);
+ evsel = perf_evsel__new(&attr2);
+ if (!evsel) {
+ fprintf(stderr, "failed to create evsel2\n");
+ goto out_evlist;
+ }
+ perf_evlist__add(evlist, evsel);
+ perf_evlist__set_maps(evlist, NULL, threads);
+ err = perf_evlist__open(evlist);
+ if (err) {
+ fprintf(stderr, "failed to open evsel\n");
+ goto out_evlist;
+ }
+ perf_evlist__enable(evlist);
+ while (count--);
+ perf_evlist__disable(evlist);
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts);
+ fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
+ counts.val, counts.ena, counts.run);
+ }
+ perf_evlist__close(evlist);
+out_evlist:
+ perf_evlist__delete(evlist);
+out_threads:
+ perf_thread_map__put(threads);
+ return err;
+}
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index beaa8b8c08ff..e1bd2a93c6db 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5541,7 +5541,7 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
if (p10 > 1 && p10 < time)
trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
else
- trace_seq_printf(s, "%12llu\n", time);
+ trace_seq_printf(s, "%12llu", time);
}
struct print_event_type {
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..4d56586b2fb9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -334,6 +334,15 @@ Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
+--percore-show-thread::
+The event modifier "percore" has supported to sum up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but show the sum counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for post processing.
+
EXAMPLES
--------
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c03c36fde7e2..5e697cd2224a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -572,29 +572,12 @@ static void init_block_hist(struct block_hist *bh)
bh->valid = true;
}
-static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
-{
- struct block_info *bi_a = a->block_info;
- struct block_info *bi_b = b->block_info;
- int cmp;
-
- if (!bi_a->sym || !bi_b->sym)
- return -1;
-
- cmp = strcmp(bi_a->sym->name, bi_b->sym->name);
-
- if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end))
- return 0;
-
- return -1;
-}
-
static struct hist_entry *get_block_pair(struct hist_entry *he,
struct hists *hists_pair)
{
struct rb_root_cached *root = hists_pair->entries_in;
struct rb_node *next = rb_first_cached(root);
- int cmp;
+ int64_t cmp;
while (next != NULL) {
struct hist_entry *he_pair = rb_entry(next, struct hist_entry,
@@ -602,7 +585,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
next = rb_next(&he_pair->rb_node_in);
- cmp = block_pair_cmp(he_pair, he);
+ cmp = __block_info__cmp(he_pair, he);
if (!cmp)
return he_pair;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 72a12b69f120..d7c905f7520f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -104,6 +104,7 @@ struct report {
bool symbol_ipc;
bool total_cycles_mode;
struct block_report *block_reports;
+ int nr_block_reports;
};
static int report__config(const char *var, const char *value, void *cb)
@@ -966,8 +967,19 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
if (rep->total_cycles_mode) {
+ int block_hpps[6] = {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ };
+
rep->block_reports = block_info__create_report(session->evlist,
- rep->total_cycles);
+ rep->total_cycles,
+ block_hpps, 6,
+ &rep->nr_block_reports);
if (!rep->block_reports)
return -1;
}
@@ -1551,8 +1563,11 @@ error:
zfree(&report.ptime_range);
}
- if (report.block_reports)
- zfree(&report.block_reports);
+ if (report.block_reports) {
+ block_info__free_report(report.block_reports,
+ report.nr_block_reports);
+ report.block_reports = NULL;
+ }
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e2406b291c1c..656b347f6dd8 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
return 0;
for (i = 0; i < br->nr; i++) {
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (PRINT_FIELD(DSO)) {
memset(&alf, 0, sizeof(alf));
@@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
@@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
!alf.map->dso->adjust_symbols)
@@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(br->entries + i),
- br->entries[i].flags.in_tx ? 'X' : '-',
- br->entries[i].flags.abort ? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
struct machine *machine, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 start, end;
int i, insn, len, nr, ilen, printed = 0;
struct perf_insn x;
@@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += fprintf(fp, "%c", '\n');
/* Handle first from jump, of which we don't know the entry. */
- len = grab_bb(buffer, br->entries[nr-1].from,
- br->entries[nr-1].from,
+ len = grab_bb(buffer, entries[nr-1].from,
+ entries[nr-1].from,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len > 0) {
- printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+ printed += ip__fprintf_sym(entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
- printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+ printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
&x, buffer, len, 0, fp, &total_cycles);
if (PRINT_FIELD(SRCCODE))
- printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
+ printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
}
/* Print all blocks */
for (i = nr - 2; i >= 0; i--) {
- if (br->entries[i].from || br->entries[i].to)
+ if (entries[i].from || entries[i].to)
pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
- br->entries[i].from,
- br->entries[i].to);
- start = br->entries[i + 1].to;
- end = br->entries[i].from;
+ entries[i].from,
+ entries[i].to);
+ start = entries[i + 1].to;
+ end = entries[i].from;
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
/* Patch up missing kernel transfers due to ring filters */
if (len == -ENXIO && i > 0) {
- end = br->entries[--i].from;
+ end = entries[--i].from;
pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
}
@@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
+ printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
@@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* Hit the branch? In this case we are already done, and the target
* has not been executed yet.
*/
- if (br->entries[0].from == sample->ip)
+ if (entries[0].from == sample->ip)
goto out;
- if (br->entries[0].flags.abort)
+ if (entries[0].flags.abort)
goto out;
/*
@@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* between final branch and sample. When this happens just
* continue walking after the last TO until we hit a branch.
*/
- start = br->entries[0].to;
+ start = entries[0].to;
end = sample->ip;
if (end < start) {
/* Missing jump. Scan 128 bytes for the next branch */
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..ec053dc1e35c 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -929,6 +929,10 @@ static struct option stat_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+ "Use with 'percore' event qualifier to show the event "
+ "counts of one hardware thread by sum up total hardware "
+ "threads of same physical core"),
OPT_END()
};
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 87843af4c118..28313e59d6f6 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2)
{
double val;
- if (expr__parse(&val, ctx, &e))
+ if (expr__parse(&val, ctx, e))
TEST_ASSERT_VAL("parse test failed", 0);
TEST_ASSERT_VAL("unexpected value", val == val2);
return 0;
@@ -44,12 +44,12 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
return ret;
p = "FOO/0";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("division by zero", ret == 1);
+ ret = expr__parse(&val, &ctx, p);
+ TEST_ASSERT_VAL("division by zero", ret == -1);
p = "BAR/";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("missing operand", ret == 1);
+ ret = expr__parse(&val, &ctx, p);
+ TEST_ASSERT_VAL("missing operand", ret == -1);
TEST_ASSERT_VAL("find other",
expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 2762e1155238..14239e472187 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_BRANCH_STACK) {
COMP(branch_stack->nr);
+ COMP(branch_stack->hw_idx);
for (i = 0; i < s1->branch_stack->nr; i++)
MCOMP(branch_stack->entries[i]);
}
@@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
u64 data[64];
} branch_stack = {
/* 1 branch_entry */
- .data = {1, 211, 212, 213},
+ .data = {1, -1ULL, 211, 212, 213},
};
u64 regs[64];
const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
@@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.transaction = 112,
.raw_data = (void *)raw_data,
.callchain = &callchain.callchain,
+ .no_hw_idx = false,
.branch_stack = &branch_stack.branch_stack,
.user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
@@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
if (sample_type & PERF_SAMPLE_REGS_INTR)
evsel.core.attr.sample_regs_intr = sample_regs;
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+
for (i = 0; i < sizeof(regs); i++)
*(i + (u8 *)regs) = i & 0xfe;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 07da6c790b63..c0cf8dff694e 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -121,7 +121,9 @@ perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
+perf-y += expr-flex.o
perf-y += expr-bison.o
+perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
@@ -189,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+
$(OUTPUT)util/expr-bison.c: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
@@ -203,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
+CFLAGS_expr-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -216,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
+CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0ea95be84b3b..f1ea0d61eb5b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2611,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
if (++al->jump_sources > notes->max_jump_sources)
notes->max_jump_sources = al->jump_sources;
-
- ++notes->nr_jumps;
}
}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 001258601a37..07c775938d46 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -279,7 +279,6 @@ struct annotation {
struct annotation_options *options;
struct annotation_line **offsets;
int nr_events;
- int nr_jumps;
int max_jump_sources;
int nr_entries;
int nr_asm_entries;
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index fbbb6d640dad..423ec69bda6c 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -65,8 +65,7 @@ struct block_info *block_info__new(void)
return bi;
}
-int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
{
struct block_info *bi_l = left->block_info;
struct block_info *bi_r = right->block_info;
@@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
if (!bi_l->sym || !bi_r->sym) {
if (!bi_l->sym && !bi_r->sym)
- return 0;
+ return -1;
else if (!bi_l->sym)
return -1;
else
return 1;
}
- if (bi_l->sym == bi_r->sym) {
- if (bi_l->start == bi_r->start) {
- if (bi_l->end == bi_r->end)
- return 0;
- else
- return (int64_t)(bi_r->end - bi_l->end);
- } else
- return (int64_t)(bi_r->start - bi_l->start);
- } else {
- cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ if (cmp)
return cmp;
- }
- if (bi_l->sym->start != bi_r->sym->start)
- return (int64_t)(bi_r->sym->start - bi_l->sym->start);
+ if (bi_l->start != bi_r->start)
+ return (int64_t)(bi_r->start - bi_l->start);
- return (int64_t)(bi_r->sym->end - bi_l->sym->end);
+ return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return __block_info__cmp(left, right);
}
static void init_block_info(struct block_info *bi, struct symbol *sym,
@@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt,
return block_fmt->width;
}
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser) {
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+ width - 1, pct);
+ }
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp,
struct hist_entry *he)
@@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
struct block_info *bi = he->block_info;
double ratio = 0.0;
- char buf[16];
if (block_fmt->total_cycles)
ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
@@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_info *bi = he->block_info;
double ratio = 0.0;
u64 avg;
- char buf[16];
if (block_fmt->block_cycles && bi->num_aggr) {
avg = bi->cycles_aggr / bi->num_aggr;
ratio = (double)avg / (double)block_fmt->block_cycles;
}
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
@@ -349,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
switch (idx) {
case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
- fmt->entry = block_total_cycles_pct_entry;
+ fmt->color = block_total_cycles_pct_entry;
fmt->cmp = block_info__cmp;
fmt->sort = block_total_cycles_pct_sort;
break;
@@ -357,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
fmt->entry = block_cycles_lbr_entry;
break;
case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
- fmt->entry = block_cycles_pct_entry;
+ fmt->color = block_cycles_pct_entry;
break;
case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
fmt->entry = block_avg_cycles_entry;
@@ -377,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
}
static void register_block_columns(struct perf_hpp_list *hpp_list,
- struct block_fmt *block_fmts)
+ struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
- hpp_register(&block_fmts[i], i, hpp_list);
+ for (int i = 0; i < nr_hpps; i++)
+ hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
}
-static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
__hists__init(&bh->block_hists, &bh->block_list);
perf_hpp_list__init(&bh->block_list);
bh->block_list.nr_header_lines = 1;
- register_block_columns(&bh->block_list, block_fmts);
+ register_block_columns(&bh->block_list, block_fmts,
+ block_hpps, nr_hpps);
- perf_hpp_list__register_sort_field(&bh->block_list,
- &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
+ /* Sort by the first fmt */
+ perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
}
-static void process_block_report(struct hists *hists,
- struct block_report *block_report,
- u64 total_cycles)
+static int process_block_report(struct hists *hists,
+ struct block_report *block_report,
+ u64 total_cycles, int *block_hpps,
+ int nr_hpps)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
struct hist_entry *he;
- init_block_hist(bh, block_report->fmts);
+ if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+ return -1;
+
+ block_report->nr_fmts = nr_hpps;
+ init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
@@ -412,16 +421,19 @@ static void process_block_report(struct hists *hists,
next = rb_next(&he->rb_node);
}
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) {
+ for (int i = 0; i < nr_hpps; i++) {
block_report->fmts[i].total_cycles = total_cycles;
block_report->fmts[i].block_cycles = block_report->cycles;
}
hists__output_resort(&bh->block_hists, NULL);
+ return 0;
}
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles)
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps)
{
struct block_report *block_reports;
int nr_hists = evlist->core.nr_entries, i = 0;
@@ -434,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- process_block_report(hists, &block_reports[i], total_cycles);
+ process_block_report(hists, &block_reports[i], total_cycles,
+ block_hpps, nr_hpps);
i++;
}
+ *nr_reps = nr_hists;
return block_reports;
}
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+ for (int i = 0; i < nr_reps; i++)
+ hists__delete_entries(&reps[i].hist.block_hists);
+
+ free(reps);
+}
+
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
struct annotation_options *annotation_opts)
@@ -452,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
symbol_conf.report_individual_block = true;
hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
stdout, true);
- hists__delete_entries(&bh->block_hists);
return 0;
case 1:
symbol_conf.report_individual_block = true;
ret = block_hists_tui_browse(bh, evsel, min_percent,
env, annotation_opts);
- hists__delete_entries(&bh->block_hists);
return ret;
default:
return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index bef0d75e9819..42e9dcc4cf0a 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -45,6 +45,7 @@ struct block_report {
struct block_hist hist;
u64 cycles;
struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+ int nr_fmts;
};
struct block_hist;
@@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi)
#define block_info__zput(bi) __block_info__zput(&bi)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
@@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
u64 *block_cycles_aggr, u64 total_cycles);
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles);
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 88e00d268f6f..154a05cd03af 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -12,6 +12,7 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include "event.h"
struct branch_flags {
u64 mispred:1;
@@ -39,9 +40,30 @@ struct branch_entry {
struct branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries[0];
};
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ * u64 nr;
+ * struct branch_entry entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+ u64 *entry = (u64 *)sample->branch_stack;
+
+ entry++;
+ if (sample->no_hw_idx)
+ return (struct branch_entry *)entry;
+ return (struct branch_entry *)(++entry);
+}
+
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 4881d4af3381..5bc9d3b01bd9 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,75 +3,16 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
-#include <linux/stringify.h>
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <api/fs/fs.h>
int nr_cgroups;
-static int
-cgroupfs_find_mountpoint(char *buf, size_t maxlen)
-{
- FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
-
- fp = fopen("/proc/mounts", "r");
- if (!fp)
- return -1;
-
- /*
- * in order to handle split hierarchy, we need to scan /proc/mounts
- * and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
- */
- path_v1[0] = '\0';
- path_v2[0] = '\0';
-
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
-
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
-
- token = strtok_r(tokens, ",", &saved_ptr);
-
- while (token != NULL) {
- if (!strcmp(token, "perf_event")) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
- }
-
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
-
- if (path_v1[0] && path_v2[0])
- break;
- }
- fclose(fp);
-
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
-
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
- return 0;
- }
- return -1;
-}
-
static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
@@ -79,7 +20,7 @@ static int open_cgroup(const char *name)
int fd;
- if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
return -1;
scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5471045ebf5c..b3b3fe3ea345 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1172,6 +1172,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
u64 ip;
@@ -1202,6 +1203,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
if (etm->synth_opts.last_branch) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 85223159737c..3cda40a2fafc 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -139,6 +139,7 @@ struct perf_sample {
u16 insn_len;
u8 cpumode;
u16 misc;
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c8dc4450884c..816d930d774e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
- PERF_SAMPLE_BRANCH_NO_FLAGS;
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_HW_INDEX;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_LBR) {
perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK);
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
perf_evsel__reset_sample_bit(evsel, REGS_USER);
@@ -1673,6 +1675,8 @@ fallback_missing_features:
evsel->core.attr.ksymbol = 0;
if (perf_missing_features.bpf)
evsel->core.attr.bpf_event = 0;
+ if (perf_missing_features.branch_hw_idx)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->core.attr.sample_id_all = 0;
@@ -1784,7 +1788,12 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+ if (!perf_missing_features.branch_hw_idx &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
perf_missing_features.aux_output = true;
pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
goto out_close;
@@ -2169,7 +2178,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (data->branch_stack->nr > max_branch_nr)
return -EFAULT;
+
sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ if (perf_evsel__has_branch_hw_idx(evsel))
+ sz += sizeof(u64);
+ else
+ data->no_hw_idx = true;
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dc14f4a823cd..33804740e2ca 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -119,6 +119,7 @@ struct perf_missing_features {
bool ksymbol;
bool bpf;
bool aux_output;
+ bool branch_hw_idx;
};
extern struct perf_missing_features perf_missing_features;
@@ -389,6 +390,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
+static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000000..fd192ddf93c1
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include "expr.h"
+#include "expr-bison.h"
+#define YY_EXTRA_TYPE int
+#include "expr-flex.h"
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+/* Caller must make sure id is allocated */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+ int idx;
+
+ assert(ctx->num_ids < MAX_PARSE_ID);
+ idx = ctx->num_ids++;
+ ctx->ids[idx].name = name;
+ ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+ ctx->num_ids = 0;
+}
+
+static int
+__expr__parse(double *val, struct parse_ctx *ctx, const char *expr,
+ int start)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = expr_lex_init_extra(start, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+ expr_debug = 1;
+#endif
+
+ ret = expr_parse(val, ctx, scanner);
+
+ expr__flush_buffer(buffer, scanner);
+ expr__delete_buffer(buffer, scanner);
+ expr_lex_destroy(scanner);
+ return ret;
+}
+
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr)
+{
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0;
+}
+
+static bool
+already_seen(const char *val, const char *one, const char **other,
+ int num_other)
+{
+ int i;
+
+ if (one && !strcasecmp(one, val))
+ return true;
+ for (i = 0; i < num_other; i++)
+ if (!strcasecmp(other[i], val))
+ return true;
+ return false;
+}
+
+int expr__find_other(const char *expr, const char *one, const char ***other,
+ int *num_other)
+{
+ int err, i = 0, j = 0;
+ struct parse_ctx ctx;
+
+ expr__ctx_init(&ctx);
+ err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER);
+ if (err)
+ return -1;
+
+ *other = malloc((ctx.num_ids + 1) * sizeof(char *));
+ if (!*other)
+ return -ENOMEM;
+
+ for (i = 0, j = 0; i < ctx.num_ids; i++) {
+ const char *str = ctx.ids[i].name;
+
+ if (already_seen(str, one, *other, j))
+ continue;
+
+ str = strdup(str);
+ if (!str)
+ goto out;
+ (*other)[j++] = str;
+ }
+ (*other)[j] = NULL;
+
+out:
+ if (i != ctx.num_ids) {
+ while (--j)
+ free((char *) (*other)[i]);
+ free(*other);
+ err = -1;
+ }
+
+ *num_other = j;
+ return err;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 046160831f90..9377538f4097 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,7 +2,7 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#define EXPR_MAX_OTHER 15
+#define EXPR_MAX_OTHER 20
#define MAX_PARSE_ID EXPR_MAX_OTHER
struct parse_id {
@@ -17,10 +17,8 @@ struct parse_ctx {
void expr__ctx_init(struct parse_ctx *ctx);
void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-#ifndef IN_EXPR_Y
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
-#endif
-int expr__find_other(const char *p, const char *one, const char ***other,
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr);
+int expr__find_other(const char *expr, const char *one, const char ***other,
int *num_other);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000000..1928f2a3dddc
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,114 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+ u64 num;
+
+ errno = 0;
+ num = strtoull(str, NULL, base);
+ if (errno)
+ return EXPR_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ return __value(yylval, text, base, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str)
+{
+ char *ret = str;
+ char *dst = str;
+
+ while (*str) {
+ if (*str == '@')
+ *dst++ = '/';
+ else if (*str == '\\')
+ *dst++ = *++str;
+ else
+ *dst++ = *str;
+ str++;
+ }
+
+ *dst = 0x0;
+ return ret;
+}
+
+static int str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ yylval->str = normalize(strdup(text));
+ if (!yylval->str)
+ return EXPR_ERROR;
+
+ yylval->str = normalize(yylval->str);
+ return token;
+}
+%}
+
+number [0-9]+
+
+sch [-,=]
+spec \\{sch}
+sym [0-9a-zA-Z_\.:@]+
+symbol {spec}*{sym}*{spec}*{sym}*
+
+%%
+ {
+ int start_token;
+
+ start_token = parse_events_get_extra(yyscanner);
+
+ if (start_token) {
+ parse_events_set_extra(NULL, yyscanner);
+ return start_token;
+ }
+ }
+
+max { return MAX; }
+min { return MIN; }
+if { return IF; }
+else { return ELSE; }
+#smt_on { return SMT_ON; }
+{number} { return value(yyscanner, 10); }
+{symbol} { return str(yyscanner, ID); }
+"|" { return '|'; }
+"^" { return '^'; }
+"&" { return '&'; }
+"-" { return '-'; }
+"+" { return '+'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; }
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+. { }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 7d226241f1d7..4720cbe79357 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -1,31 +1,32 @@
/* Simple expression parser */
%{
+#define YYDEBUG 1
+#include <stdio.h>
#include "util.h"
#include "util/debug.h"
#include <stdlib.h> // strtod()
#define IN_EXPR_Y 1
#include "expr.h"
#include "smt.h"
-#include <assert.h>
#include <string.h>
-#define MAXIDLEN 256
%}
%define api.pure full
%parse-param { double *final_val }
%parse-param { struct parse_ctx *ctx }
-%parse-param { const char **pp }
-%lex-param { const char **pp }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
%union {
- double num;
- char id[MAXIDLEN+1];
+ double num;
+ char *str;
}
+%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
-%token <id> ID
+%token <str> ID
%token MIN MAX IF ELSE SMT_ON
%left MIN MAX IF
%left '|'
@@ -37,11 +38,9 @@
%type <num> expr if_expr
%{
-static int expr__lex(YYSTYPE *res, const char **pp);
-
-static void expr__error(double *final_val __maybe_unused,
+static void expr_error(double *final_val __maybe_unused,
struct parse_ctx *ctx __maybe_unused,
- const char **pp __maybe_unused,
+ void *scanner,
const char *s)
{
pr_debug("%s\n", s);
@@ -63,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
%}
%%
+start:
+EXPR_PARSE all_expr
+|
+EXPR_OTHER all_other
+
+all_other: all_other other
+|
+
+other: ID
+{
+ if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) {
+ pr_err("failed: way too many variables");
+ YYABORT;
+ }
+
+ ctx->ids[ctx->num_ids++].name = $1;
+}
+|
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')'
+
+
all_expr: if_expr { *final_val = $1; }
;
@@ -93,146 +113,3 @@ expr: NUMBER
;
%%
-
-static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
-{
- char *dst = res->id;
- const char *s = p;
-
- if (*p == '#')
- *dst++ = *p++;
-
- while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
- if (p - s >= MAXIDLEN)
- return -1;
- /*
- * Allow @ instead of / to be able to specify pmu/event/ without
- * conflicts with normal division.
- */
- if (*p == '@')
- *dst++ = '/';
- else if (*p == '\\')
- *dst++ = *++p;
- else
- *dst++ = *p;
- p++;
- }
- *dst = 0;
- *pp = p;
- dst = res->id;
- switch (dst[0]) {
- case 'm':
- if (!strcmp(dst, "min"))
- return MIN;
- if (!strcmp(dst, "max"))
- return MAX;
- break;
- case 'i':
- if (!strcmp(dst, "if"))
- return IF;
- break;
- case 'e':
- if (!strcmp(dst, "else"))
- return ELSE;
- break;
- case '#':
- if (!strcasecmp(dst, "#smt_on"))
- return SMT_ON;
- break;
- }
- return ID;
-}
-
-static int expr__lex(YYSTYPE *res, const char **pp)
-{
- int tok;
- const char *s;
- const char *p = *pp;
-
- while (isspace(*p))
- p++;
- s = p;
- switch (*p++) {
- case '#':
- case 'a' ... 'z':
- case 'A' ... 'Z':
- return expr__symbol(res, p - 1, pp);
- case '0' ... '9': case '.':
- res->num = strtod(s, (char **)&p);
- tok = NUMBER;
- break;
- default:
- tok = *s;
- break;
- }
- *pp = p;
- return tok;
-}
-
-/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
-{
- int idx;
- assert(ctx->num_ids < MAX_PARSE_ID);
- idx = ctx->num_ids++;
- ctx->ids[idx].name = name;
- ctx->ids[idx].val = val;
-}
-
-void expr__ctx_init(struct parse_ctx *ctx)
-{
- ctx->num_ids = 0;
-}
-
-static bool already_seen(const char *val, const char *one, const char **other,
- int num_other)
-{
- int i;
-
- if (one && !strcasecmp(one, val))
- return true;
- for (i = 0; i < num_other; i++)
- if (!strcasecmp(other[i], val))
- return true;
- return false;
-}
-
-int expr__find_other(const char *p, const char *one, const char ***other,
- int *num_otherp)
-{
- const char *orig = p;
- int err = -1;
- int num_other;
-
- *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
- if (!*other)
- return -1;
-
- num_other = 0;
- for (;;) {
- YYSTYPE val;
- int tok = expr__lex(&val, &p);
- if (tok == 0) {
- err = 0;
- break;
- }
- if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
- if (num_other >= EXPR_MAX_OTHER - 1) {
- pr_debug("Too many extra events in %s\n", orig);
- break;
- }
- (*other)[num_other] = strdup(val.id);
- if (!(*other)[num_other])
- return -1;
- num_other++;
- }
- }
- (*other)[num_other] = NULL;
- *num_otherp = num_other;
- if (err) {
- *num_otherp = 0;
- free(*other);
- *other = NULL;
- }
- return err;
-}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4246e7447e54..acbd046bf95c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events)
free(events);
}
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+ pr_warning("Reserved bits are set unexpectedly. "
+ "Please update perf tool.\n");
+ return false;
+ }
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+ pr_warning("Unknown sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->sample_type);
+ return false;
+ }
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+ pr_warning("Unknown read format (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->read_format);
+ return false;
+ }
+
+ if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+ (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+ pr_warning("Unknown branch sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->branch_sample_type);
+
+ return false;
+ }
+
+ return true;
+}
+
static struct evsel *read_event_desc(struct feat_fd *ff)
{
struct evsel *evsel, *events = NULL;
@@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
memcpy(&evsel->core.attr, buf, msz);
+ if (!perf_attr_check(&evsel->core.attr))
+ goto error;
+
if (do_read_u32(ff, &nr))
goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index ca5a8f4d007e..e74a5acf66d9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
u64 *total_cycles)
{
struct branch_info *bi;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
/* If we have branch cycles always annotate them. */
- if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ if (bs && bs->nr && entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 33cf8928cf05..23c8289c2472 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct perf_sample sample = { .ip = 0, };
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
@@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index b5af680fc667..dbdffb6673fe 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir)
return -ENOMEM;
return 0;
}
+ pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+ __func__, autoconf_path);
free(autoconf_path);
return -ENOENT;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb5c2cd44d30..fd14f1489802 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2081,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
{
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
return NULL;
for (i = 0; i < bs->nr; i++) {
- ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
- ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
- bi[i].flags = bs->entries[i].flags;
+ ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+ bi[i].flags = entries[i].flags;
}
return bi;
}
@@ -2185,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
int lbr_nr = lbr_stack->nr, j, k;
bool branch;
struct branch_flags *flags;
@@ -2210,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
ip = chain->ips[j];
else if (j > i + 1) {
k = j - i - 2;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
} else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
} else {
if (j < lbr_nr) {
k = lbr_nr - j - 1;
- ip = lbr_stack->entries[k].from;
+ ip = entries[k].from;
branch = true;
- flags = &lbr_stack->entries[k].flags;
+ flags = &entries[k].flags;
}
else if (j > lbr_nr)
ip = chain->ips[i + 1 - (j - lbr_nr)];
else {
- ip = lbr_stack->entries[0].to;
+ ip = entries[0].to;
branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
+ flags = &entries[0].flags;
+ branch_from = entries[0].from;
}
}
@@ -2281,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int max_stack)
{
struct branch_stack *branch = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2328,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
for (i = 0; i < nr; i++) {
if (callchain_param.order == ORDER_CALLEE) {
- be[i] = branch->entries[i];
+ be[i] = entries[i];
if (chain == NULL)
continue;
@@ -2347,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i].from >= chain->ips[first_call] - 8)
first_call++;
} else
- be[i] = branch->entries[branch->nr - i - 1];
+ be[i] = entries[branch->nr - i - 1];
}
memset(iter, 0, sizeof(struct iterations) * nr);
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 651203126c71..355d3458d4e6 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 80ca5d0ab7fe..8c1b27cd8b99 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
@@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
pydict_set_item_string_decref(pyelem, "from",
- PyLong_FromUnsignedLongLong(br->entries[i].from));
+ PyLong_FromUnsignedLongLong(entries[i].from));
pydict_set_item_string_decref(pyelem, "to",
- PyLong_FromUnsignedLongLong(br->entries[i].to));
+ PyLong_FromUnsignedLongLong(entries[i].to));
pydict_set_item_string_decref(pyelem, "mispred",
- PyBool_FromLong(br->entries[i].flags.mispred));
+ PyBool_FromLong(entries[i].flags.mispred));
pydict_set_item_string_decref(pyelem, "predicted",
- PyBool_FromLong(br->entries[i].flags.predicted));
+ PyBool_FromLong(entries[i].flags.predicted));
pydict_set_item_string_decref(pyelem, "in_tx",
- PyBool_FromLong(br->entries[i].flags.in_tx));
+ PyBool_FromLong(entries[i].flags.in_tx));
pydict_set_item_string_decref(pyelem, "abort",
- PyBool_FromLong(br->entries[i].flags.abort));
+ PyBool_FromLong(entries[i].flags.abort));
pydict_set_item_string_decref(pyelem, "cycles",
- PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+ PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
_PyUnicode_FromString(dsoname));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
@@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
char bf[512];
@@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
_PyUnicode_FromString(bf));
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "to",
_PyUnicode_FromString(bf));
- get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+ get_br_mspred(&entries[i].flags, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "pred",
_PyUnicode_FromString(bf));
- if (br->entries[i].flags.in_tx) {
+ if (entries[i].flags.in_tx) {
pydict_set_item_string_decref(pyelem, "in_tx",
_PyUnicode_FromString("X"));
} else {
@@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
_PyUnicode_FromString("-"));
}
- if (br->entries[i].flags.abort) {
+ if (entries[i].flags.abort) {
pydict_set_item_string_decref(pyelem, "abort",
_PyUnicode_FromString("A"));
} else {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d7d25b23e3..055b00abd56d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
struct ip_callchain *callchain = sample->callchain;
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 kernel_callchain_nr = callchain->nr;
unsigned int i;
@@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
i, callchain->ips[i]);
printf("..... %2d: %016" PRIx64 "\n",
- (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+ (int)(kernel_callchain_nr), entries[0].to);
for (i = 0; i < lbr_stack->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
- (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+ (int)(i + kernel_callchain_nr + 1), entries[i].from);
}
}
@@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel,
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
printf("%s: nr:%" PRIu64 "\n",
@@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++) {
- struct branch_entry *e = &sample->branch_stack->entries[i];
+ struct branch_entry *e = &entries[i];
if (!callstack) {
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..d89cb0da90f8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -110,7 +110,7 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- if (evsel->percore) {
+ if (evsel->percore && !config->percore_show_thread) {
fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
cpu_map__id_to_die(id),
@@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first)
+ bool *first, int cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -654,7 +654,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(config, id, nr, counter, uval, prefix,
+ printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
if (!metric_only)
fputc('\n', output);
@@ -687,7 +687,7 @@ static void print_aggr(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
fputc('\n', output);
@@ -1146,6 +1146,26 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
+static void print_percore_thread(struct perf_stat_config *config,
+ struct evsel *counter, char *prefix)
+{
+ int s, s2, id;
+ bool first = true;
+
+ for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) {
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ id = config->aggr_map->map[s];
+ if (s2 == id)
+ break;
+ }
+
+ print_counter_aggrdata(config, counter, s,
+ prefix, false,
+ &first, i);
+ }
+}
+
static void print_percore(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
@@ -1157,13 +1177,16 @@ static void print_percore(struct perf_stat_config *config,
if (!(config->aggr_map || config->aggr_get_id))
return;
+ if (config->percore_show_thread)
+ return print_percore_thread(config, counter, prefix);
+
for (s = 0; s < config->aggr_map->nr; s++) {
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 90d23cc3c8d4..0fd713d3674f 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -777,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config,
}
if (!metric_events[i]) {
- const char *p = metric_expr;
-
- if (expr__parse(&ratio, &pctx, &p) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr) == 0) {
char *unit;
char metric_bf[64];
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..b4fdfaa7f2c0 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,7 @@ struct perf_stat_config {
bool walltime_run_table;
bool all_kernel;
bool all_user;
+ bool percore_show_thread;
FILE *output;
unsigned int interval;
unsigned int timeout;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c423298fe62d..dd3e6f43fb86 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1183,7 +1183,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
result += sz;
}
@@ -1344,7 +1345,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
memcpy(array, sample->branch_stack, sz);
array = (void *)array + sz;
}