From 574b69cbb633037a9c305d2993aeb680f4a8badd Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 25 Mar 2011 13:13:34 +0100
Subject: ARM: 6834/1: perf: reset counters on all CPUs during initialisation

ARMv7 dictates that the interrupt-enable and count-enable registers for
each PMU counter are UNKNOWN following core reset.

This patch adds a new (optional) function pointer to struct arm_pmu for
resetting the PMU state during init. The reset function is called on each
CPU via an arch_initcall in the generic ARM perf_event code and allows the
PMU backend to write sane values to any UNKNOWN registers.

Acked-by: Jean Pihet
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/kernel/perf_event.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/arm/kernel/perf_event.c')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 22e194eb8536..e422f4c269a0 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -79,6 +79,7 @@ struct arm_pmu {
 	void		(*write_counter)(int idx, u32 val);
 	void		(*start)(void);
 	void		(*stop)(void);
+	void		(*reset)(void *);
 	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
 				    [PERF_COUNT_HW_CACHE_OP_MAX]
 				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
@@ -624,6 +625,19 @@ static struct pmu pmu = {
 #include "perf_event_v6.c"
 #include "perf_event_v7.c"
 
+/*
+ * Ensure the PMU has sane values out of reset.
+ * This requires SMP to be available, so exists as a separate initcall.
+ */
+static int __init
+armpmu_reset(void)
+{
+	if (armpmu && armpmu->reset)
+		return on_each_cpu(armpmu->reset, NULL, 1);
+	return 0;
+}
+arch_initcall(armpmu_reset);
+
 static int __init
 init_hw_perf_events(void)
 {
--
cgit v1.2.3
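
A minimal sketch of how a CPU-specific backend could wire up the new hook; my_pmu_reset, my_pmu_disable_all and my_pmu are names assumed for this illustration and are not taken from the patch or from perf_event_v7.c.

    /*
     * Hypothetical backend hook for the new (optional) reset callback.
     * armpmu_reset() invokes it via on_each_cpu(), so it runs once on
     * every CPU and should only touch that CPU's own PMU registers.
     */
    static void my_pmu_reset(void *info)
    {
    	my_pmu_disable_all();	/* assumed helper: disable counters and their interrupt enables */
    }

    static struct arm_pmu my_pmu = {
    	/* ...counter access and IRQ handling callbacks... */
    	.reset	= my_pmu_reset,
    };

Because the callback is optional, backends that do not provide it keep working: armpmu_reset() simply returns 0 when armpmu->reset is NULL.
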
From a737823d37666255e3e74ce84bc9611a038e0888 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 25 Mar 2011 17:12:37 +0100
Subject: ARM: 6835/1: perf: ensure overflows aren't missed due to IRQ latency

If a counter overflows during a perf stat profiling run it may overtake
the last known value of the counter:

        0       prev     new             0xffffffff
        |----------|-------|----------------------|

In this case, the number of events that have occurred is
(0xffffffff - prev) + new. Unfortunately, the event update code will not
realise an overflow has occurred and will instead report the event delta
as (new - prev), which may be considerably smaller than the real count.

This patch adds an extra argument to armpmu_event_update which indicates
whether or not an overflow has occurred. If an overflow has occurred then
we use the maximum period of the counter to calculate the elapsed events.

Acked-by: Jamie Iles
Reported-by: Ashwin Chaugule
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/kernel/perf_event.c        | 19 +++++++++++--------
 arch/arm/kernel/perf_event_v6.c     |  2 +-
 arch/arm/kernel/perf_event_v7.c     |  2 +-
 arch/arm/kernel/perf_event_xscale.c |  4 ++--
 4 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'arch/arm/kernel/perf_event.c')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index e422f4c269a0..69cfee0fe00f 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -205,11 +205,9 @@ armpmu_event_set_period(struct perf_event *event,
 static u64
 armpmu_event_update(struct perf_event *event,
 		    struct hw_perf_event *hwc,
-		    int idx)
+		    int idx, int overflow)
 {
-	int shift = 64 - 32;
-	s64 prev_raw_count, new_raw_count;
-	u64 delta;
+	u64 delta, prev_raw_count, new_raw_count;
 
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
@@ -219,8 +217,13 @@ again:
 			     new_raw_count) != prev_raw_count)
 		goto again;
 
-	delta = (new_raw_count << shift) - (prev_raw_count << shift);
-	delta >>= shift;
+	new_raw_count &= armpmu->max_period;
+	prev_raw_count &= armpmu->max_period;
+
+	if (overflow)
+		delta = armpmu->max_period - prev_raw_count + new_raw_count;
+	else
+		delta = new_raw_count - prev_raw_count;
 
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
@@ -237,7 +240,7 @@ armpmu_read(struct perf_event *event)
 	if (hwc->idx < 0)
 		return;
 
-	armpmu_event_update(event, hwc, hwc->idx);
+	armpmu_event_update(event, hwc, hwc->idx, 0);
 }
 
 static void
@@ -255,7 +258,7 @@ armpmu_stop(struct perf_event *event, int flags)
 	if (!(hwc->state & PERF_HES_STOPPED)) {
 		armpmu->disable(hwc, hwc->idx);
 		barrier(); /* why? */
-		armpmu_event_update(event, hwc, hwc->idx);
+		armpmu_event_update(event, hwc, hwc->idx, 0);
 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 	}
 }
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 6fc2d228db55..f1e8dd94afe8 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -474,7 +474,7 @@ armv6pmu_handle_irq(int irq_num,
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event, hwc, idx, 1);
 		data.period = event->hw.last_period;
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index c08d07a99fcc..4960686afb58 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -782,7 +782,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event, hwc, idx, 1);
 		data.period = event->hw.last_period;
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c
index 28cd3b025bc3..39affbe4fdb2 100644
--- a/arch/arm/kernel/perf_event_xscale.c
+++ b/arch/arm/kernel/perf_event_xscale.c
@@ -246,7 +246,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event, hwc, idx, 1);
 		data.period = event->hw.last_period;
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
@@ -578,7 +578,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event, hwc, idx, 1);
 		data.period = event->hw.last_period;
 		if (!armpmu_event_set_period(event, hwc, idx))
 			continue;
--
cgit v1.2.3
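
The overflow arithmetic can be checked in isolation. Below is a standalone userspace sketch; event_delta() and the concrete numbers are invented for illustration, and only the formula itself comes from the patch.

    /* Sketch of the overflow-aware delta for a 32-bit counter. */
    #include <assert.h>
    #include <stdint.h>

    static uint64_t event_delta(uint64_t max_period, uint64_t prev,
    			    uint64_t new_count, int overflow)
    {
    	if (overflow)
    		return max_period - prev + new_count;	/* counter wrapped */
    	return new_count - prev;			/* no wrap */
    }

    int main(void)
    {
    	/*
    	 * prev = 0xfffffff0, then 0x20 events occur and the counter wraps
    	 * round to 0x10. The formula in this patch reports 0x1f; the next
    	 * patch in the series adds the "+ 1" for the event that passes
    	 * through zero.
    	 */
    	assert(event_delta(0xffffffffULL, 0xfffffff0ULL, 0x10ULL, 1) == 0x1f);
    	return 0;
    }
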
From 6759788b944139793bffa889761cc3d8d703fdc0 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 5 Apr 2011 14:01:24 +0100
Subject: ARM: 6865/1: perf: ensure pass through zero is counted on overflow

Commit a737823d ("ARM: perf: ensure overflows aren't missed due to IRQ
latency") changed the way that event deltas are calculated on overflow
so that we don't miss events when the new count value overtakes the
previous one.

Unfortunately, we forgot to count the event that passes through zero, so
we end up being off by 1. This patch adds the missing correction.

Reported-by: Chris Moore
Signed-off-by: Will Deacon
Signed-off-by: Russell King
---
 arch/arm/kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/kernel/perf_event.c')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 69cfee0fe00f..979da3947f42 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -221,7 +221,7 @@ again:
 	prev_raw_count &= armpmu->max_period;
 
 	if (overflow)
-		delta = armpmu->max_period - prev_raw_count + new_raw_count;
+		delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
 	else
 		delta = new_raw_count - prev_raw_count;
 
--
cgit v1.2.3

From 860ad7823fdc00cd61dc70e7f35e07fb327cc9a4 Mon Sep 17 00:00:00 2001
From: Sonny Rao
Date: Mon, 18 Apr 2011 22:12:59 +0100
Subject: ARM: 6884/1: Fix infinite loop in ARM user perf_event backtrace code

The ARM user backtrace code can get into an infinite loop if it runs into
an invalid stack frame which points back to itself. This situation has
been observed in practice. Fix it by capping the number of entries in the
backtrace, which is also what other architectures do in their backtrace
code.

Signed-off-by: Sonny Rao
Acked-by: Jamie Iles
Acked-by: Olof Johansson
Signed-off-by: Russell King
---
 arch/arm/kernel/perf_event.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kernel/perf_event.c')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 979da3947f42..139e3c827369 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -746,7 +746,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
 	tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-	while (tail && !((unsigned long)tail & 0x3))
+	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+	       tail && !((unsigned long)tail & 0x3))
 		tail = user_backtrace(tail, entry);
 }
 
--
cgit v1.2.3
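
The failure mode fixed by 6884/1 can be shown outside the kernel. In this sketch, struct frame, walk() and MAX_STACK_DEPTH are simplified stand-ins for the kernel's frame_tail, perf_callchain_user() and PERF_MAX_STACK_DEPTH, not the real definitions.

    /*
     * Illustrative only: a bounded frame-pointer walk. Without the depth
     * check, a corrupt frame whose saved fp points back at itself would
     * make the loop spin forever.
     */
    #include <stddef.h>

    #define MAX_STACK_DEPTH 128	/* stand-in for PERF_MAX_STACK_DEPTH */

    struct frame {
    	struct frame *fp;	/* caller's saved frame pointer */
    	unsigned long lr;	/* saved return address */
    };

    static size_t walk(struct frame *tail, unsigned long *ips)
    {
    	size_t nr = 0;

    	/* The depth test is what breaks the tail->fp == tail cycle. */
    	while (nr < MAX_STACK_DEPTH && tail && !((unsigned long)tail & 0x3)) {
    		ips[nr++] = tail->lr;
    		tail = tail->fp;
    	}
    	return nr;
    }
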
From 57ce9bb39b476accf8fba6e16aea67ed76ea523d Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 17 May 2011 09:15:38 +0100
Subject: ARM: 6902/1: perf: Remove erroneous check on active_events

When initialising a PMU, there is a check to protect against races with
other CPUs filling all of the available event slots. Since armpmu_add
checks that an event can be scheduled, we do not need to do this at
initialisation time. Furthermore, the current code is broken because it
assumes that atomic_inc_not_zero will unconditionally increment
active_events and then tries to decrement it again on failure.

This patch removes the broken, redundant code.

Signed-off-by: Mark Rutland
Acked-by: Will Deacon
Cc: Jamie Iles
Signed-off-by: Russell King
---
 arch/arm/kernel/perf_event.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/arm/kernel/perf_event.c')

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 139e3c827369..d53c0abc4dd3 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -560,11 +560,6 @@ static int armpmu_event_init(struct perf_event *event)
 	event->destroy = hw_perf_event_destroy;
 
 	if (!atomic_inc_not_zero(&active_events)) {
-		if (atomic_read(&active_events) > armpmu->num_events) {
-			atomic_dec(&active_events);
-			return -ENOSPC;
-		}
-
 		mutex_lock(&pmu_reserve_mutex);
 		if (atomic_read(&active_events) == 0) {
 			err = armpmu_reserve_hardware();
--
cgit v1.2.3
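
The refcounting subtlety behind the removal, sketched as an annotated fragment; the comments state the generic kernel guarantee for atomic_inc_not_zero(), and only the surrounding branch shape comes from the diff above.

    if (!atomic_inc_not_zero(&active_events)) {
    	/*
    	 * atomic_inc_not_zero() increments only when the counter is
    	 * already non-zero, and returns 0 otherwise. On this branch
    	 * active_events is still 0 and was never incremented, so the
    	 * removed atomic_dec()/-ENOSPC path would have pushed the count
    	 * negative instead of undoing an increment.
    	 */

    	/* Slow path: reserve the PMU hardware under pmu_reserve_mutex,
    	 * as shown in the surviving context lines of the diff. */
    }
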