From 93a6d3ce6962044fe9badf528fed46b455d58292 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:52:19 +1100 Subject: powerpc: Provide a way to defer perf counter work until interrupts are enabled Because 64-bit powerpc uses lazy (soft) interrupt disabling, it is possible for a performance monitor exception to come in when the kernel thinks interrupts are disabled (i.e. when they are soft-disabled but hard-enabled). In such a situation the performance monitor exception handler might have some processing to do (such as process wakeups) which can't be done in what is effectively an NMI handler. This provides a way to defer that work until interrupts get enabled, either in raw_local_irq_restore() or by returning from an interrupt handler to code that had interrupts enabled. We have a per-processor flag that indicates that there is work pending to do when interrupts subsequently get re-enabled. This flag is checked in the interrupt return path and in raw_local_irq_restore(), and if it is set, perf_counter_do_pending() is called to do the pending work. Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hw_irq.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f75a5fc64d2e..e10f151c3db6 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,5 +131,36 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct hw_interrupt_type; +#ifdef CONFIG_PERF_COUNTERS +static inline unsigned long get_perf_counter_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_counter_pending))); + return x; +} + +static inline void set_perf_counter_pending(int x) +{ + asm volatile("stb %0,%1(13)" : : + "r" (x), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +extern void perf_counter_do_pending(void); + +#else + +static inline unsigned long get_perf_counter_pending(void) +{ + return 0; +} + +static inline void set_perf_counter_pending(int x) {} +static inline void perf_counter_do_pending(void) {} +#endif /* CONFIG_PERF_COUNTERS */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ -- cgit v1.2.3 From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 16 Mar 2009 21:00:00 +1100 Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc build Impact: build fix for powerpc Commit bd753921015e7905 ("perf_counter: software counter event infrastructure") introduced a use of TIF_PERF_COUNTERS into the core perfcounter code. This breaks the build on powerpc because we use a flag in a per-cpu area to signal wakeups on powerpc rather than a thread_info flag, because the thread_info flags have to be manipulated with atomic operations and are thus slower than per-cpu flags. This fixes the by changing the core to use an abstracted set_perf_counter_pending() function, which is defined on x86 to set the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag (paca->perf_counter_pending). It changes the previous powerpc definition of set_perf_counter_pending to not take an argument and adds a clear_perf_counter_pending, so as to simplify the definition on x86. On x86, set_perf_counter_pending() is defined as a macro. Defining it as a static inline in arch/x86/include/asm/perf_counters.h causes compile failures because gets included early in , and the definitions of set_tsk_thread_flag etc. are therefore not available in . (On powerpc this problem is avoided by defining set_perf_counter_pending etc. in .) Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/hw_irq.h | 14 +++++++++++--- arch/powerpc/kernel/irq.c | 11 +++-------- arch/powerpc/kernel/perf_counter.c | 3 +-- arch/x86/include/asm/perf_counter.h | 3 +++ kernel/perf_counter.c | 2 +- 5 files changed, 19 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b43076ff92c9..cb32d571c9c7 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -142,10 +142,17 @@ static inline unsigned long get_perf_counter_pending(void) return x; } -static inline void set_perf_counter_pending(int x) +static inline void set_perf_counter_pending(void) { asm volatile("stb %0,%1(13)" : : - "r" (x), + "r" (1), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +static inline void clear_perf_counter_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } @@ -158,7 +165,8 @@ static inline unsigned long get_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(int x) {} +static inline void set_perf_counter_pending(void) {} +static inline void clear_perf_counter_pending(void) {} static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0d2e37c57738..469e9635ff04 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,13 +104,6 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -#ifdef CONFIG_PERF_COUNTERS -notrace void __weak perf_counter_do_pending(void) -{ - set_perf_counter_pending(0); -} -#endif - notrace void raw_local_irq_restore(unsigned long en) { /* @@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) + if (get_perf_counter_pending()) { + clear_perf_counter_pending(); perf_counter_do_pending(); + } /* * if (get_paca()->hard_enabled) return; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e33d27cd464..5008762e8bf4 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -653,7 +653,6 @@ void perf_counter_do_pending(void) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - set_perf_counter_pending(0); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter && counter->wakeup_pending) { @@ -811,7 +810,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) perf_counter_do_pending(); irq_exit(); } else { - set_perf_counter_pending(1); + set_perf_counter_pending(); } } } diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 2e08ed736647..1662043b340f 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +#define set_perf_counter_pending() \ + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0018c5e81249..b39456ad74a1 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1433,7 +1433,7 @@ static void perf_swcounter_interrupt(struct perf_counter *counter, if (nmi) { counter->wakeup_pending = 1; - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + set_perf_counter_pending(); } else wake_up(&counter->waitq); } -- cgit v1.2.3 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 4 +- arch/powerpc/kernel/irq.c | 2 +- arch/powerpc/kernel/perf_counter.c | 22 +------ arch/x86/include/asm/perf_counter.h | 5 +- arch/x86/include/asm/thread_info.h | 4 +- arch/x86/kernel/cpu/perf_counter.c | 29 -------- arch/x86/kernel/signal.c | 6 -- include/linux/perf_counter.h | 15 +++-- kernel/perf_counter.c | 128 +++++++++++++++++++++++++++++++++--- kernel/timer.c | 3 + 10 files changed, 142 insertions(+), 76 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index cb32d571c9c7..20a44d0c9fdd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void); #else -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 469e9635ff04..2cd471f92fe6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) { + if (test_perf_counter_pending()) { clear_perf_counter_pending(); perf_counter_do_pending(); } diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df007fe0cc0b..cde720fc495c 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -649,24 +649,6 @@ hw_perf_counter_init(struct perf_counter *counter) return &power_perf_ops; } -/* - * Handle wakeups. - */ -void perf_counter_do_pending(void) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter && counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } -} - /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; long val; - int need_wakeup = 0, found = 0; + int found = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * immediately; otherwise we'll have do the wakeup when interrupts * get soft-enabled. */ - if (get_perf_counter_pending() && regs->softe) { + if (test_perf_counter_pending() && regs->softe) { irq_enter(); clear_perf_counter_pending(); perf_counter_do_pending(); diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 1662043b340f..e2b0e66b2353 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,8 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() \ - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); +#define set_perf_counter_pending() do { } while (0) +#define clear_perf_counter_pending() do { } while (0) +#define test_perf_counter_pending() (0) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ffd5d2a3676..8820a73ae090 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,7 +106,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -141,7 +139,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3f95b0cdc550..7aab177fb566 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) */ hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } - counter->wakeup_pending = 0; return 0; } @@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } -/* - * This handler is triggered by NMI contexts: - */ -void perf_counter_notify(struct pt_regs *regs) -{ - struct cpu_hw_counters *cpuc; - unsigned long flags; - int bit, cpu; - - local_irq_save(flags); - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - if (!counter) - continue; - - if (counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } - - local_irq_restore(flags); -} - void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 611615a92c90..0a813b17b172 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,7 +6,6 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ -#include #include #include #include @@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } - if (thread_info_flags & _TIF_PERF_COUNTERS) { - clear_thread_flag(TIF_PERF_COUNTERS); - perf_counter_notify(regs); - } - #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bf67ce17625..0d833228eee5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -275,6 +275,10 @@ struct perf_mmap_data { void *data_pages[0]; }; +struct perf_wakeup_entry { + struct perf_wakeup_entry *next; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -350,7 +354,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ - int wakeup_pending; + struct perf_wakeup_entry wakeup; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; @@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); @@ -461,7 +465,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } @@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(u32 event, u64 nr, - int nmi, struct pt_regs *regs) { } +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3b862a7988cd..f70ff80e79d7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head) kfree(counter); } +static void perf_pending_sync(struct perf_counter *counter); + static void free_counter(struct perf_counter *counter) { + perf_pending_sync(counter); + if (counter->destroy) counter->destroy(counter); @@ -1528,6 +1532,118 @@ static const struct file_operations perf_fops = { .mmap = perf_mmap, }; +/* + * Perf counter wakeup + * + * If there's data, ensure we set the poll() state and publish everything + * to user-space before waking everybody up. + */ + +void perf_counter_wakeup(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) { + (void)atomic_xchg(&data->wakeup, POLL_IN); + __perf_counter_update_userpage(counter, data); + } + rcu_read_unlock(); + + wake_up_all(&counter->waitq); +} + +/* + * Pending wakeups + * + * Handle the case where we need to wakeup up from NMI (or rq->lock) context. + * + * The NMI bit means we cannot possibly take locks. Therefore, maintain a + * single linked list and use cmpxchg() to add entries lockless. + */ + +#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) + +static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { + PENDING_TAIL, +}; + +static void perf_pending_queue(struct perf_counter *counter) +{ + struct perf_wakeup_entry **head; + struct perf_wakeup_entry *prev, *next; + + if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + return; + + head = &get_cpu_var(perf_wakeup_head); + + do { + prev = counter->wakeup.next = *head; + next = &counter->wakeup; + } while (cmpxchg(head, prev, next) != prev); + + set_perf_counter_pending(); + + put_cpu_var(perf_wakeup_head); +} + +static int __perf_pending_run(void) +{ + struct perf_wakeup_entry *list; + int nr = 0; + + list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + while (list != PENDING_TAIL) { + struct perf_counter *counter = container_of(list, + struct perf_counter, wakeup); + + list = list->next; + + counter->wakeup.next = NULL; + /* + * Ensure we observe the unqueue before we issue the wakeup, + * so that we won't be waiting forever. + * -- see perf_not_pending(). + */ + smp_wmb(); + + perf_counter_wakeup(counter); + nr++; + } + + return nr; +} + +static inline int perf_not_pending(struct perf_counter *counter) +{ + /* + * If we flush on whatever cpu we run, there is a chance we don't + * need to wait. + */ + get_cpu(); + __perf_pending_run(); + put_cpu(); + + /* + * Ensure we see the proper queue state before going to sleep + * so that we do not miss the wakeup. -- see perf_pending_handle() + */ + smp_rmb(); + return counter->wakeup.next == NULL; +} + +static void perf_pending_sync(struct perf_counter *counter) +{ + wait_event(counter->waitq, perf_not_pending(counter)); +} + +void perf_counter_do_pending(void) +{ + __perf_pending_run(); +} + /* * Output */ @@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { - (void)atomic_xchg(&handle->data->wakeup, POLL_IN); - __perf_counter_update_userpage(handle->counter, handle->data); - if (nmi) { - handle->counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&handle->counter->waitq); + if (nmi) + perf_pending_queue(handle->counter); + else + perf_counter_wakeup(handle->counter); } rcu_read_unlock(); } @@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; - counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; counter->ctx = ctx; diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e4..672ca25fbc43 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); + perf_counter_do_pending(); + hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) -- cgit v1.2.3 From e14112d1bd5e193166b54be19119cf6440470560 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 12 Jun 2009 10:14:22 +1000 Subject: perfcounters: remove powerpc definitions of perf_counter_do_pending Commit 925d519ab82b6dd7aca9420d809ee83819c08db2 ("perf_counter: unify and fix delayed counter wakeup") added global definitions. Signed-off-by: Stephen Rothwell Acked-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/hw_irq.h | 3 --- arch/powerpc/kernel/irq.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 20a44d0c9fdd..53512374e1c9 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -156,8 +156,6 @@ static inline void clear_perf_counter_pending(void) "i" (offsetof(struct paca_struct, perf_counter_pending))); } -extern void perf_counter_do_pending(void); - #else static inline unsigned long test_perf_counter_pending(void) @@ -167,7 +165,6 @@ static inline unsigned long test_perf_counter_pending(void) static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} -static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index feff792ed0f9..844d3f882a15 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 4c75f84f2c781beb230031234ed961d28771a764 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 12 Jun 2009 02:00:50 +0000 Subject: powerpc: Add compiler memory barrier to mtmsr macro On 32-bit non-Book E, local_irq_restore() turns into just mtmsr(), which doesn't currently have a compiler memory barrier. This means that accesses to memory inside a local_irq_save/restore section, or a spin_lock_irqsave/spin_unlock_irqrestore section on UP, can be reordered by the compiler to occur outside that section. To fix this, this adds a compiler memory barrier to mtmsr for both 32-bit and 64-bit. Having a compiler memory barrier in mtmsr makes sense because it will almost always be changing something about the context in which memory accesses are done, so in general we don't want memory accesses getting moved from one side of an mtmsr to the other. With the barrier in mtmsr(), some of the explicit barriers in hw_irq.h are now redundant, so this removes them. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hw_irq.h | 5 ++--- arch/powerpc/include/asm/reg.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 53512374e1c9..b7f8f4a87cc0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -80,7 +80,7 @@ static inline void local_irq_disable(void) __asm__ __volatile__("wrteei 0": : :"memory"); #else unsigned long msr; - __asm__ __volatile__("": : :"memory"); + msr = mfmsr(); SET_MSR_EE(msr & ~MSR_EE); #endif @@ -92,7 +92,7 @@ static inline void local_irq_enable(void) __asm__ __volatile__("wrteei 1": : :"memory"); #else unsigned long msr; - __asm__ __volatile__("": : :"memory"); + msr = mfmsr(); SET_MSR_EE(msr | MSR_EE); #endif @@ -108,7 +108,6 @@ static inline void local_irq_save_ptr(unsigned long *flags) #else SET_MSR_EE(msr & ~MSR_EE); #endif - __asm__ __volatile__("": : :"memory"); } #define local_save_flags(flags) ((flags) = mfmsr()) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index fb359b0a6937..a3c28e46947c 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -745,11 +745,11 @@ asm volatile("mfmsr %0" : "=r" (rval)); rval;}) #ifdef CONFIG_PPC64 #define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \ - : : "r" (v)) + : : "r" (v) : "memory") #define mtmsrd(v) __mtmsrd((v), 0) #define mtmsr(v) mtmsrd(v) #else -#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v)) +#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v) : "memory") #endif #define mfspr(rn) ({unsigned long rval; \ -- cgit v1.2.3 From 9974458e2f9a11dbd2f4bd14fab5a79af4907b41 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Jun 2009 21:45:16 +1000 Subject: perf_counter: Make set_perf_counter_pending() declaration common At present, every architecture that supports perf_counters has to declare set_perf_counter_pending() in its arch-specific headers. This consolidates the declarations into a single declaration in one common place, include/linux/perf_counter.h. On powerpc, we continue to provide a static inline definition of set_perf_counter_pending() in the powerpc hw_irq.h. Also, this removes from the x86 perf_counter.h the unused null definitions of {test,clear}_perf_counter_pending. Reported-by: Mike Frysinger Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <18998.13388.920691.523227@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/hw_irq.h | 1 - arch/powerpc/include/asm/perf_counter.h | 2 ++ arch/x86/include/asm/perf_counter.h | 5 ----- include/linux/perf_counter.h | 1 + 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 53512374e1c9..1974cf191b03 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -163,7 +163,6 @@ static inline unsigned long test_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(void) {} static inline void clear_perf_counter_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index cc7c887705b8..b398a84edced 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -10,6 +10,8 @@ */ #include +#include + #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b3..5fb33e160ea0 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,11 +84,6 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -extern void set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023a..eccae437fe37 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -604,6 +604,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern int perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_free_task(struct task_struct *task); +extern void set_perf_counter_pending(void); extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void __perf_disable(void); -- cgit v1.2.3 From 105988c015943e77092a6568bc5fb7e386df6ccd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 17 Jun 2009 21:50:04 +1000 Subject: perf_counter: powerpc: Enable use of software counters on 32-bit powerpc This enables the perf_counter subsystem on 32-bit powerpc. Since we don't have any support for hardware counters on 32-bit powerpc yet, only software counters can be used. Besides selecting HAVE_PERF_COUNTERS for 32-bit powerpc as well as 64-bit, the main thing this does is add an implementation of set_perf_counter_pending(). This needs to arrange for perf_counter_do_pending() to be called when interrupts are enabled. Rather than add code to local_irq_restore as 64-bit does, the 32-bit set_perf_counter_pending() generates an interrupt by setting the decrementer to 1 so that a decrementer interrupt will become pending in 1 or 2 timebase ticks (if a decrementer interrupt isn't already pending). When interrupts are enabled, timer_interrupt() will be called, and some new code in there calls perf_counter_do_pending(). We use a per-cpu array of flags to indicate whether we need to call perf_counter_do_pending() or not. This introduces a couple of new Kconfig symbols: PPC_HAVE_PMU_SUPPORT, which is selected by processor families for which we have hardware PMU support (currently only PPC64), and PPC_PERF_CTRS, which enables the powerpc-specific perf_counter back-end. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: linuxppc-dev@ozlabs.org Cc: benh@kernel.crashing.org LKML-Reference: <19000.55404.103840.393470@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/hw_irq.h | 5 ++++- arch/powerpc/include/asm/perf_counter.h | 10 ++++++++-- arch/powerpc/kernel/Makefile | 6 +++--- arch/powerpc/kernel/time.c | 25 +++++++++++++++++++++++++ arch/powerpc/platforms/Kconfig.cputype | 11 ++++++++++- 6 files changed, 51 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/include/asm/hw_irq.h') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 9fb344d5a86a..bf6cedfa05db 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,6 +126,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_PERF_COUNTERS config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 10a642df014e..867ab8ed69b3 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS + +#ifdef CONFIG_PPC64 static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -154,8 +156,9 @@ static inline void clear_perf_counter_pending(void) "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } +#endif /* CONFIG_PPC64 */ -#else +#else /* CONFIG_PERF_COUNTERS */ static inline unsigned long test_perf_counter_pending(void) { diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index b398a84edced..2c2d9f643df0 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -57,10 +57,16 @@ extern struct power_pmu *ppmu; struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) - extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +/* + * Only override the default definitions in include/linux/perf_counter.h + * if we have hardware PMU support. + */ +#ifdef CONFIG_PPC_PERF_CTRS +#define perf_misc_flags(regs) perf_misc_flags(regs) +#endif + /* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 612b0c4dc26d..c5f93f061927 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -95,9 +95,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ - power5-pmu.o power5+-pmu.o power6-pmu.o \ - power7-pmu.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ + power5+-pmu.o power6-pmu.o power7-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 15391c2ab013..eae4511ceeac 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ +#if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) +DEFINE_PER_CPU(u8, perf_counter_pending); + +void set_perf_counter_pending(void) +{ + get_cpu_var(perf_counter_pending) = 1; + set_dec(1); + put_cpu_var(perf_counter_pending); +} + +#define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) +#define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 + +#else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + +#define test_perf_counter_pending() 0 +#define clear_perf_counter_pending() + +#endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ + /* * For iSeries shared processors, we have to let the hypervisor * set the hardware decrementer. We set a virtual decrementer @@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 + if (test_perf_counter_pending()) { + clear_perf_counter_pending(); + perf_counter_do_pending(); + } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index cca6b4fc719a..dd9f3ec5ee30 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,7 @@ config PPC64 bool "64-bit kernel" default n - select HAVE_PERF_COUNTERS + select PPC_HAVE_PMU_SUPPORT help This option selects whether a 32-bit or a 64-bit kernel will be built. @@ -243,6 +243,15 @@ config VIRT_CPU_ACCOUNTING If in doubt, say Y here. +config PPC_HAVE_PMU_SUPPORT + bool + +config PPC_PERF_CTRS + def_bool y + depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT + help + This enables the powerpc-specific perf_counter back-end. + config SMP depends on PPC_STD_MMU || FSL_BOOKE bool "Symmetric multi-processing support" -- cgit v1.2.3