summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2011-02-21 15:24:58 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2011-02-21 15:24:58 +1100
commit1e7464fbcc9a9e7e3986ba4b0c3e18d842b7bf72 (patch)
treec7355af83c07c368c76c065974db49788611a001
parent05b1fc22b40b0182227e2a2a4a793463ea51524d (diff)
parent38455d3b0a9d59c40822078c9841ee94eaba8f35 (diff)
Merge remote-tracking branch 'tip/auto-latest'
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--Documentation/trace/kprobetrace.txt16
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/alpha/kernel/time.c8
-rw-r--r--arch/arm/kernel/time.c4
-rw-r--r--arch/arm/mach-clps711x/include/mach/time.h2
-rw-r--r--arch/blackfin/kernel/time.c6
-rw-r--r--arch/cris/arch-v10/kernel/time.c4
-rw-r--r--arch/cris/arch-v32/kernel/time.c6
-rw-r--r--arch/frv/kernel/time.c14
-rw-r--r--arch/h8300/kernel/time.c4
-rw-r--r--arch/h8300/kernel/timer/timer8.c2
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/ia64/kernel/time.c19
-rw-r--r--arch/ia64/xen/time.c13
-rw-r--r--arch/m32r/kernel/time.c5
-rw-r--r--arch/m68k/bvme6000/config.c4
-rw-r--r--arch/m68k/kernel/time.c4
-rw-r--r--arch/m68k/mvme147/config.c4
-rw-r--r--arch/m68k/mvme16x/config.c4
-rw-r--r--arch/m68k/sun3/sun3ints.c2
-rw-r--r--arch/m68knommu/kernel/time.c8
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/mn10300/kernel/time.c6
-rw-r--r--arch/parisc/kernel/time.c7
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/kernel/pcic.c4
-rw-r--r--arch/sparc/kernel/time_32.c9
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/um/Kconfig.common1
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/um/kernel/irq.c31
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/ia32/ia32entry.S1
-rw-r--r--arch/x86/include/asm/amd_nb.h6
-rw-r--r--arch/x86/include/asm/apic.h36
-rw-r--r--arch/x86/include/asm/entry_arch.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h24
-rw-r--r--arch/x86/include/asm/ipi.h8
-rw-r--r--arch/x86/include/asm/irq_vectors.h45
-rw-r--r--arch/x86/include/asm/mpspec.h3
-rw-r--r--arch/x86/include/asm/numa.h49
-rw-r--r--arch/x86/include/asm/numa_32.h7
-rw-r--r--arch/x86/include/asm/numa_64.h16
-rw-r--r--arch/x86/include/asm/page_types.h8
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h10
-rw-r--r--arch/x86/include/asm/topology.h17
-rw-r--r--arch/x86/include/asm/unistd_32.h3
-rw-r--r--arch/x86/include/asm/unistd_64.h2
-rw-r--r--arch/x86/include/asm/x86_init.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/amd_nb.c84
-rw-r--r--arch/x86/kernel/aperture_64.c33
-rw-r--r--arch/x86/kernel/apic/apic.c76
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c4
-rw-r--r--arch/x86/kernel/apic/apic_noop.c26
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c34
-rw-r--r--arch/x86/kernel/apic/es7000_32.c35
-rw-r--r--arch/x86/kernel/apic/ipi.c12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c21
-rw-r--r--arch/x86/kernel/apic/probe_32.c10
-rw-r--r--arch/x86/kernel/apic/summit_32.c47
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c70
-rw-r--r--arch/x86/kernel/asm-offsets_32.c69
-rw-r--r--arch/x86/kernel/asm-offsets_64.c90
-rw-r--r--arch/x86/kernel/cpu/amd.c61
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c80
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c4
-rw-r--r--arch/x86/kernel/e820.c10
-rw-r--r--arch/x86/kernel/entry_64.S5
-rw-r--r--arch/x86/kernel/irqinit.c79
-rw-r--r--arch/x86/kernel/microcode_amd.c188
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/setup.c45
-rw-r--r--arch/x86/kernel/setup_percpu.c11
-rw-r--r--arch/x86/kernel/smpboot.c113
-rw-r--r--arch/x86/kernel/syscall_table_32.S1
-rw-r--r--arch/x86/kernel/vmlinux.lds.S2
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/kernel/x86_init.c1
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/x86/mm/amdtopology_64.c12
-rw-r--r--arch/x86/mm/init.c36
-rw-r--r--arch/x86/mm/init_64.c117
-rw-r--r--arch/x86/mm/numa.c212
-rw-r--r--arch/x86/mm/numa_32.c7
-rw-r--r--arch/x86/mm/numa_64.c239
-rw-r--r--arch/x86/mm/srat_32.c6
-rw-r--r--arch/x86/mm/srat_64.c14
-rw-r--r--arch/x86/mm/tlb.c11
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/mrst/vrtc.c16
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--arch/xtensa/kernel/time.c6
-rw-r--r--drivers/char/mmtimer.c30
-rw-r--r--drivers/rtc/rtc-mrst.c13
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/interrupt.h3
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/posix-clock.h150
-rw-r--r--include/linux/posix-timers.h44
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/security.h9
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/thread_info.h3
-rw-r--r--include/linux/time.h11
-rw-r--r--include/linux/timex.h3
-rw-r--r--kernel/compat.c136
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/futex.c22
-rw-r--r--kernel/hrtimer.c13
-rw-r--r--kernel/irq/irqdesc.c17
-rw-r--r--kernel/irq/manage.c20
-rw-r--r--kernel/irq/pm.c3
-rw-r--r--kernel/perf_event.c260
-rw-r--r--kernel/posix-cpu-timers.c110
-rw-r--r--kernel/posix-timers.c321
-rw-r--r--kernel/rtmutex-debug.c1
-rw-r--r--kernel/rtmutex-tester.c39
-rw-r--r--kernel/rtmutex.c318
-rw-r--r--kernel/rtmutex_common.h16
-rw-r--r--kernel/sched.c31
-rw-r--r--kernel/softirq.c5
-rw-r--r--kernel/time.c57
-rw-r--r--kernel/time/Makefile3
-rw-r--r--kernel/time/clockevents.c1
-rw-r--r--kernel/time/jiffies.c20
-rw-r--r--kernel/time/ntp.c13
-rw-r--r--kernel/time/posix-clock.c441
-rw-r--r--kernel/time/tick-broadcast.c1
-rw-r--r--kernel/time/tick-common.c1
-rw-r--r--kernel/time/tick-internal.h9
-rw-r--r--kernel/time/tick-oneshot.c1
-rw-r--r--kernel/time/tick-sched.c1
-rw-r--r--kernel/time/timekeeping.c86
-rw-r--r--kernel/timer.c13
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/trace_kprobe.c111
-rw-r--r--lib/rwsem.c10
-rw-r--r--mm/page_alloc.c34
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--security/commoncap.c2
-rw-r--r--security/security.c2
-rw-r--r--tools/perf/Documentation/perf-probe.txt19
-rw-r--r--tools/perf/Makefile44
-rw-r--r--tools/perf/bench/sched-pipe.c2
-rw-r--r--tools/perf/builtin-annotate.c291
-rw-r--r--tools/perf/builtin-diff.c16
-rw-r--r--tools/perf/builtin-inject.c82
-rw-r--r--tools/perf/builtin-kmem.c10
-rw-r--r--tools/perf/builtin-lock.c6
-rw-r--r--tools/perf/builtin-probe.c70
-rw-r--r--tools/perf/builtin-record.c420
-rw-r--r--tools/perf/builtin-report.c108
-rw-r--r--tools/perf/builtin-sched.c27
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c78
-rw-r--r--tools/perf/builtin-test.c184
-rw-r--r--tools/perf/builtin-timechart.c13
-rw-r--r--tools/perf/builtin-top.c974
-rw-r--r--tools/perf/perf.h26
-rwxr-xr-xtools/perf/python/twatch.py41
-rw-r--r--tools/perf/util/annotate.c593
-rw-r--r--tools/perf/util/annotate.h101
-rw-r--r--tools/perf/util/build-id.c21
-rw-r--r--tools/perf/util/cache.h7
-rw-r--r--tools/perf/util/callchain.c227
-rw-r--r--tools/perf/util/callchain.h76
-rw-r--r--tools/perf/util/cpumap.c5
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c319
-rw-r--r--tools/perf/util/event.h78
-rw-r--r--tools/perf/util/evlist.c350
-rw-r--r--tools/perf/util/evlist.h64
-rw-r--r--tools/perf/util/evsel.c195
-rw-r--r--tools/perf/util/evsel.h29
-rw-r--r--tools/perf/util/header.c100
-rw-r--r--tools/perf/util/header.h55
-rw-r--r--tools/perf/util/hist.c237
-rw-r--r--tools/perf/util/hist.h48
-rw-r--r--tools/perf/util/include/linux/list.h1
-rw-r--r--tools/perf/util/parse-events.c71
-rw-r--r--tools/perf/util/parse-events.h7
-rw-r--r--tools/perf/util/probe-event.c137
-rw-r--r--tools/perf/util/probe-event.h4
-rw-r--r--tools/perf/util/probe-finder.c528
-rw-r--r--tools/perf/util/python.c891
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c3
-rw-r--r--tools/perf/util/session.c219
-rw-r--r--tools/perf/util/session.h27
-rw-r--r--tools/perf/util/setup.py19
-rw-r--r--tools/perf/util/strfilter.c200
-rw-r--r--tools/perf/util/strfilter.h48
-rw-r--r--tools/perf/util/symbol.c4
-rw-r--r--tools/perf/util/thread.c55
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/thread_map.c64
-rw-r--r--tools/perf/util/thread_map.h15
-rw-r--r--tools/perf/util/top.c218
-rw-r--r--tools/perf/util/top.h64
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/ui/browsers/annotate.c57
-rw-r--r--tools/perf/util/ui/browsers/hists.c11
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/ui/browsers/top.c136
-rw-r--r--tools/perf/util/ui/libslang.h6
-rw-r--r--tools/perf/util/ui/setup.c5
227 files changed, 8246 insertions, 5248 deletions
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
The routines look the same as above:
- rwlock_t xxx_lock = RW_LOCK_UNLOCKED;
+ rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
unsigned long flags;
@@ -196,25 +196,3 @@ appropriate:
For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
-
-SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
-with lockdep state tracking.
-
-Most of the time, you can simply turn:
- static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
-into:
- static DEFINE_SPINLOCK(xxx_lock);
-
-Static structure member variables go from:
-
- struct foo bar {
- .lock = SPIN_LOCK_UNLOCKED;
- };
-
-to:
-
- struct foo bar {
- .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
- };
-
-Declaration of static rw_locks undergo a similar transformation.
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 5f77d94598dd..6d27ab8d6e9f 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,11 +42,25 @@ Synopsis of kprobe_events
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
- (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
+ (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
+ are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
+Types
+-----
+Several types are supported for fetch-args. Kprobe tracer will access memory
+by given type. Prefix 's' and 'u' means those types are signed and unsigned
+respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+String type is a special type, which fetches a "null-terminated" string from
+kernel space. This means it will fail and store NULL if the string container
+has been paged out.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-
+offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
Per-Probe Event Filtering
-------------------------
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
#ifdef __KERNEL__
#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
static inline void __down_read(struct rw_semaphore *sem)
{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index c1f3e7cb82a4..a58e84f1a63b 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -159,7 +159,7 @@ void read_persistent_clock(struct timespec *ts)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t timer_interrupt(int irq, void *dev)
{
@@ -172,8 +172,6 @@ irqreturn_t timer_interrupt(int irq, void *dev)
profile_tick(CPU_PROFILING);
#endif
- write_seqlock(&xtime_lock);
-
/*
* Calculate how many ticks have passed since the last update,
* including any previous partial leftover. Save any resulting
@@ -187,9 +185,7 @@ irqreturn_t timer_interrupt(int irq, void *dev)
nticks = delta >> FIX_SHIFT;
if (nticks)
- do_timer(nticks);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(nticks);
if (test_irq_work_pending()) {
clear_irq_work_pending();
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 3d76bf233734..1ff46cabc7ef 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -107,9 +107,7 @@ void timer_tick(void)
{
profile_tick(CPU_PROFILING);
do_leds();
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/arm/mach-clps711x/include/mach/time.h b/arch/arm/mach-clps711x/include/mach/time.h
index 8fe283ccd1f3..61fef9129c6a 100644
--- a/arch/arm/mach-clps711x/include/mach/time.h
+++ b/arch/arm/mach-clps711x/include/mach/time.h
@@ -30,7 +30,7 @@ p720t_timer_interrupt(int irq, void *dev_id)
{
struct pt_regs *regs = get_irq_regs();
do_leds();
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c
index c9113619029f..8d73724c0092 100644
--- a/arch/blackfin/kernel/time.c
+++ b/arch/blackfin/kernel/time.c
@@ -114,16 +114,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#ifdef CONFIG_CORE_TIMER_IRQ_L1
__attribute__((l1_text))
#endif
irqreturn_t timer_interrupt(int irq, void *dummy)
{
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifdef CONFIG_IPIPE
update_root_process_times(get_irq_regs());
diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c
index 00eb36f8debf..20c85b5dc7d0 100644
--- a/arch/cris/arch-v10/kernel/time.c
+++ b/arch/cris/arch-v10/kernel/time.c
@@ -140,7 +140,7 @@ stop_watchdog(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
//static unsigned short myjiff; /* used by our debug routine print_timestamp */
@@ -176,7 +176,7 @@ timer_interrupt(int irq, void *dev_id)
/* call the real timer interrupt handler */
- do_timer(1);
+ xtime_update(1);
cris_do_profile(regs); /* Save profiling information */
return IRQ_HANDLED;
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index a545211e999d..bb978ede8985 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -183,7 +183,7 @@ void handle_watchdog_bite(struct pt_regs *regs)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick.
+ * as well as call the "xtime_update()" routine every clocktick.
*/
extern void cris_do_profile(struct pt_regs *regs);
@@ -216,9 +216,7 @@ static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
/* Call the real timer interrupt handler */
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
return IRQ_HANDLED;
}
diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c
index 0ddbbae83cb2..b457de496b70 100644
--- a/arch/frv/kernel/time.c
+++ b/arch/frv/kernel/time.c
@@ -50,21 +50,13 @@ static struct irqaction timer_irq = {
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
profile_tick(CPU_PROFILING);
- /*
- * Here we are in the timer irq handler. We just have irqs locally
- * disabled but we don't know if the timer_bh is running on the other
- * CPU. We need to avoid to SMP race with it. NOTE: we don't need
- * the irq version of write_lock because as just said we have irq
- * locally disabled. -arca
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
+ xtime_update(1);
#ifdef CONFIG_HEARTBEAT
static unsigned short n;
@@ -72,8 +64,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy)
__set_LEDS(n);
#endif /* CONFIG_HEARTBEAT */
- write_sequnlock(&xtime_lock);
-
update_process_times(user_mode(get_irq_regs()));
return IRQ_HANDLED;
diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c
index 165005aff9df..32263a138aa6 100644
--- a/arch/h8300/kernel/time.c
+++ b/arch/h8300/kernel/time.c
@@ -35,9 +35,7 @@ void h8300_timer_tick(void)
{
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
}
diff --git a/arch/h8300/kernel/timer/timer8.c b/arch/h8300/kernel/timer/timer8.c
index 3946c0fa8374..7a1533fad47d 100644
--- a/arch/h8300/kernel/timer/timer8.c
+++ b/arch/h8300/kernel/timer/timer8.c
@@ -61,7 +61,7 @@
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
#include <asm/intrinsics.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void
-init_rwsem (struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
/*
* lock for reading
*/
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 9702fa92489e..156ad803d5b7 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -190,19 +190,10 @@ timer_interrupt (int irq, void *dev_id)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == time_keeper_id) {
- /*
- * Here we are in the timer irq handler. We have irqs locally
- * disabled, but we don't know if the timer_bh is running on
- * another CPU. We need to avoid to SMP race by acquiring the
- * xtime_lock.
- */
- write_seqlock(&xtime_lock);
- do_timer(1);
- local_cpu_data->itm_next = new_itm;
- write_sequnlock(&xtime_lock);
- } else
- local_cpu_data->itm_next = new_itm;
+ if (smp_processor_id() == time_keeper_id)
+ xtime_update(1);
+
+ local_cpu_data->itm_next = new_itm;
if (time_after(new_itm, ia64_get_itc()))
break;
@@ -222,7 +213,7 @@ skip_process_time_accounting:
* comfort, we increase the safety margin by
* intentionally dropping the next tick(s). We do NOT
* update itm.next because that would force us to call
- * do_timer() which in turn would let our clock run
+ * xtime_update() which in turn would let our clock run
* too fast (with the potentially devastating effect
* of losing monotony of time).
*/
diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c
index c1c544513e8d..1f8244a78bee 100644
--- a/arch/ia64/xen/time.c
+++ b/arch/ia64/xen/time.c
@@ -139,14 +139,11 @@ consider_steal_time(unsigned long new_itm)
run_posix_cpu_timers(p);
delta_itm += local_cpu_data->itm_delta * (stolen + blocked);
- if (cpu == time_keeper_id) {
- write_seqlock(&xtime_lock);
- do_timer(stolen + blocked);
- local_cpu_data->itm_next = delta_itm + new_itm;
- write_sequnlock(&xtime_lock);
- } else {
- local_cpu_data->itm_next = delta_itm + new_itm;
- }
+ if (cpu == time_keeper_id)
+ xtime_update(stolen + blocked);
+
+ local_cpu_data->itm_next = delta_itm + new_itm;
+
per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen;
per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked;
}
diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c
index bda86820bffd..84dd04048db9 100644
--- a/arch/m32r/kernel/time.c
+++ b/arch/m32r/kernel/time.c
@@ -107,15 +107,14 @@ u32 arch_gettimeoffset(void)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
#ifndef CONFIG_SMP
profile_tick(CPU_PROFILING);
#endif
- /* XXX FIXME. Uh, the xtime_lock should be held here, no? */
- do_timer(1);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 9fe6fefb5e14..1edd95095cb4 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -45,8 +45,8 @@ extern int bvme6000_set_clock_mmss (unsigned long);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via bvme6000_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c
index 06438dac08ff..18b34ee5db3b 100644
--- a/arch/m68k/kernel/time.c
+++ b/arch/m68k/kernel/time.c
@@ -37,11 +37,11 @@ static inline int set_rtc_mmss(unsigned long nowtime)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
static irqreturn_t timer_interrupt(int irq, void *dummy)
{
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
profile_tick(CPU_PROFILING);
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index 100baaa692a1..6cb9c3a9b6c9 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -46,8 +46,8 @@ extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme147_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme147_process_int() */
irq_handler_t tick_handler;
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 11edf61cc2c4..0b28e2621653 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -51,8 +51,8 @@ extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to do_timer() in
- * kernel/sched.c, called via mvme16x_process_int() */
+/* Save tick handler routine pointer, will point to xtime_update() in
+ * kernel/time/timekeeping.c, called via mvme16x_process_int() */
static irq_handler_t tick_handler;
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 2d9e21bd313a..6464ad3ae3e6 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -66,7 +66,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
#ifdef CONFIG_SUN3
intersil_clear();
#endif
- do_timer(1);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
if (!(kstat_cpu(0).irqs[irq] % 20))
sun3_leds(led_pattern[(kstat_cpu(0).irqs[irq] % 160) / 20]);
diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c
index d6ac2a43453c..6623909f70e6 100644
--- a/arch/m68knommu/kernel/time.c
+++ b/arch/m68knommu/kernel/time.c
@@ -36,7 +36,7 @@ static inline int set_rtc_mmss(unsigned long nowtime)
#ifndef CONFIG_GENERIC_CLOCKEVENTS
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
irqreturn_t arch_timer_interrupt(int irq, void *dummy)
{
@@ -44,11 +44,7 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy)
if (current->pid)
profile_tick(CPU_PROFILING);
- write_seqlock(&xtime_lock);
-
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c
index 75da468090b9..5b955000626d 100644
--- a/arch/mn10300/kernel/time.c
+++ b/arch/mn10300/kernel/time.c
@@ -104,8 +104,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
unsigned tsc, elapse;
irqreturn_t ret;
- write_seqlock(&xtime_lock);
-
while (tsc = get_cycles(),
elapse = tsc - mn10300_last_tsc, /* time elapsed since last
* tick */
@@ -114,11 +112,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
mn10300_last_tsc += MN10300_TSC_PER_HZ;
/* advance the kernel's time tracking system */
- do_timer(1);
+ xtime_update(1);
}
- write_sequnlock(&xtime_lock);
-
ret = local_timer_interrupt();
#ifdef CONFIG_SMP
send_IPI_allbutself(LOCAL_TIMER_IPI);
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 05511ccb61d2..45b7389d77aa 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -162,11 +162,8 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
update_process_times(user_mode(get_irq_regs()));
}
- if (cpu == 0) {
- write_seqlock(&xtime_lock);
- do_timer(ticks_elapsed);
- write_sequnlock(&xtime_lock);
- }
+ if (cpu == 0)
+ xtime_update(ticks_elapsed);
return IRQ_HANDLED;
}
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
* by Paul Mackerras <paulus@samba.org>.
*/
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
/*
* the semaphore definition
*/
@@ -33,47 +28,6 @@
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-struct rw_semaphore {
- long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, \
- __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
- } while (0)
-
/*
* lock for reading
*/
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return sem->count != 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifndef __s390x__
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-
-/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
#endif
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_ACTIVE_MASK 0xffffffffL
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
/*
* lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
index aeaa09a3c655..2cdc131b50ac 100644
--- a/arch/sparc/kernel/pcic.c
+++ b/arch/sparc/kernel/pcic.c
@@ -700,10 +700,8 @@ static void pcic_clear_clock_irq(void)
static irqreturn_t pcic_timer_handler (int irq, void *h)
{
- write_seqlock(&xtime_lock); /* Dummy, to show that we remember */
pcic_clear_clock_irq();
- do_timer(1);
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
#endif
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 9c743b1886ff..4211bfc9bcad 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -85,7 +85,7 @@ int update_persistent_clock(struct timespec now)
/*
* timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "do_timer()" routine every clocktick
+ * as well as call the "xtime_update()" routine every clocktick
*/
#define TICK_SIZE (tick_nsec / 1000)
@@ -96,14 +96,9 @@ static irqreturn_t timer_interrupt(int dummy, void *dev_id)
profile_tick(CPU_PROFILING);
#endif
- /* Protect counter clear so that do_gettimeoffset works */
- write_seqlock(&xtime_lock);
-
clear_clock_irq();
- do_timer(1);
-
- write_sequnlock(&xtime_lock);
+ xtime_update(1);
#ifndef CONFIG_SMP
update_process_times(user_mode(get_irq_regs()));
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
- [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+ [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
};
#else /* SMP */
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index e351e14b4339..1e78940218c0 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
bool
default y
select HAVE_GENERIC_HARDIRQS
+ select GENERIC_HARDIRQS_NO_DEPRECATED
config MMU
bool
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.request = NULL, \
.start_sg = 0, \
.end_sg = 0, \
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 3f0ac9e0c966..64cfea80cfe2 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -35,8 +35,10 @@ int show_interrupts(struct seq_file *p, void *v)
}
if (i < NR_IRQS) {
- raw_spin_lock_irqsave(&irq_desc[i].lock, flags);
- action = irq_desc[i].action;
+ struct irq_desc *desc = irq_to_desc(i);
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ action = desc->action;
if (!action)
goto skip;
seq_printf(p, "%3d: ",i);
@@ -46,7 +48,7 @@ int show_interrupts(struct seq_file *p, void *v)
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
#endif
- seq_printf(p, " %14s", irq_desc[i].chip->name);
+ seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
seq_printf(p, " %s", action->name);
for (action=action->next; action; action = action->next)
@@ -54,7 +56,7 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
skip:
- raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
} else if (i == NR_IRQS)
seq_putc(p, '\n');
@@ -360,10 +362,10 @@ EXPORT_SYMBOL(um_request_irq);
EXPORT_SYMBOL(reactivate_fd);
/*
- * irq_chip must define (startup || enable) &&
- * (shutdown || disable) && end
+ * irq_chip must define at least enable/disable and ack when
+ * the edge handler is used.
*/
-static void dummy(unsigned int irq)
+static void dummy(struct irq_data *d)
{
}
@@ -371,20 +373,17 @@ static void dummy(unsigned int irq)
static struct irq_chip normal_irq_type = {
.name = "SIGIO",
.release = free_irq_by_irq_and_dev,
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
static struct irq_chip SIGVTALRM_irq_type = {
.name = "SIGVTALRM",
.release = free_irq_by_irq_and_dev,
- .shutdown = dummy, /* never called */
- .disable = dummy,
- .enable = dummy,
- .ack = dummy,
- .end = dummy
+ .irq_disable = dummy,
+ .irq_enable = dummy,
+ .irq_ack = dummy,
};
void __init init_IRQ(void)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9137e5b39a25..4de4abc9e58e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1705,7 +1705,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
- def_bool X86_64
+ def_bool y
depends on NUMA
menu "Power management and ACPI options"
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 518bb99c3394..0ed78961b60f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -851,4 +851,5 @@ ia32_sys_call_table:
.quad sys_fanotify_init
.quad sys32_fanotify_mark
.quad sys_prlimit64 /* 340 */
+ .quad compat_sys_clock_adjtime
ia32_syscall_end:
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..2b33c4df979f 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -9,7 +9,7 @@ struct amd_nb_bus_dev_range {
u8 dev_limit;
};
-extern struct pci_device_id amd_nb_misc_ids[];
+extern const struct pci_device_id amd_nb_misc_ids[];
extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[];
struct bootnode;
@@ -18,6 +18,8 @@ extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);
+extern int amd_get_subcaches(int);
+extern int amd_set_subcaches(int, int);
#ifdef CONFIG_NUMA_EMU
extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes);
@@ -26,6 +28,7 @@ extern void amd_get_nodes(struct bootnode *nodes);
struct amd_northbridge {
struct pci_dev *misc;
+ struct pci_dev *link;
};
struct amd_northbridge_info {
@@ -37,6 +40,7 @@ extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART 0x1
#define AMD_NB_L3_INDEX_DISABLE 0x2
+#define AMD_NB_L3_PARTITIONING 0x4
#ifdef CONFIG_AMD_NB
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3c896946f4cc..b8a3484d69e9 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -307,8 +307,6 @@ struct apic {
void (*setup_apic_routing)(void);
int (*multi_timer_check)(int apic, int irq);
- int (*apicid_to_node)(int logical_apicid);
- int (*cpu_to_logical_apicid)(int cpu);
int (*cpu_present_to_apicid)(int mps_cpu);
void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap);
void (*setup_portio_remap)(void);
@@ -356,6 +354,23 @@ struct apic {
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
u32 (*safe_wait_icr_idle)(void);
+
+#ifdef CONFIG_X86_32
+ /*
+ * Called very early during boot from get_smp_config(). It should
+ * return the logical apicid. x86_[bios]_cpu_to_apicid is
+ * initialized before this function is called.
+ *
+ * If logical apicid can't be determined that early, the function
+ * may return BAD_APICID. Logical apicid will be configured after
+ * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity
+ * won't be applied properly during early boot in this case.
+ */
+ int (*x86_32_early_logical_apicid)(int cpu);
+
+ /* determine CPU -> NUMA node mapping */
+ int (*x86_32_numa_cpu_node)(int cpu);
+#endif
};
/*
@@ -503,6 +518,11 @@ extern struct apic apic_noop;
extern struct apic apic_default;
+static inline int noop_x86_32_early_logical_apicid(int cpu)
+{
+ return BAD_APICID;
+}
+
/*
* Set up the logical destination ID.
*
@@ -522,7 +542,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
return cpuid_apic >> index_msb;
}
-extern int default_apicid_to_node(int logical_apicid);
+extern int default_x86_32_numa_cpu_node(int cpu);
#endif
@@ -558,12 +578,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma
*retmap = *phys_map;
}
-/* Mapping from cpu number to logical apicid */
-static inline int default_cpu_to_logical_apicid(int cpu)
-{
- return 1 << cpu;
-}
-
static inline int __default_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
@@ -596,8 +610,4 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#endif /* CONFIG_X86_LOCAL_APIC */
-#ifdef CONFIG_X86_32
-extern u8 cpu_2_logical_apicid[NR_CPUS];
-#endif
-
#endif /* _ASM_X86_APIC_H */
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 57650ab4a5f5..1cd6d26a0a8d 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR)
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
BUILD_INTERRUPT3(invalidate_interrupt\idx,
(INVALIDATE_TLB_VECTOR_START)+\idx,
smp_invalidate_interrupt)
+.endif
.endr
#endif
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 0274ec5a7e62..bb9efe8706e2 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void);
extern void invalidate_interrupt5(void);
extern void invalidate_interrupt6(void);
extern void invalidate_interrupt7(void);
+extern void invalidate_interrupt8(void);
+extern void invalidate_interrupt9(void);
+extern void invalidate_interrupt10(void);
+extern void invalidate_interrupt11(void);
+extern void invalidate_interrupt12(void);
+extern void invalidate_interrupt13(void);
+extern void invalidate_interrupt14(void);
+extern void invalidate_interrupt15(void);
+extern void invalidate_interrupt16(void);
+extern void invalidate_interrupt17(void);
+extern void invalidate_interrupt18(void);
+extern void invalidate_interrupt19(void);
+extern void invalidate_interrupt20(void);
+extern void invalidate_interrupt21(void);
+extern void invalidate_interrupt22(void);
+extern void invalidate_interrupt23(void);
+extern void invalidate_interrupt24(void);
+extern void invalidate_interrupt25(void);
+extern void invalidate_interrupt26(void);
+extern void invalidate_interrupt27(void);
+extern void invalidate_interrupt28(void);
+extern void invalidate_interrupt29(void);
+extern void invalidate_interrupt30(void);
+extern void invalidate_interrupt31(void);
extern void irq_move_cleanup_interrupt(void);
extern void reboot_interrupt(void);
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h
index 0b7228268a63..615fa9061b57 100644
--- a/arch/x86/include/asm/ipi.h
+++ b/arch/x86/include/asm/ipi.h
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
int vector);
-extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
- int vector);
-extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
- int vector);
/* Avoid include hell */
#define NMI_VECTOR 0x02
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector)
}
#ifdef CONFIG_X86_32
+extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+ int vector);
+extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+ int vector);
extern void default_send_IPI_mask_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_allbutself(int vector);
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6af0894dafb4..4980f48bbbb7 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -1,6 +1,7 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
+#include <linux/threads.h>
/*
* Linux IRQ vector layout.
*
@@ -16,8 +17,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... 237 : device interrupts
- * Vectors 238 ... 255 : special interrupts
+ * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts
+ * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -96,37 +97,43 @@
#define THRESHOLD_APIC_VECTOR 0xf9
#define REBOOT_VECTOR 0xf8
-/* f0-f7 used for spreading out TLB flushes: */
-#define INVALIDATE_TLB_VECTOR_END 0xf7
-#define INVALIDATE_TLB_VECTOR_START 0xf0
-#define NUM_INVALIDATE_TLB_VECTORS 8
-
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR 0xef
-
/*
* Generic system vector for platform specific use
*/
-#define X86_PLATFORM_IPI_VECTOR 0xed
+#define X86_PLATFORM_IPI_VECTOR 0xf7
/*
* IRQ work vector:
*/
-#define IRQ_WORK_VECTOR 0xec
+#define IRQ_WORK_VECTOR 0xf6
-#define UV_BAU_MESSAGE 0xea
+#define UV_BAU_MESSAGE 0xf5
/*
* Self IPI vector for machine checks
*/
-#define MCE_SELF_VECTOR 0xeb
+#define MCE_SELF_VECTOR 0xf4
/* Xen vector callback to receive events in a HVM domain */
-#define XEN_HVM_EVTCHN_CALLBACK 0xe9
+#define XEN_HVM_EVTCHN_CALLBACK 0xf3
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR 0xef
+
+/* up to 32 vectors used for spreading out TLB flushes: */
+#if NR_CPUS <= 32
+# define NUM_INVALIDATE_TLB_VECTORS NR_CPUS
+#else
+# define NUM_INVALIDATE_TLB_VECTORS 32
+#endif
+
+#define INVALIDATE_TLB_VECTOR_END 0xee
+#define INVALIDATE_TLB_VECTOR_START \
+ (INVALIDATE_TLB_VECTOR_END - NUM_INVALIDATE_TLB_VECTORS + 1)
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 0c90dd9f0505..9c7d95f6174b 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -25,7 +25,6 @@ extern int pic_mode;
#define MAX_IRQ_SOURCES 256
extern unsigned int def_to_bigsmp;
-extern u8 apicid_2_node[];
#ifdef CONFIG_X86_NUMAQ
extern int mp_bus_id_to_node[MAX_MP_BUSSES];
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES];
extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#endif
-#define MAX_APICID 256
-
#else /* CONFIG_X86_64: */
#define MAX_MP_BUSSES 256
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index 27da400d3138..26fc6e2dd0fb 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -1,5 +1,54 @@
+#ifndef _ASM_X86_NUMA_H
+#define _ASM_X86_NUMA_H
+
+#include <asm/topology.h>
+#include <asm/apicdef.h>
+
+#ifdef CONFIG_NUMA
+/*
+ * __apicid_to_node[] stores the raw mapping between physical apicid and
+ * node and is used to initialize cpu_to_node mapping.
+ *
+ * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus
+ * should be accessed by the accessors - set_apicid_to_node() and
+ * numa_cpu_node().
+ */
+extern s16 __apicid_to_node[MAX_LOCAL_APIC];
+
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+ __apicid_to_node[apicid] = node;
+}
+#else /* CONFIG_NUMA */
+static inline void set_apicid_to_node(int apicid, s16 node)
+{
+}
+#endif /* CONFIG_NUMA */
+
#ifdef CONFIG_X86_32
# include "numa_32.h"
#else
# include "numa_64.h"
#endif
+
+#ifdef CONFIG_NUMA
+extern void __cpuinit numa_set_node(int cpu, int node);
+extern void __cpuinit numa_clear_node(int cpu);
+extern void __init numa_init_array(void);
+extern void __init init_cpu_to_node(void);
+extern void __cpuinit numa_add_cpu(int cpu);
+extern void __cpuinit numa_remove_cpu(int cpu);
+#else /* CONFIG_NUMA */
+static inline void numa_set_node(int cpu, int node) { }
+static inline void numa_clear_node(int cpu) { }
+static inline void numa_init_array(void) { }
+static inline void init_cpu_to_node(void) { }
+static inline void numa_add_cpu(int cpu) { }
+static inline void numa_remove_cpu(int cpu) { }
+#endif /* CONFIG_NUMA */
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable);
+#endif
+
+#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
index b0ef2b449a9d..c6beed1ef103 100644
--- a/arch/x86/include/asm/numa_32.h
+++ b/arch/x86/include/asm/numa_32.h
@@ -4,7 +4,12 @@
extern int numa_off;
extern int pxm_to_nid(int pxm);
-extern void numa_remove_cpu(int cpu);
+
+#ifdef CONFIG_NUMA
+extern int __cpuinit numa_cpu_node(int cpu);
+#else /* CONFIG_NUMA */
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_HIGHMEM
extern void set_highmem_pages_init(void);
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h
index 0493be39607c..2819afa33632 100644
--- a/arch/x86/include/asm/numa_64.h
+++ b/arch/x86/include/asm/numa_64.h
@@ -2,7 +2,6 @@
#define _ASM_X86_NUMA_64_H
#include <linux/nodemask.h>
-#include <asm/apicdef.h>
struct bootnode {
u64 start;
@@ -14,11 +13,8 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-extern void numa_init_array(void);
extern int numa_off;
-extern s16 apicid_to_node[MAX_LOCAL_APIC];
-
extern unsigned long numa_free_all_bootmem(void);
extern void setup_node_bootmem(int nodeid, unsigned long start,
unsigned long end);
@@ -31,11 +27,7 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
*/
#define NODE_MIN_SIZE (4*1024*1024)
-extern void __init init_cpu_to_node(void);
-extern void __cpuinit numa_set_node(int cpu, int node);
-extern void __cpuinit numa_clear_node(int cpu);
-extern void __cpuinit numa_add_cpu(int cpu);
-extern void __cpuinit numa_remove_cpu(int cpu);
+extern int __cpuinit numa_cpu_node(int cpu);
#ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
@@ -43,11 +35,7 @@ extern void __cpuinit numa_remove_cpu(int cpu);
void numa_emu_cmdline(char *);
#endif /* CONFIG_NUMA_EMU */
#else
-static inline void init_cpu_to_node(void) { }
-static inline void numa_set_node(int cpu, int node) { }
-static inline void numa_clear_node(int cpu) { }
-static inline void numa_add_cpu(int cpu, int node) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; }
#endif
#endif /* _ASM_X86_NUMA_64_H */
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 1df66211fd1b..731d211a1b20 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -2,6 +2,7 @@
#define _ASM_X86_PAGE_DEFS_H
#include <linux/const.h>
+#include <linux/types.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
@@ -45,9 +46,16 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
+static inline phys_addr_t get_max_mapped(void)
+{
+ return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
+}
+
extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
+void init_memory_mapping_high(void);
+
extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8);
extern void free_initmem(void);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1f4695136776..0747de6227dc 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,8 @@ extern unsigned int num_processors;
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -36,8 +38,16 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
+DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid);
+#endif
/* Static state in head.S used to set up a CPU */
extern unsigned long stack_start; /* Initial stack pointer address */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 21899cc31e52..b101c17861f5 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -47,21 +47,6 @@
#include <asm/mpspec.h>
-#ifdef CONFIG_X86_32
-
-/* Mappings between logical cpu number and node number */
-extern int cpu_to_node_map[];
-
-/* Returns the number of the node containing CPU 'cpu' */
-static inline int __cpu_to_node(int cpu)
-{
- return cpu_to_node_map[cpu];
-}
-#define early_cpu_to_node __cpu_to_node
-#define cpu_to_node __cpu_to_node
-
-#else /* CONFIG_X86_64 */
-
/* Mappings between logical cpu number and node number */
DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu)
#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
-#endif /* CONFIG_X86_64 */
-
/* Mappings between node number and cpus on that node. */
extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index b766a5e8ba0e..b6f73f1bc7ac 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -346,10 +346,11 @@
#define __NR_fanotify_init 338
#define __NR_fanotify_mark 339
#define __NR_prlimit64 340
+#define __NR_clock_adjtime 341
#ifdef __KERNEL__
-#define NR_syscalls 341
+#define NR_syscalls 342
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 363e9b8a715b..5ee30858f5d6 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -669,6 +669,8 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init)
__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
#define __NR_prlimit64 302
__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_clock_adjtime 303
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 64642ad019fb..643ebf2e2ad8 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -83,11 +83,13 @@ struct x86_init_paging {
* boot cpu
* @tsc_pre_init: platform function called before TSC init
* @timer_init: initialize the platform timer (default PIT/HPET)
+ * @wallclock_init: init the wallclock device
*/
struct x86_init_timers {
void (*setup_percpu_clockev)(void);
void (*tsc_pre_init)(void);
void (*timer_init)(void);
+ void (*wallclock_init)(void);
};
/**
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b3a71137983a..a2c512175395 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -589,14 +589,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
nid = acpi_get_node(handle);
if (nid == -1 || !node_online(nid))
return;
-#ifdef CONFIG_X86_64
- apicid_to_node[physid] = nid;
+ set_apicid_to_node(physid, nid);
numa_set_node(cpu, nid);
-#else /* CONFIG_X86_32 */
- apicid_2_node[physid] = nid;
- cpu_to_node_map[cpu] = nid;
-#endif
-
#endif
}
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..ed3c2e5b714a 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,7 +12,7 @@
static u32 *flush_words;
-struct pci_device_id amd_nb_misc_ids[] = {
+const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+static struct pci_device_id amd_nb_link_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+ {}
+};
+
const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ 0x00, 0x18, 0x20 },
{ 0xff, 0x00, 0x20 },
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);
static struct pci_dev *next_northbridge(struct pci_dev *dev,
- struct pci_device_id *ids)
+ const struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
{
int i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc;
+ struct pci_dev *misc, *link;
if (amd_nb_num())
return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- misc = NULL;
+ link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
+ node_to_amd_nb(i)->link = link =
+ next_northbridge(link, amd_nb_link_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,13 @@ int amd_cache_northbridges(void)
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
+ /* L3 cache partitioning is supported on family 0x15 */
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_PARTITIONING;
+
return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
@@ -93,8 +107,9 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
they're useless anyways */
int __init early_is_amd_nb(u32 device)
{
- struct pci_device_id *id;
+ const struct pci_device_id *id;
u32 vendor = device & 0xffff;
+
device >>= 16;
for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
@@ -102,6 +117,65 @@ int __init early_is_amd_nb(u32 device)
return 0;
}
+int amd_get_subcaches(int cpu)
+{
+ struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
+ unsigned int mask;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return 0;
+
+ pci_read_config_dword(link, 0x1d4, &mask);
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ return (mask >> (4 * cuid)) & 0xf;
+}
+
+int amd_set_subcaches(int cpu, int mask)
+{
+ static unsigned int reset, ban;
+ struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
+ unsigned int reg;
+ int cuid = 0;
+
+ if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
+ return -EINVAL;
+
+ /* if necessary, collect reset state of L3 partitioning and BAN mode */
+ if (reset == 0) {
+ pci_read_config_dword(nb->link, 0x1d4, &reset);
+ pci_read_config_dword(nb->misc, 0x1b8, &ban);
+ ban &= 0x180000;
+ }
+
+ /* deactivate BAN mode if any subcaches are to be disabled */
+ if (mask != 0xf) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
+ }
+
+#ifdef CONFIG_SMP
+ cuid = cpu_data(cpu).compute_unit_id;
+#endif
+ mask <<= 4 * cuid;
+ mask |= (0xf ^ (1 << cuid)) << 26;
+
+ pci_write_config_dword(nb->link, 0x1d4, mask);
+
+ /* reset BAN mode if L3 partitioning returned to reset state */
+ pci_read_config_dword(nb->link, 0x1d4, &reg);
+ if (reg == reset) {
+ pci_read_config_dword(nb->misc, 0x1b8, &reg);
+ reg &= ~0x180000;
+ pci_write_config_dword(nb->misc, 0x1b8, reg | ban);
+ }
+
+ return 0;
+}
+
int amd_cache_gart(void)
{
int i;
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 5955a7800a96..7b1e8e10b89c 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -13,7 +13,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size)
static u32 __init allocate_aperture(void)
{
u32 aper_size;
- void *p;
+ unsigned long addr;
/* aper_size should <= 1G */
if (fallback_aper_order > 5)
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void)
* so don't use 512M below as gart iommu, leave the space for kernel
* code for safe
*/
- p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20);
+ addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20);
+ if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) {
+ printk(KERN_ERR
+ "Cannot allocate aperture memory hole (%lx,%uK)\n",
+ addr, aper_size>>10);
+ return 0;
+ }
+ memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
/*
* Kmemleak should not scan this block as it may not be mapped via the
* kernel direct mapping.
*/
- kmemleak_ignore(p);
- if (!p || __pa(p)+aper_size > 0xffffffff) {
- printk(KERN_ERR
- "Cannot allocate aperture memory hole (%p,%uK)\n",
- p, aper_size>>10);
- if (p)
- free_bootmem(__pa(p), aper_size);
- return 0;
- }
+ kmemleak_ignore(phys_to_virt(addr));
printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n",
- aper_size >> 10, __pa(p));
- insert_aperture_resource((u32)__pa(p), aper_size);
- register_nosave_region((u32)__pa(p) >> PAGE_SHIFT,
- (u32)__pa(p+aper_size) >> PAGE_SHIFT);
+ aper_size >> 10, addr);
+ insert_aperture_resource((u32)addr, aper_size);
+ register_nosave_region(addr >> PAGE_SHIFT,
+ (addr+aper_size) >> PAGE_SHIFT);
- return (u32)__pa(p);
+ return (u32)addr;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978a..1e1792972198 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -78,6 +78,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use. The following early percpu variable is
+ * used for the mapping. This is where the behaviors of x86_64 and 32
+ * actually diverge. Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
/*
* Knob to control our willingness to enable the local APIC.
*
@@ -1237,6 +1246,19 @@ void __cpuinit setup_local_APIC(void)
*/
apic->init_apic_ldr();
+#ifdef CONFIG_X86_32
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches the
+ * actual value.
+ */
+ i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ /* always use the value from LDR */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ logical_smp_processor_id();
+#endif
+
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
@@ -1930,17 +1952,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
{
int cpu;
- /*
- * Validate version
- */
- if (version == 0x0) {
- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- version);
- version = 0x10;
- }
- apic_version[apicid] = version;
-
if (num_processors >= nr_cpu_ids) {
int max = nr_cpu_ids;
int thiscpu = max + disabled_cpus;
@@ -1954,22 +1965,34 @@ void __cpuinit generic_processor_info(int apicid, int version)
}
num_processors++;
- cpu = cpumask_next_zero(-1, cpu_present_mask);
-
- if (version != apic_version[boot_cpu_physical_apicid])
- WARN_ONCE(1,
- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
- apic_version[boot_cpu_physical_apicid], cpu, version);
-
- physid_set(apicid, phys_cpu_present_map);
if (apicid == boot_cpu_physical_apicid) {
/*
* x86_bios_cpu_apicid is required to have processors listed
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
+ * boot_cpu_init() already hold bit 0 in cpu_present_mask
+ * for BSP.
*/
cpu = 0;
+ } else
+ cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
+ version = 0x10;
}
+ apic_version[apicid] = version;
+
+ if (version != apic_version[boot_cpu_physical_apicid]) {
+ pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ apic_version[boot_cpu_physical_apicid], cpu, version);
+ }
+
+ physid_set(apicid, phys_cpu_present_map);
if (apicid > max_physical_apicid)
max_physical_apicid = apicid;
@@ -1977,7 +2000,10 @@ void __cpuinit generic_processor_info(int apicid, int version)
early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
#endif
-
+#ifdef CONFIG_X86_32
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ apic->x86_32_early_logical_apicid(cpu);
+#endif
set_cpu_possible(cpu, true);
set_cpu_present(cpu, true);
}
@@ -1998,10 +2024,14 @@ void default_init_apic_ldr(void)
}
#ifdef CONFIG_X86_32
-int default_apicid_to_node(int logical_apicid)
+int default_x86_32_numa_cpu_node(int cpu)
{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
+#ifdef CONFIG_NUMA
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
#else
return 0;
#endif
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 09d3b17ce0c2..5652d31fe108 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -185,8 +185,6 @@ struct apic apic_flat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
@@ -337,8 +335,6 @@ struct apic apic_physflat = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index e31b9ffe25f5..f1baa2dc087a 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void)
return 0;
}
-static int noop_cpu_to_logical_apicid(int cpu)
-{
- return 0;
-}
-
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
return 0;
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
cpumask_set_cpu(cpu, retmask);
}
-int noop_apicid_to_node(int logical_apicid)
-{
- /* we're always on node 0 */
- return 0;
-}
-
static u32 noop_apic_read(u32 reg)
{
WARN_ON_ONCE((cpu_has_apic && !disable_apic));
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v)
WARN_ON_ONCE(cpu_has_apic && !disable_apic);
}
+#ifdef CONFIG_X86_32
+static int noop_x86_32_numa_cpu_node(int cpu)
+{
+ /* we're always on node 0 */
+ return 0;
+}
+#endif
+
struct apic apic_noop = {
.name = "noop",
.probe = noop_probe,
@@ -153,9 +150,7 @@ struct apic apic_noop = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = noop_apicid_to_node,
- .cpu_to_logical_apicid = noop_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
@@ -197,4 +192,9 @@ struct apic apic_noop = {
.icr_write = noop_apic_icr_write,
.wait_icr_idle = noop_apic_wait_icr_idle,
.safe_wait_icr_idle = noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node,
+#endif
};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index cb804c5091b9..541a2e431659 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit)
return 1;
}
+static int bigsmp_early_logical_apicid(int cpu)
+{
+ /* on bigsmp, logical apicid is the same as physical */
+ return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
static inline unsigned long calculate_ldr(int cpu)
{
unsigned long val, id;
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void)
nr_ioapics);
}
-static int bigsmp_apicid_to_node(int logical_apicid)
-{
- return apicid_2_node[hard_smp_processor_id()];
-}
-
static int bigsmp_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu)
return BAD_APICID;
}
-/* Mapping from cpu number to logical apicid */
-static inline int bigsmp_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_physical_id(cpu);
-}
-
static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid)
/* As we are using single CPU as destination, pick only one CPU here */
static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask)
{
- return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask));
+ int cpu = cpumask_first(cpumask);
+
+ if (cpu < nr_cpu_ids)
+ return cpu_physical_id(cpu);
+ return BAD_APICID;
}
static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
*/
for_each_cpu_and(cpu, cpumask, andmask) {
if (cpumask_test_cpu(cpu, cpu_online_mask))
- break;
+ return cpu_physical_id(cpu);
}
- return bigsmp_cpu_to_logical_apicid(cpu);
+ return BAD_APICID;
}
static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = {
.ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
.setup_apic_routing = bigsmp_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = bigsmp_apicid_to_node,
- .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
.cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = bigsmp_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8593582d8022..3e9de4854c5b 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit)
return physid_isset(bit, phys_cpu_present_map);
}
+static int es7000_early_logical_apicid(int cpu)
+{
+ /* on es7000, logical apicid is the same as physical */
+ return early_per_cpu(x86_bios_cpu_apicid, cpu);
+}
+
static unsigned long calculate_ldr(int cpu)
{
unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void)
nr_ioapics, cpumask_bits(es7000_target_cpus())[0]);
}
-static int es7000_apicid_to_node(int logical_apicid)
+static int es7000_numa_cpu_node(int cpu)
{
return 0;
}
-
static int es7000_cpu_present_to_apicid(int mps_cpu)
{
if (!mps_cpu)
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap)
++cpu_id;
}
-/* Mapping from cpu number to logical apicid */
-static int es7000_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
{
/* For clustered we don't have a good way to do this yet - hack */
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = es7000_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
WARN(1, "Not a valid mask!");
@@ -578,7 +571,7 @@ static unsigned int
es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = es7000_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
struct apic __refdata apic_es7000 = {
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = {
.ioapic_phys_id_map = es7000_ioapic_phys_id_map,
.setup_apic_routing = es7000_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = es7000_apicid_to_node,
- .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
.cpu_present_to_apicid = es7000_cpu_present_to_apicid,
.apicid_to_cpu_present = es7000_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = es7000_early_logical_apicid,
+ .x86_32_numa_cpu_node = es7000_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 08385e090a6f..cce91bf26676 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
local_irq_restore(flags);
}
+#ifdef CONFIG_X86_32
+
void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector)
{
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
local_irq_save(flags);
for_each_cpu(query_cpu, mask)
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
local_irq_restore(flags);
}
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
if (query_cpu == this_cpu)
continue;
__default_send_IPI_dest_field(
- apic->cpu_to_logical_apicid(query_cpu), vector,
- apic->dest_logical);
+ early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+ vector, apic->dest_logical);
}
local_irq_restore(flags);
}
-#ifdef CONFIG_X86_32
-
/*
* This is only used on smaller machines.
*/
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index 960f26ab5c9f..6273eee5134b 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask
return physids_promote(0xFUL, retmap);
}
-static inline int numaq_cpu_to_logical_apicid(int cpu)
-{
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-}
-
/*
* Supporting over 60 cpus on NUMA-Q requires a locality-dependent
* cpu to APIC ID relation to properly interact with the intelligent
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid)
return logical_apicid >> 4;
}
+static int numaq_numa_cpu_node(int cpu)
+{
+ int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+
+ if (logical_apicid != BAD_APICID)
+ return numaq_apicid_to_node(logical_apicid);
+ return NUMA_NO_NODE;
+}
+
static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap)
{
int node = numaq_apicid_to_node(logical_apicid);
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = {
.ioapic_phys_id_map = numaq_ioapic_phys_id_map,
.setup_apic_routing = numaq_setup_apic_routing,
.multi_timer_check = numaq_multi_timer_check,
- .apicid_to_node = numaq_apicid_to_node,
- .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
.cpu_present_to_apicid = numaq_cpu_present_to_apicid,
.apicid_to_cpu_present = numaq_apicid_to_cpu_present,
.setup_portio_remap = numaq_setup_portio_remap,
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = numaq_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 99d2fe016084..fc84c7b61108 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void)
apic->setup_apic_routing();
}
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+ return 1 << cpu;
+}
+
static void setup_apic_flat_routing(void)
{
#ifdef CONFIG_X86_IO_APIC
@@ -130,8 +135,6 @@ struct apic apic_default = {
.ioapic_phys_id_map = default_ioapic_phys_id_map,
.setup_apic_routing = setup_apic_flat_routing,
.multi_timer_check = NULL,
- .apicid_to_node = default_apicid_to_node,
- .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = physid_set_mask_of_physid,
.setup_portio_remap = NULL,
@@ -167,6 +170,9 @@ struct apic apic_default = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
extern struct apic apic_numaq;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 9b419263d90d..e4b8059b414a 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit)
return 1;
}
-static void summit_init_apic_ldr(void)
+static int summit_early_logical_apicid(int cpu)
{
- unsigned long val, id;
int count = 0;
- u8 my_id = (u8)hard_smp_processor_id();
+ u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu);
u8 my_cluster = APIC_CLUSTER(my_id);
#ifdef CONFIG_SMP
u8 lid;
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void)
/* Create logical APIC IDs by counting CPUs already in cluster. */
for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
- lid = cpu_2_logical_apicid[i];
+ lid = early_per_cpu(x86_cpu_to_logical_apicid, i);
if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
++count;
}
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void)
/* We only have a 4 wide bitmap in cluster mode. If a deranged
* BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
- id = my_cluster | (1UL << count);
+ return my_cluster | (1UL << count);
+}
+
+static void summit_init_apic_ldr(void)
+{
+ int cpu = smp_processor_id();
+ unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ unsigned long val;
+
apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
val |= SET_APIC_LOGICAL_ID(id);
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void)
nr_ioapics);
}
-static int summit_apicid_to_node(int logical_apicid)
-{
-#ifdef CONFIG_SMP
- return apicid_2_node[hard_smp_processor_id()];
-#else
- return 0;
-#endif
-}
-
-/* Mapping from cpu number to logical apicid */
-static inline int summit_cpu_to_logical_apicid(int cpu)
-{
-#ifdef CONFIG_SMP
- if (cpu >= nr_cpu_ids)
- return BAD_APICID;
- return cpu_2_logical_apicid[cpu];
-#else
- return logical_smp_processor_id();
-#endif
-}
-
static int summit_cpu_present_to_apicid(int mps_cpu)
{
if (mps_cpu < nr_cpu_ids)
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
* The cpus in the mask must all be on the apic cluster.
*/
for_each_cpu(cpu, cpumask) {
- int new_apicid = summit_cpu_to_logical_apicid(cpu);
+ int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
printk("%s: Not a valid mask!\n", __func__);
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask)
static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
const struct cpumask *andmask)
{
- int apicid = summit_cpu_to_logical_apicid(0);
+ int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0);
cpumask_var_t cpumask;
if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
@@ -528,8 +514,6 @@ struct apic apic_summit = {
.ioapic_phys_id_map = summit_ioapic_phys_id_map,
.setup_apic_routing = summit_setup_apic_routing,
.multi_timer_check = NULL,
- .apicid_to_node = summit_apicid_to_node,
- .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
.cpu_present_to_apicid = summit_cpu_present_to_apicid,
.apicid_to_cpu_present = summit_apicid_to_cpu_present,
.setup_portio_remap = NULL,
@@ -565,4 +549,7 @@ struct apic apic_summit = {
.icr_write = native_apic_icr_write,
.wait_icr_idle = native_apic_wait_icr_idle,
.safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
+
+ .x86_32_early_logical_apicid = summit_early_logical_apicid,
+ .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node,
};
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index cf69c59f4910..90949bbd566d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 8972f38c5ced..c7e6d6645bf4 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index bd16b58b8850..3c289281394c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = {
.ioapic_phys_id_map = NULL,
.setup_apic_routing = NULL,
.multi_timer_check = NULL,
- .apicid_to_node = NULL,
- .cpu_to_logical_apicid = NULL,
.cpu_present_to_apicid = default_cpu_present_to_apicid,
.apicid_to_cpu_present = NULL,
.setup_portio_remap = NULL,
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cfa82c899f47..2b141c1915a5 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,5 +1,75 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/crypto.h>
+#include <linux/sched.h>
+#include <linux/stddef.h>
+#include <linux/hardirq.h>
+#include <linux/suspend.h>
+#include <linux/kbuild.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/sigframe.h>
+#include <asm/bootparam.h>
+#include <asm/suspend.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xen.h>
+#endif
+
#ifdef CONFIG_X86_32
# include "asm-offsets_32.c"
#else
# include "asm-offsets_64.c"
#endif
+
+void common(void) {
+ BLANK();
+ DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
+ DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
+ DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
+
+ BLANK();
+ OFFSET(TI_flags, thread_info, flags);
+ OFFSET(TI_status, thread_info, status);
+ OFFSET(TI_addr_limit, thread_info, addr_limit);
+ OFFSET(TI_preempt_count, thread_info, preempt_count);
+
+ BLANK();
+ OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
+
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+ OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+ OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+ OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+ OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+ OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+ OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
+ OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+#endif
+
+#ifdef CONFIG_XEN
+ BLANK();
+ OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+ OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
+
+ BLANK();
+ OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_loadflags, boot_params, hdr.loadflags);
+ OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
+ OFFSET(BP_version, boot_params, hdr.version);
+ OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
+}
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37a..c29d631af6fc 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,26 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- */
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
#include <asm/ucontext.h>
-#include <asm/sigframe.h>
-#include <asm/pgtable.h>
-#include <asm/fixmap.h>
-#include <asm/processor.h>
-#include <asm/thread_info.h>
-#include <asm/bootparam.h>
-#include <asm/elf.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
#include <linux/lguest.h>
#include "../../../drivers/lguest/lg.h"
@@ -51,21 +29,10 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
BLANK();
- OFFSET(TI_task, thread_info, task);
- OFFSET(TI_exec_domain, thread_info, exec_domain);
- OFFSET(TI_flags, thread_info, flags);
- OFFSET(TI_status, thread_info, status);
- OFFSET(TI_preempt_count, thread_info, preempt_count);
- OFFSET(TI_addr_limit, thread_info, addr_limit);
- OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
OFFSET(TI_cpu, thread_info, cpu);
BLANK();
- OFFSET(GDS_size, desc_ptr, size);
- OFFSET(GDS_address, desc_ptr, address);
- BLANK();
-
OFFSET(PT_EBX, pt_regs, bx);
OFFSET(PT_ECX, pt_regs, cx);
OFFSET(PT_EDX, pt_regs, dx);
@@ -85,42 +52,13 @@ void foo(void)
OFFSET(PT_OLDSS, pt_regs, ss);
BLANK();
- OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
- OFFSET(pbe_address, pbe, address);
- OFFSET(pbe_orig_address, pbe, orig_address);
- OFFSET(pbe_next, pbe, next);
-
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
sizeof(struct tss_struct));
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
- DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
- DEFINE(THREAD_SIZE_asm, THREAD_SIZE);
-
- OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
-
-#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
- OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
-#endif
-
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#endif
-
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
@@ -139,11 +77,4 @@ void foo(void)
OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode);
OFFSET(LGUEST_PAGES_regs, lguest_pages, regs);
#endif
-
- BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd965..e72a1194af22 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,27 +1,4 @@
-/*
- * Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed to extract
- * and format the required data.
- */
-#define COMPILE_OFFSETS
-
-#include <linux/crypto.h>
-#include <linux/sched.h>
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/suspend.h>
-#include <linux/kbuild.h>
-#include <asm/processor.h>
-#include <asm/segment.h>
-#include <asm/thread_info.h>
#include <asm/ia32.h>
-#include <asm/bootparam.h>
-#include <asm/suspend.h>
-
-#include <xen/interface/xen.h>
-
-#include <asm/sigframe.h>
#define __NO_STUBS 1
#undef __SYSCALL
@@ -33,41 +10,19 @@ static char syscalls[] = {
int main(void)
{
-#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry))
- ENTRY(state);
- ENTRY(flags);
- ENTRY(pid);
- BLANK();
-#undef ENTRY
-#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
- ENTRY(flags);
- ENTRY(addr_limit);
- ENTRY(preempt_count);
- ENTRY(status);
-#ifdef CONFIG_IA32_EMULATION
- ENTRY(sysenter_return);
-#endif
- BLANK();
-#undef ENTRY
#ifdef CONFIG_PARAVIRT
- BLANK();
- OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
- OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
- OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
- OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
- OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
- OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
- OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
- OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2);
+ BLANK();
#endif
-
#ifdef CONFIG_IA32_EMULATION
-#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry))
+ OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+ BLANK();
+
+#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry)
ENTRY(ax);
ENTRY(bx);
ENTRY(cx);
@@ -79,15 +34,12 @@ int main(void)
ENTRY(ip);
BLANK();
#undef ENTRY
- DEFINE(IA32_RT_SIGFRAME_sigcontext,
- offsetof (struct rt_sigframe_ia32, uc.uc_mcontext));
+
+ OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext);
BLANK();
#endif
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
- BLANK();
-#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry))
+
+#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(bx);
ENTRY(cx);
@@ -107,7 +59,8 @@ int main(void)
ENTRY(flags);
BLANK();
#undef ENTRY
-#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry))
+
+#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry)
ENTRY(cr0);
ENTRY(cr2);
ENTRY(cr3);
@@ -115,26 +68,11 @@ int main(void)
ENTRY(cr8);
BLANK();
#undef ENTRY
- DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist));
- BLANK();
- DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx));
- BLANK();
- DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+ OFFSET(TSS_ist, tss_struct, x86_tss.ist);
BLANK();
- OFFSET(BP_scratch, boot_params, scratch);
- OFFSET(BP_loadflags, boot_params, hdr.loadflags);
- OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
- OFFSET(BP_version, boot_params, hdr.version);
- OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
- BLANK();
- DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-#ifdef CONFIG_XEN
- BLANK();
- OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
- OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
-#undef ENTRY
-#endif
+ DEFINE(__NR_syscall_max, sizeof(syscalls) - 1);
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..f771ab6b49e9 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
}
#endif
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
+/*
+ * To workaround broken NUMA config. Read the comment in
+ * srat_detect_node().
+ */
static int __cpuinit nearby_node(int apicid)
{
int i, node;
for (i = apicid - 1; i >= 0; i--) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
- node = apicid_to_node[i];
+ node = __apicid_to_node[i];
if (node != NUMA_NO_NODE && node_online(node))
return node;
}
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes;
+ u32 nodes, cores_per_cu = 1;
u8 node_id;
int cpu = smp_processor_id();
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
+ cores_per_cu += ((ebx >> 8) & 3);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
+ u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
+ cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
- /* core id to be in range from 0 to (cores_per_node - 1) */
- c->cpu_core_id = c->cpu_core_id % cores_per_node;
+ /* core id has to be in the [0 .. cores_per_node - 1] range */
+ c->cpu_core_id %= cores_per_node;
+ c->compute_unit_id %= cus_per_node;
}
}
#endif
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
int cpu = smp_processor_id();
int node;
unsigned apicid = c->apicid;
- node = per_cpu(cpu_llc_id, cpu);
+ node = numa_cpu_node(cpu);
+ if (node == NUMA_NO_NODE)
+ node = per_cpu(cpu_llc_id, cpu);
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
if (!node_online(node)) {
- /* Two possibilities here:
- - The CPU is missing memory and no node was created.
- In that case try picking one from a nearby CPU
- - The APIC IDs differ from the HyperTransport node IDs
- which the K8 northbridge parsing fills in.
- Assume they are all increased by a constant offset,
- but in the same order as the HT nodeids.
- If that doesn't result in a usable node fall back to the
- path for the previous case. */
-
+ /*
+ * Two possibilities here:
+ *
+ * - The CPU is missing memory and no node was created. In
+ * that case try picking one from a nearby CPU.
+ *
+ * - The APIC IDs differ from the HyperTransport node IDs
+ * which the K8 northbridge parsing fills in. Assume
+ * they are all increased by a constant offset, but in
+ * the same order as the HT nodeids. If that doesn't
+ * result in a usable node fall back to the path for the
+ * previous case.
+ *
+ * This workaround operates directly on the mapping between
+ * APIC ID and NUMA node, assuming certain relationship
+ * between APIC ID, HT node ID and NUMA topology. As going
+ * through CPU mapping may alter the outcome, directly
+ * access __apicid_to_node[].
+ */
int ht_nodeid = c->initial_apicid;
if (ht_nodeid >= 0 &&
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
- node = apicid_to_node[ht_nodeid];
+ __apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = __apicid_to_node[ht_nodeid];
/* Pick a nearby node */
if (!node_online(node))
node = nearby_node(apicid);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396bd..a2559c3ed500 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
select_idle_routine(c);
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d16c2c53d6bf..df86bc8c859d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
+#ifdef CONFIG_NUMA
unsigned node;
int cpu = smp_processor_id();
- int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
/* Don't do the funky fallback heuristics the AMD version employs
for now. */
- node = apicid_to_node[apicid];
+ node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE || !node_online(node)) {
/* reuse the value from init_cpu_to_node() */
node = cpu_to_node(cpu);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..1ce1af2899df 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
struct _cache_attr {
struct attribute attr;
- ssize_t (*show)(struct _cpuid4_info *, char *);
- ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+ ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
+ ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
+ unsigned int);
};
#ifdef CONFIG_AMD_NB
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
#define SHOW_CACHE_DISABLE(slot) \
static ssize_t \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return show_cache_disable(this_leaf, buf, slot); \
}
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count, \
+ unsigned int cpu) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
+static ssize_t
+show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+{
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
+}
+
+static ssize_t
+store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
+ unsigned int cpu)
+{
+ unsigned long val;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ return -EINVAL;
+
+ if (strict_strtoul(buf, 16, &val) < 0)
+ return -EINVAL;
+
+ if (amd_set_subcaches(cpu, val))
+ return -EINVAL;
+
+ return count;
+}
+
+static struct _cache_attr subcaches =
+ __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+
#else /* CONFIG_AMD_NB */
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
@@ -532,9 +568,9 @@ static int
__cpuinit cpuid4_cache_lookup_regs(int index,
struct _cpuid4_info_regs *this_leaf)
{
- union _cpuid4_leaf_eax eax;
- union _cpuid4_leaf_ebx ebx;
- union _cpuid4_leaf_ecx ecx;
+ union _cpuid4_leaf_eax eax;
+ union _cpuid4_leaf_ebx ebx;
+ union _cpuid4_leaf_ecx ecx;
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
#define show_one_plus(file_name, object, val) \
-static ssize_t show_##file_name \
- (struct _cpuid4_info *this_leaf, char *buf) \
+static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
+ unsigned int cpu) \
{ \
return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
return n;
}
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 0, buf);
}
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
+static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
+ unsigned int cpu)
{
return show_shared_cpu_map_func(leaf, 1, buf);
}
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
+ unsigned int cpu)
{
switch (this_leaf->eax.split.type) {
case CACHE_TYPE_DATA:
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ n += 1;
+
attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
if (attrs == NULL)
return attrs = default_attrs;
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
attrs[n++] = &cache_disable_1.attr;
}
+ if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+ attrs[n++] = &subcaches.attr;
+
return attrs;
}
#endif
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
ret = fattr->show ?
fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf) :
+ buf, this_leaf->cpu) :
0;
return ret;
}
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
ret = fattr->store ?
fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
- buf, count) :
+ buf, count, this_leaf->cpu) :
0;
return ret;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..4d98789b0664 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1389,7 +1389,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1608,7 +1608,7 @@ out:
return ret;
}
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 294f26da0c0c..0b5e2b546566 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -847,15 +847,21 @@ static int __init parse_memopt(char *p)
if (!p)
return -EINVAL;
-#ifdef CONFIG_X86_32
if (!strcmp(p, "nopentium")) {
+#ifdef CONFIG_X86_32
setup_clear_cpu_cap(X86_FEATURE_PSE);
return 0;
- }
+#else
+ printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
+ return -EINVAL;
#endif
+ }
userdef = 1;
mem_size = memparse(p, &p);
+ /* don't remove all of memory when handling "mem={invalid}" param */
+ if (mem_size == 0)
+ return -EINVAL;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c9..891268c645ea 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -975,9 +975,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-.irpc idx, "01234567"
+.irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+.if NUM_INVALIDATE_TLB_VECTORS > \idx
apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
invalidate_interrupt\idx smp_invalidate_interrupt
+.endif
.endr
#endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index c752e973958d..7aad10a63e04 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -164,14 +164,77 @@ static void __init smp_intr_init(void)
alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPIs for invalidation */
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
- alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
+#define ALLOC_INVTLB_VEC(NR) \
+ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \
+ invalidate_interrupt##NR)
+
+ switch (NUM_INVALIDATE_TLB_VECTORS) {
+ default:
+ ALLOC_INVTLB_VEC(31);
+ case 31:
+ ALLOC_INVTLB_VEC(30);
+ case 30:
+ ALLOC_INVTLB_VEC(29);
+ case 29:
+ ALLOC_INVTLB_VEC(28);
+ case 28:
+ ALLOC_INVTLB_VEC(27);
+ case 27:
+ ALLOC_INVTLB_VEC(26);
+ case 26:
+ ALLOC_INVTLB_VEC(25);
+ case 25:
+ ALLOC_INVTLB_VEC(24);
+ case 24:
+ ALLOC_INVTLB_VEC(23);
+ case 23:
+ ALLOC_INVTLB_VEC(22);
+ case 22:
+ ALLOC_INVTLB_VEC(21);
+ case 21:
+ ALLOC_INVTLB_VEC(20);
+ case 20:
+ ALLOC_INVTLB_VEC(19);
+ case 19:
+ ALLOC_INVTLB_VEC(18);
+ case 18:
+ ALLOC_INVTLB_VEC(17);
+ case 17:
+ ALLOC_INVTLB_VEC(16);
+ case 16:
+ ALLOC_INVTLB_VEC(15);
+ case 15:
+ ALLOC_INVTLB_VEC(14);
+ case 14:
+ ALLOC_INVTLB_VEC(13);
+ case 13:
+ ALLOC_INVTLB_VEC(12);
+ case 12:
+ ALLOC_INVTLB_VEC(11);
+ case 11:
+ ALLOC_INVTLB_VEC(10);
+ case 10:
+ ALLOC_INVTLB_VEC(9);
+ case 9:
+ ALLOC_INVTLB_VEC(8);
+ case 8:
+ ALLOC_INVTLB_VEC(7);
+ case 7:
+ ALLOC_INVTLB_VEC(6);
+ case 6:
+ ALLOC_INVTLB_VEC(5);
+ case 5:
+ ALLOC_INVTLB_VEC(4);
+ case 4:
+ ALLOC_INVTLB_VEC(3);
+ case 3:
+ ALLOC_INVTLB_VEC(2);
+ case 2:
+ ALLOC_INVTLB_VEC(1);
+ case 1:
+ ALLOC_INVTLB_VEC(0);
+ break;
+ }
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 0fe6d1a66c38..9fb8405451c5 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -66,7 +66,6 @@ struct microcode_amd {
unsigned int mpb[0];
};
-#define UCODE_MAX_SIZE 2048
#define UCODE_CONTAINER_SECTION_HDR 8
#define UCODE_CONTAINER_HEADER_SIZE 12
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 dummy;
- memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
- pr_warning("microcode: CPU%d: AMD CPU family 0x%x not "
- "supported\n", cpu, c->x86);
+ pr_warning("CPU%d: family %d not supported\n", cpu, c->x86);
return -1;
}
+
rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy);
- pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev);
+ pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
+
return 0;
}
-static int get_matching_microcode(int cpu, void *mc, int rev)
+static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr,
+ int rev)
{
- struct microcode_header_amd *mc_header = mc;
unsigned int current_cpu_id;
u16 equiv_cpu_id = 0;
unsigned int i = 0;
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
if (!equiv_cpu_id)
return 0;
- if (mc_header->processor_rev_id != equiv_cpu_id)
+ if (mc_hdr->processor_rev_id != equiv_cpu_id)
return 0;
/* ucode might be chipset specific -- currently we don't support this */
- if (mc_header->nb_dev_id || mc_header->sb_dev_id) {
- pr_err("CPU%d: loading of chipset specific code not yet supported\n",
+ if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+ pr_err("CPU%d: chipset specific code not yet supported\n",
cpu);
return 0;
}
- if (mc_header->patch_id <= rev)
+ if (mc_hdr->patch_id <= rev)
return 0;
return 1;
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu)
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
- pr_err("CPU%d: update failed (for patch_level=0x%x)\n",
+ pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
return -1;
}
- pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev);
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
uci->cpu_sig.rev = rev;
return 0;
}
-static void *
-get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
+static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size)
{
- unsigned int total_size;
- u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
- void *mc;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ unsigned int max_size, actual_size;
+
+#define F1XH_MPB_MAX_SIZE 2048
+#define F14H_MPB_MAX_SIZE 1824
+#define F15H_MPB_MAX_SIZE 4096
+
+ switch (c->x86) {
+ case 0x14:
+ max_size = F14H_MPB_MAX_SIZE;
+ break;
+ case 0x15:
+ max_size = F15H_MPB_MAX_SIZE;
+ break;
+ default:
+ max_size = F1XH_MPB_MAX_SIZE;
+ break;
+ }
- get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
+ actual_size = buf[4] + (buf[5] << 8);
- if (section_hdr[0] != UCODE_UCODE_TYPE) {
- pr_err("error: invalid type field in container file section header\n");
- return NULL;
+ if (actual_size > size || actual_size > max_size) {
+ pr_err("section size mismatch\n");
+ return 0;
}
- total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
+ return actual_size;
+}
+
+static struct microcode_header_amd *
+get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size)
+{
+ struct microcode_header_amd *mc = NULL;
+ unsigned int actual_size = 0;
- if (total_size > size || total_size > UCODE_MAX_SIZE) {
- pr_err("error: size mismatch\n");
- return NULL;
+ if (buf[0] != UCODE_UCODE_TYPE) {
+ pr_err("invalid type field in container file section header\n");
+ goto out;
}
- mc = vzalloc(UCODE_MAX_SIZE);
+ actual_size = verify_ucode_size(cpu, buf, size);
+ if (!actual_size)
+ goto out;
+
+ mc = vzalloc(actual_size);
if (!mc)
- return NULL;
+ goto out;
- get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size);
+ *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR;
+out:
return mc;
}
static int install_equiv_cpu_table(const u8 *buf)
{
- u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE];
- unsigned int *buf_pos = (unsigned int *)container_hdr;
- unsigned long size;
-
- get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
-
- size = buf_pos[2];
-
- if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
- pr_err("error: invalid type field in container file section header\n");
- return 0;
+ unsigned int *ibuf = (unsigned int *)buf;
+ unsigned int type = ibuf[1];
+ unsigned int size = ibuf[2];
+
+ if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
+ pr_err("empty section/"
+ "invalid type field in container file section header\n");
+ return -EINVAL;
}
equiv_cpu_table = vmalloc(size);
if (!equiv_cpu_table) {
pr_err("failed to allocate equivalent CPU table\n");
- return 0;
+ return -ENOMEM;
}
- buf += UCODE_CONTAINER_HEADER_SIZE;
- get_ucode_data(equiv_cpu_table, buf, size);
+ get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size);
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
}
@@ -223,16 +244,16 @@ static enum ucode_state
generic_load_microcode(int cpu, const u8 *data, size_t size)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct microcode_header_amd *mc_hdr = NULL;
+ unsigned int mc_size, leftover;
+ unsigned long offset;
const u8 *ucode_ptr = data;
void *new_mc = NULL;
- void *mc;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover;
- unsigned long offset;
+ unsigned int new_rev = uci->cpu_sig.rev;
enum ucode_state state = UCODE_OK;
offset = install_equiv_cpu_table(ucode_ptr);
- if (!offset) {
+ if (offset < 0) {
pr_err("failed to create equivalent cpu table\n");
return UCODE_ERROR;
}
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size)
leftover = size - offset;
while (leftover) {
- unsigned int uninitialized_var(mc_size);
- struct microcode_header_amd *mc_header;
-
- mc = get_next_ucode(ucode_ptr, leftover, &mc_size);
- if (!mc)
+ mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size);
+ if (!mc_hdr)
break;
- mc_header = (struct microcode_header_amd *)mc;
- if (get_matching_microcode(cpu, mc, new_rev)) {
+ if (get_matching_microcode(cpu, mc_hdr, new_rev)) {
vfree(new_mc);
- new_rev = mc_header->patch_id;
- new_mc = mc;
+ new_rev = mc_hdr->patch_id;
+ new_mc = mc_hdr;
} else
- vfree(mc);
+ vfree(mc_hdr);
ucode_ptr += mc_size;
leftover -= mc_size;
}
- if (new_mc) {
- if (!leftover) {
- vfree(uci->mc);
- uci->mc = new_mc;
- pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
- cpu, new_rev, uci->cpu_sig.rev);
- } else {
- vfree(new_mc);
- state = UCODE_ERROR;
- }
- } else
+ if (!new_mc) {
state = UCODE_NFOUND;
+ goto free_table;
+ }
+ if (!leftover) {
+ vfree(uci->mc);
+ uci->mc = new_mc;
+ pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
+ cpu, uci->cpu_sig.rev, new_rev);
+ } else {
+ vfree(new_mc);
+ state = UCODE_ERROR;
+ }
+
+free_table:
free_equiv_cpu_table();
return state;
}
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
- const struct firmware *firmware;
- enum ucode_state ret;
+ const struct firmware *fw;
+ enum ucode_state ret = UCODE_NFOUND;
- if (request_firmware(&firmware, fw_name, device)) {
- printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
- return UCODE_NFOUND;
+ if (request_firmware(&fw, fw_name, device)) {
+ pr_err("failed to load file %s\n", fw_name);
+ goto out;
}
- if (*(u32 *)firmware->data != UCODE_MAGIC) {
- pr_err("invalid UCODE_MAGIC (0x%08x)\n",
- *(u32 *)firmware->data);
- return UCODE_ERROR;
+ ret = UCODE_ERROR;
+ if (*(u32 *)fw->data != UCODE_MAGIC) {
+ pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data);
+ goto fw_release;
}
- ret = generic_load_microcode(cpu, firmware->data, firmware->size);
+ ret = generic_load_microcode(cpu, fw->data, fw->size);
- release_firmware(firmware);
+fw_release:
+ release_firmware(fw);
+out:
return ret;
}
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu)
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
- .request_microcode_fw = request_microcode_fw,
+ .request_microcode_fw = request_microcode_amd,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 1cca374a2bac..87af68e0e1e1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
if (err)
return err;
- if (microcode_init_cpu(cpu) == UCODE_ERROR)
- err = -EINVAL;
+ if (microcode_init_cpu(cpu) == UCODE_ERROR) {
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ return -EINVAL;
+ }
return err;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d3cfe26c0252..b5e37fd709df 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -293,10 +293,32 @@ static void __init init_gbpages(void)
else
direct_gbpages = 0;
}
+
+static void __init cleanup_highmap_brk_end(void)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ mmu_cr4_features = read_cr4();
+
+ /*
+ * _brk_end cannot change anymore, but it and _end may be
+ * located on different 2M pages. cleanup_highmap(), however,
+ * can only consider _end when it runs, so destroy any
+ * mappings beyond _brk_end here.
+ */
+ pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
+ pmd = pmd_offset(pud, _brk_end - 1);
+ while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
+ pmd_clear(pmd);
+}
#else
static inline void init_gbpages(void)
{
}
+static inline void cleanup_highmap_brk_end(void)
+{
+}
#endif
static void __init reserve_brk(void)
@@ -307,6 +329,8 @@ static void __init reserve_brk(void)
/* Mark brk area as locked down and no longer taking any
new allocations */
_brk_start = 0;
+
+ cleanup_highmap_brk_end();
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -680,15 +704,6 @@ static int __init parse_reservelow(char *p)
early_param("reservelow", parse_reservelow);
-static u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -950,14 +965,6 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
-#ifdef CONFIG_X86_64
- if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
- /* can we preseve max_low_pfn ?*/
- max_low_pfn = max_pfn;
- }
-#endif
memblock.current_limit = get_max_mapped();
/*
@@ -1040,9 +1047,7 @@ void __init setup_arch(char **cmdline_p)
prefill_possible_map();
-#ifdef CONFIG_X86_64
init_cpu_to_node();
-#endif
init_apic_mappings();
ioapic_and_gsi_init();
@@ -1066,6 +1071,8 @@ void __init setup_arch(char **cmdline_p)
#endif
x86_init.oem.banner();
+ x86_init.timers.wallclock_init();
+
mcheck_init();
local_irq_save(flags);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 002b79685f73..71f4727da373 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_bios_cpu_apicid, cpu) =
early_per_cpu_map(x86_bios_cpu_apicid, cpu);
#endif
+#ifdef CONFIG_X86_32
+ per_cpu(x86_cpu_to_logical_apicid, cpu) =
+ early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
+#endif
#ifdef CONFIG_X86_64
per_cpu(irq_stack_ptr, cpu) =
per_cpu(irq_stack_union.irq_stack, cpu) +
IRQ_STACK_SIZE - 64;
+#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void)
*/
set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
#endif
-#endif
/*
* Up to this point, the boot CPU has been using .init.data
* area. Reload any changed state for the boot CPU.
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void)
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+#ifdef CONFIG_X86_32
+ early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL;
+#endif
+#ifdef CONFIG_NUMA
early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
#endif
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 08776a953487..91de347a5588 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,10 +71,6 @@
#include <asm/smpboot_hooks.h>
#include <asm/i8259.h>
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
-#endif
-
/* State of each CPU */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -130,68 +126,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
- printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
- cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
- int node;
-
- printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node++)
- cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = 0;
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
-#endif
-
-#ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
- int node = apic->apicid_to_node(apicid);
-
- if (!node_online(node))
- node = first_online_node;
-
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid() do {} while (0)
-#endif
-
/*
* Report back to the Boot Processor.
* Running on AP.
@@ -259,7 +201,6 @@ static void __cpuinit smp_callin(void)
apic->smp_callin_clear_local_apic();
setup_local_APIC();
end_local_APIC_setup();
- map_cpu_to_logical_apicid();
/*
* Need to setup vector mappings before we enable interrupts.
@@ -355,23 +296,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -381,7 +305,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -389,15 +313,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -414,6 +335,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +347,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +358,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +398,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -960,7 +882,6 @@ static __init void disable_smp(void)
physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
else
physid_set_mask_of_physid(0, &phys_cpu_present_map);
- map_cpu_to_logical_apicid();
cpumask_set_cpu(0, cpu_sibling_mask(0));
cpumask_set_cpu(0, cpu_core_mask(0));
}
@@ -1089,21 +1010,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
- boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
+
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
@@ -1139,8 +1058,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
bsp_end_local_APIC_setup();
- map_cpu_to_logical_apicid();
-
if (apic->setup_portio_remap)
apic->setup_portio_remap();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index b35786dc9b8f..68c7b9aa5001 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -340,3 +340,4 @@ ENTRY(sys_call_table)
.long sys_fanotify_init
.long sys_fanotify_mark
.long sys_prlimit64 /* 340 */
+ .long sys_clock_adjtime
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf4700755184..e9f7a3c838c3 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -305,7 +305,7 @@ SECTIONS
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(PAGE_SIZE)
#endif
. = ALIGN(PAGE_SIZE);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ceb2911aa439..c11514e9128b 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.tsc_pre_init = x86_init_noop,
.timer_init = hpet_time_init,
+ .wallclock_init = x86_init_noop,
},
.iommu = {
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
+/*
+ * Normally compiler builtins are used, but sometimes the compiler calls out
+ * of line code. Based on asm-i386/string.h.
+ *
+ * This assembly file is re-written from memmove_64.c file.
+ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
+ */
+#define _STRING_C
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+#undef memmove
+
+/*
+ * Implement memmove(). This can handle overlap between src and dst.
+ *
+ * Input:
+ * rdi: dest
+ * rsi: src
+ * rdx: count
+ *
+ * Output:
+ * rax: dest
+ */
+ENTRY(memmove)
+ CFI_STARTPROC
+ /* Handle more 32bytes in loop */
+ mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f
+
+ /* Decide forward/backward copy mode */
+ cmp %rdi, %rsi
+ jb 2f
+
+ /*
+ * movsq instruction have many startup latency
+ * so we handle small size by general register.
+ */
+ cmp $680, %rdx
+ jb 3f
+ /*
+ * movsq instruction is only good for aligned case.
+ */
+
+ cmpb %dil, %sil
+ je 4f
+3:
+ sub $0x20, %rdx
+ /*
+ * We gobble 32byts forward in each loop.
+ */
+5:
+ sub $0x20, %rdx
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r8
+ leaq 4*8(%rsi), %rsi
+
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, 2*8(%rdi)
+ movq %r8, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae 5b
+ addq $0x20, %rdx
+ jmp 1f
+ /*
+ * Handle data forward by movsq.
+ */
+ .p2align 4
+4:
+ movq %rdx, %rcx
+ movq -8(%rsi, %rdx), %r11
+ lea -8(%rdi, %rdx), %r10
+ shrq $3, %rcx
+ rep movsq
+ movq %r11, (%r10)
+ jmp 13f
+ /*
+ * Handle data backward by movsq.
+ */
+ .p2align 4
+7:
+ movq %rdx, %rcx
+ movq (%rsi), %r11
+ movq %rdi, %r10
+ leaq -8(%rsi, %rdx), %rsi
+ leaq -8(%rdi, %rdx), %rdi
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ movq %r11, (%r10)
+ jmp 13f
+
+ /*
+ * Start to prepare for backward copy.
+ */
+ .p2align 4
+2:
+ cmp $680, %rdx
+ jb 6f
+ cmp %dil, %sil
+ je 7b
+6:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * We gobble 32byts backward in each loop.
+ */
+8:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r11
+ movq -2*8(%rsi), %r10
+ movq -3*8(%rsi), %r9
+ movq -4*8(%rsi), %r8
+ leaq -4*8(%rsi), %rsi
+
+ movq %r11, -1*8(%rdi)
+ movq %r10, -2*8(%rdi)
+ movq %r9, -3*8(%rdi)
+ movq %r8, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae 8b
+ /*
+ * Calculate copy position to head.
+ */
+ addq $0x20, %rdx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+1:
+ cmpq $16, %rdx
+ jb 9f
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq -2*8(%rsi, %rdx), %r9
+ movq -1*8(%rsi, %rdx), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, -2*8(%rdi, %rdx)
+ movq %r8, -1*8(%rdi, %rdx)
+ jmp 13f
+ .p2align 4
+9:
+ cmpq $8, %rdx
+ jb 10f
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq -1*8(%rsi, %rdx), %r10
+ movq %r11, 0*8(%rdi)
+ movq %r10, -1*8(%rdi, %rdx)
+ jmp 13f
+10:
+ cmpq $4, %rdx
+ jb 11f
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %r11d
+ movl -4(%rsi, %rdx), %r10d
+ movl %r11d, (%rdi)
+ movl %r10d, -4(%rdi, %rdx)
+ jmp 13f
+11:
+ cmp $2, %rdx
+ jb 12f
+ /*
+ * Move data from 2 bytes to 3 bytes.
+ */
+ movw (%rsi), %r11w
+ movw -2(%rsi, %rdx), %r10w
+ movw %r11w, (%rdi)
+ movw %r10w, -2(%rdi, %rdx)
+ jmp 13f
+12:
+ cmp $1, %rdx
+ jb 13f
+ /*
+ * Move data for 1 byte.
+ */
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+13:
+ retq
+ CFI_ENDPROC
+ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void *dest, const void *src, size_t count)
-{
- unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
- char *ret;
-
- __asm__ __volatile__(
- /* Handle more 32bytes in loop */
- "mov %2, %3\n\t"
- "cmp $0x20, %0\n\t"
- "jb 1f\n\t"
-
- /* Decide forward/backward copy mode */
- "cmp %2, %1\n\t"
- "jb 2f\n\t"
-
- /*
- * movsq instruction have many startup latency
- * so we handle small size by general register.
- */
- "cmp $680, %0\n\t"
- "jb 3f\n\t"
- /*
- * movsq instruction is only good for aligned case.
- */
- "cmpb %%dil, %%sil\n\t"
- "je 4f\n\t"
- "3:\n\t"
- "sub $0x20, %0\n\t"
- /*
- * We gobble 32byts forward in each loop.
- */
- "5:\n\t"
- "sub $0x20, %0\n\t"
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq 2*8(%1), %6\n\t"
- "movq 3*8(%1), %7\n\t"
- "leaq 4*8(%1), %1\n\t"
-
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, 2*8(%2)\n\t"
- "movq %7, 3*8(%2)\n\t"
- "leaq 4*8(%2), %2\n\t"
- "jae 5b\n\t"
- "addq $0x20, %0\n\t"
- "jmp 1f\n\t"
- /*
- * Handle data forward by movsq.
- */
- ".p2align 4\n\t"
- "4:\n\t"
- "movq %0, %8\n\t"
- "movq -8(%1, %0), %4\n\t"
- "lea -8(%2, %0), %5\n\t"
- "shrq $3, %8\n\t"
- "rep movsq\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
- /*
- * Handle data backward by movsq.
- */
- ".p2align 4\n\t"
- "7:\n\t"
- "movq %0, %8\n\t"
- "movq (%1), %4\n\t"
- "movq %2, %5\n\t"
- "leaq -8(%1, %0), %1\n\t"
- "leaq -8(%2, %0), %2\n\t"
- "shrq $3, %8\n\t"
- "std\n\t"
- "rep movsq\n\t"
- "cld\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
-
- /*
- * Start to prepare for backward copy.
- */
- ".p2align 4\n\t"
- "2:\n\t"
- "cmp $680, %0\n\t"
- "jb 6f \n\t"
- "cmp %%dil, %%sil\n\t"
- "je 7b \n\t"
- "6:\n\t"
- /*
- * Calculate copy position to tail.
- */
- "addq %0, %1\n\t"
- "addq %0, %2\n\t"
- "subq $0x20, %0\n\t"
- /*
- * We gobble 32byts backward in each loop.
- */
- "8:\n\t"
- "subq $0x20, %0\n\t"
- "movq -1*8(%1), %4\n\t"
- "movq -2*8(%1), %5\n\t"
- "movq -3*8(%1), %6\n\t"
- "movq -4*8(%1), %7\n\t"
- "leaq -4*8(%1), %1\n\t"
-
- "movq %4, -1*8(%2)\n\t"
- "movq %5, -2*8(%2)\n\t"
- "movq %6, -3*8(%2)\n\t"
- "movq %7, -4*8(%2)\n\t"
- "leaq -4*8(%2), %2\n\t"
- "jae 8b\n\t"
- /*
- * Calculate copy position to head.
- */
- "addq $0x20, %0\n\t"
- "subq %0, %1\n\t"
- "subq %0, %2\n\t"
- "1:\n\t"
- "cmpq $16, %0\n\t"
- "jb 9f\n\t"
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq -2*8(%1, %0), %6\n\t"
- "movq -1*8(%1, %0), %7\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, -2*8(%2, %0)\n\t"
- "movq %7, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- ".p2align 4\n\t"
- "9:\n\t"
- "cmpq $8, %0\n\t"
- "jb 10f\n\t"
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq -1*8(%1, %0), %5\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- "10:\n\t"
- "cmpq $4, %0\n\t"
- "jb 11f\n\t"
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- "movl (%1), %4d\n\t"
- "movl -4(%1, %0), %5d\n\t"
- "movl %4d, (%2)\n\t"
- "movl %5d, -4(%2, %0)\n\t"
- "jmp 13f\n\t"
- "11:\n\t"
- "cmp $2, %0\n\t"
- "jb 12f\n\t"
- /*
- * Move data from 2 bytes to 3 bytes.
- */
- "movw (%1), %4w\n\t"
- "movw -2(%1, %0), %5w\n\t"
- "movw %4w, (%2)\n\t"
- "movw %5w, -2(%2, %0)\n\t"
- "jmp 13f\n\t"
- "12:\n\t"
- "cmp $1, %0\n\t"
- "jb 13f\n\t"
- /*
- * Move data for 1 byte.
- */
- "movb (%1), %4b\n\t"
- "movb %4b, (%2)\n\t"
- "13:\n\t"
- : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
- "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
- :"0" (count),
- "1" (src),
- "2" (dest)
- :"memory");
-
- return ret;
-
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c
index f21962c435ed..2523c3554de5 100644
--- a/arch/x86/mm/amdtopology_64.c
+++ b/arch/x86/mm/amdtopology_64.c
@@ -247,7 +247,7 @@ void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes)
__acpi_map_pxm_to_node(nid, i);
#endif
}
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+ memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
}
#endif /* CONFIG_NUMA_EMU */
@@ -278,14 +278,16 @@ int __init amd_scan_nodes(void)
apicid_base = boot_cpu_physical_apicid;
}
- for_each_node_mask(i, node_possible_map) {
- int j;
-
+ for_each_node_mask(i, node_possible_map)
memblock_x86_register_active_regions(i,
nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
+ init_memory_mapping_high();
+ for_each_node_mask(i, node_possible_map) {
+ int j;
+
for (j = apicid_base; j < cores + apicid_base; j++)
- apicid_to_node[(i << bits) + j] = i;
+ set_apicid_to_node((i << bits) + j, i);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 947f42abe820..b8054e087ead 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -33,7 +33,7 @@ int direct_gbpages
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
- unsigned long puds, pmds, ptes, tables, start;
+ unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
phys_addr_t base;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
@@ -65,20 +65,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
#ifdef CONFIG_X86_32
/* for fixmap */
tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
-#endif
- /*
- * RED-PEN putting page tables only on node 0 could
- * cause a hotspot and fill up ZONE_DMA. The page tables
- * need roughly 0.5KB per GB.
- */
-#ifdef CONFIG_X86_32
- start = 0x7000;
-#else
- start = 0x8000;
+ good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT,
- tables, PAGE_SIZE);
+
+ base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
@@ -279,25 +270,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
load_cr3(swapper_pg_dir);
#endif
-#ifdef CONFIG_X86_64
- if (!after_bootmem && !start) {
- pud_t *pud;
- pmd_t *pmd;
-
- mmu_cr4_features = read_cr4();
-
- /*
- * _brk_end cannot change anymore, but it and _end may be
- * located on different 2M pages. cleanup_highmap(), however,
- * can only consider _end when it runs, so destroy any
- * mappings beyond _brk_end here.
- */
- pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
- pmd = pmd_offset(pud, _brk_end - 1);
- while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
- pmd_clear(pmd);
- }
-#endif
__flush_tlb_all();
if (!after_bootmem && e820_table_end > e820_table_start)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af80..194f2732ab77 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -333,12 +333,28 @@ static __ref void *alloc_low_page(unsigned long *phys)
return adr;
}
+static __ref void *map_low_page(void *virt)
+{
+ void *adr;
+ unsigned long phys, left;
+
+ if (after_bootmem)
+ return virt;
+
+ phys = __pa(virt);
+ left = phys & (PAGE_SIZE - 1);
+ adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE);
+ adr = (void *)(((unsigned long)adr) | left);
+
+ return adr;
+}
+
static __ref void unmap_low_page(void *adr)
{
if (after_bootmem)
return;
- early_iounmap(adr, PAGE_SIZE);
+ early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE);
}
static unsigned long __meminit
@@ -386,15 +402,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
}
static unsigned long __meminit
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
- pgprot_t prot)
-{
- pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
-
- return phys_pte_init(pte, address, end, prot);
-}
-
-static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
unsigned long page_size_mask, pgprot_t prot)
{
@@ -420,8 +427,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
if (pmd_val(*pmd)) {
if (!pmd_large(*pmd)) {
spin_lock(&init_mm.page_table_lock);
- last_map_addr = phys_pte_update(pmd, address,
+ pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd));
+ last_map_addr = phys_pte_init(pte, address,
end, prot);
+ unmap_low_page(pte);
spin_unlock(&init_mm.page_table_lock);
continue;
}
@@ -468,18 +477,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
}
static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
- unsigned long page_size_mask, pgprot_t prot)
-{
- pmd_t *pmd = pmd_offset(pud, 0);
- unsigned long last_map_addr;
-
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
- __flush_tlb_all();
- return last_map_addr;
-}
-
-static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
unsigned long page_size_mask)
{
@@ -504,8 +501,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
if (pud_val(*pud)) {
if (!pud_large(*pud)) {
- last_map_addr = phys_pmd_update(pud, addr, end,
+ pmd = map_low_page(pmd_offset(pud, 0));
+ last_map_addr = phys_pmd_init(pmd, addr, end,
page_size_mask, prot);
+ unmap_low_page(pmd);
+ __flush_tlb_all();
continue;
}
/*
@@ -553,17 +553,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
return last_map_addr;
}
-static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
- unsigned long page_size_mask)
-{
- pud_t *pud;
-
- pud = (pud_t *)pgd_page_vaddr(*pgd);
-
- return phys_pud_init(pud, addr, end, page_size_mask);
-}
-
unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
@@ -587,8 +576,10 @@ kernel_physical_mapping_init(unsigned long start,
next = end;
if (pgd_val(*pgd)) {
- last_map_addr = phys_pud_update(pgd, __pa(start),
+ pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd));
+ last_map_addr = phys_pud_init(pud, __pa(start),
__pa(end), page_size_mask);
+ unmap_low_page(pud);
continue;
}
@@ -616,9 +607,63 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
memblock_x86_register_active_regions(0, start_pfn, end_pfn);
+ init_memory_mapping_high();
}
#endif
+struct mapping_work_data {
+ unsigned long start;
+ unsigned long end;
+ unsigned long pfn_mapped;
+};
+
+static int __init_refok
+mapping_work_fn(unsigned long start_pfn, unsigned long end_pfn, void *datax)
+{
+ struct mapping_work_data *data = datax;
+ unsigned long pfn_mapped;
+ unsigned long final_start, final_end;
+
+ final_start = max_t(unsigned long, start_pfn<<PAGE_SHIFT, data->start);
+ final_end = min_t(unsigned long, end_pfn<<PAGE_SHIFT, data->end);
+
+ if (final_end <= final_start)
+ return 0;
+
+ pfn_mapped = init_memory_mapping(final_start, final_end);
+
+ if (pfn_mapped > data->pfn_mapped)
+ data->pfn_mapped = pfn_mapped;
+
+ return 0;
+}
+
+static unsigned long __init_refok
+init_memory_mapping_active_regions(unsigned long start, unsigned long end)
+{
+ struct mapping_work_data data;
+
+ data.start = start;
+ data.end = end;
+ data.pfn_mapped = 0;
+
+ work_with_active_regions(MAX_NUMNODES, mapping_work_fn, &data);
+
+ return data.pfn_mapped;
+}
+
+void __init_refok init_memory_mapping_high(void)
+{
+ if (max_pfn > max_low_pfn) {
+ max_pfn_mapped = init_memory_mapping_active_regions(1UL<<32,
+ max_pfn<<PAGE_SHIFT);
+ /* can we preserve max_low_pfn ? */
+ max_low_pfn = max_pfn;
+
+ memblock.current_limit = get_max_mapped();
+ }
+}
+
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index ebf6d7887a38..9559d360fde7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt)
early_param("numa", numa_setup);
/*
- * Which logical CPUs are on which nodes
+ * apicid, cpu, node mappings
*/
+s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);
/*
+ * Map cpu index to node index
+ */
+DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
+
+void __cpuinit numa_set_node(int cpu, int node)
+{
+ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
+
+ /* early setting, no percpu area yet */
+ if (cpu_to_node_map) {
+ cpu_to_node_map[cpu] = node;
+ return;
+ }
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
+ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
+ dump_stack();
+ return;
+ }
+#endif
+ per_cpu(x86_cpu_to_node_map, cpu) = node;
+
+ if (node != NUMA_NO_NODE)
+ set_cpu_numa_node(cpu, node);
+}
+
+void __cpuinit numa_clear_node(int cpu)
+{
+ numa_set_node(cpu, NUMA_NO_NODE);
+}
+
+/*
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void)
pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
}
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+/*
+ * There are unfortunately some poorly designed mainboards around that
+ * only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ * mapping. To avoid this fill in the mapping for all possible CPUs,
+ * as the number of CPUs is not known yet. We round robin the existing
+ * nodes.
+ */
+void __init numa_init_array(void)
+{
+ int rr, i;
+
+ rr = first_node(node_online_map);
+ for (i = 0; i < nr_cpu_ids; i++) {
+ if (early_cpu_to_node(i) != NUMA_NO_NODE)
+ continue;
+ numa_set_node(i, rr);
+ rr = next_node(rr, node_online_map);
+ if (rr == MAX_NUMNODES)
+ rr = first_node(node_online_map);
+ }
+}
+
+static __init int find_near_online_node(int node)
+{
+ int n, val;
+ int min_val = INT_MAX;
+ int best_node = -1;
+
+ for_each_online_node(n) {
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ return best_node;
+}
+
+/*
+ * Setup early cpu_to_node.
+ *
+ * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
+ * and apicid_to_node[] tables have valid entries for a CPU.
+ * This means we skip cpu_to_node[] initialisation for NUMA
+ * emulation and faking node case (when running a kernel compiled
+ * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
+ * is already initialized in a round robin manner at numa_init_array,
+ * prior to this call, and this initialization is good enough
+ * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
+ */
+void __init init_cpu_to_node(void)
+{
+ int cpu;
+ u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
+
+ BUG_ON(cpu_to_apicid == NULL);
+
+ for_each_possible_cpu(cpu) {
+ int node = numa_cpu_node(cpu);
+
+ if (node == NUMA_NO_NODE)
+ continue;
+ if (!node_online(node))
+ node = find_near_online_node(node);
+ numa_set_node(cpu, node);
+ }
+}
+
+#ifndef CONFIG_DEBUG_PER_CPU_MAPS
+
+# ifndef CONFIG_NUMA_EMU
+void __cpuinit numa_add_cpu(int cpu)
+{
+ cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
+#else /* !CONFIG_DEBUG_PER_CPU_MAPS */
+
+int __cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
+ printk(KERN_WARNING
+ "cpu_to_node(%d): usage too early!\n", cpu);
+ dump_stack();
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+EXPORT_SYMBOL(__cpu_to_node);
+
+/*
+ * Same function as cpu_to_node() but used if called before the
+ * per_cpu areas are setup.
+ */
+int early_cpu_to_node(int cpu)
+{
+ if (early_per_cpu_ptr(x86_cpu_to_node_map))
+ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
+
+ if (!cpu_possible(cpu)) {
+ printk(KERN_WARNING
+ "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
+ dump_stack();
+ return NUMA_NO_NODE;
+ }
+ return per_cpu(x86_cpu_to_node_map, cpu);
+}
+
+struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
+{
+ int node = early_cpu_to_node(cpu);
+ struct cpumask *mask;
+ char buf[64];
+
+ if (node == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return NULL;
+ }
+ mask = node_to_cpumask_map[node];
+ if (!mask) {
+ pr_err("node_to_cpumask_map[%i] NULL\n", node);
+ dump_stack();
+ return NULL;
+ }
+
+ cpulist_scnprintf(buf, sizeof(buf), mask);
+ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
+ enable ? "numa_add_cpu" : "numa_remove_cpu",
+ cpu, node, buf);
+ return mask;
+}
+
+# ifndef CONFIG_NUMA_EMU
+static void __cpuinit numa_set_cpumask(int cpu, int enable)
+{
+ struct cpumask *mask;
+
+ mask = debug_cpumask_set_cpu(cpu, enable);
+ if (!mask)
+ return;
+
+ if (enable)
+ cpumask_set_cpu(cpu, mask);
+ else
+ cpumask_clear_cpu(cpu, mask);
+}
+
+void __cpuinit numa_add_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 1);
+}
+
+void __cpuinit numa_remove_cpu(int cpu)
+{
+ numa_set_cpumask(cpu, 0);
+}
+# endif /* !CONFIG_NUMA_EMU */
+
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node)
return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);
-#endif
+
+#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 84a3e4c9f277..505bb04654b5 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
static unsigned long kva_start_pfn;
static unsigned long kva_pages;
+
+int __cpuinit numa_cpu_node(int cpu)
+{
+ return apic->x86_32_numa_cpu_node(cpu);
+}
+
/*
* FLAT - support for basic PC memory model with discontig enabled, essentially
* a single node with all available processors in it with a flat
@@ -361,6 +367,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
*/
get_memcfg_numa();
+ numa_init_array();
kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 95ea1551eebc..43ad3273561a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -26,20 +26,10 @@ EXPORT_SYMBOL(node_data);
struct memnode memnode;
-s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
- [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
-};
-
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
/*
- * Map cpu index to node index
- */
-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
-/*
* Given a shift value, try to populate memnodemap[]
* Returns :
* 1 if OK
@@ -86,7 +76,7 @@ static int __init allocate_cachealigned_memnodemap(void)
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
- nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT,
+ nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
@@ -234,28 +224,6 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
node_set_online(nodeid);
}
-/*
- * There are unfortunately some poorly designed mainboards around that
- * only connect memory to a single CPU. This breaks the 1:1 cpu->node
- * mapping. To avoid this fill in the mapping for all possible CPUs,
- * as the number of CPUs is not known yet. We round robin the existing
- * nodes.
- */
-void __init numa_init_array(void)
-{
- int rr, i;
-
- rr = first_node(node_online_map);
- for (i = 0; i < nr_cpu_ids; i++) {
- if (early_cpu_to_node(i) != NUMA_NO_NODE)
- continue;
- numa_set_node(i, rr);
- rr = next_node(rr, node_online_map);
- if (rr == MAX_NUMNODES)
- rr = first_node(node_online_map);
- }
-}
-
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
@@ -598,11 +566,12 @@ static int __init numa_emulation(unsigned long start_pfn,
* the e820 memory map.
*/
remove_all_active_ranges();
- for_each_node_mask(i, node_possible_map) {
+ for_each_node_mask(i, node_possible_map)
memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
+ init_memory_mapping_high();
+ for_each_node_mask(i, node_possible_map)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- }
setup_physnodes(addr, max_addr, acpi, amd);
fake_physnodes(acpi, amd, num_nodes);
numa_init_array();
@@ -658,6 +627,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
for (i = 0; i < nr_cpu_ids; i++)
numa_set_node(i, 0);
memblock_x86_register_active_regions(0, start_pfn, last_pfn);
+ init_memory_mapping_high();
setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
}
@@ -674,115 +644,33 @@ unsigned long __init numa_free_all_bootmem(void)
return pages;
}
-#ifdef CONFIG_NUMA
-
-static __init int find_near_online_node(int node)
+int __cpuinit numa_cpu_node(int cpu)
{
- int n, val;
- int min_val = INT_MAX;
- int best_node = -1;
-
- for_each_online_node(n) {
- val = node_distance(node, n);
-
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
- }
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- return best_node;
+ if (apicid != BAD_APICID)
+ return __apicid_to_node[apicid];
+ return NUMA_NO_NODE;
}
/*
- * Setup early cpu_to_node.
- *
- * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
- * and apicid_to_node[] tables have valid entries for a CPU.
- * This means we skip cpu_to_node[] initialisation for NUMA
- * emulation and faking node case (when running a kernel compiled
- * for NUMA on a non NUMA box), which is OK as cpu_to_node[]
- * is already initialized in a round robin manner at numa_init_array,
- * prior to this call, and this initialization is good enough
- * for the fake NUMA cases.
+ * UGLINESS AHEAD: Currently, CONFIG_NUMA_EMU is 64bit only and makes use
+ * of 64bit specific data structures. The distinction is artificial and
+ * should be removed. numa_{add|remove}_cpu() are implemented in numa.c
+ * for both 32 and 64bit when CONFIG_NUMA_EMU is disabled but here when
+ * enabled.
*
- * Called before the per_cpu areas are setup.
+ * NUMA emulation is planned to be made generic and the following and other
+ * related code should be moved to numa.c.
*/
-void __init init_cpu_to_node(void)
-{
- int cpu;
- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-
- BUG_ON(cpu_to_apicid == NULL);
-
- for_each_possible_cpu(cpu) {
- int node;
- u16 apicid = cpu_to_apicid[cpu];
-
- if (apicid == BAD_APICID)
- continue;
- node = apicid_to_node[apicid];
- if (node == NUMA_NO_NODE)
- continue;
- if (!node_online(node))
- node = find_near_online_node(node);
- numa_set_node(cpu, node);
- }
-}
-#endif
-
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
-
- /* early setting, no percpu area yet */
- if (cpu_to_node_map) {
- cpu_to_node_map[cpu] = node;
- return;
- }
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
- if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
- printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
- dump_stack();
- return;
- }
-#endif
- per_cpu(x86_cpu_to_node_map, cpu) = node;
-
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
-}
-
-void __cpuinit numa_clear_node(int cpu)
-{
- numa_set_node(cpu, NUMA_NO_NODE);
-}
-
-#ifndef CONFIG_DEBUG_PER_CPU_MAPS
-
-#ifndef CONFIG_NUMA_EMU
-void __cpuinit numa_add_cpu(int cpu)
-{
- cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_remove_cpu(int cpu)
-{
- cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-#else
+#ifdef CONFIG_NUMA_EMU
+# ifndef CONFIG_DEBUG_PER_CPU_MAPS
void __cpuinit numa_add_cpu(int cpu)
{
unsigned long addr;
- u16 apicid;
- int physnid;
- int nid = NUMA_NO_NODE;
+ int physnid, nid;
- apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid != BAD_APICID)
- nid = apicid_to_node[apicid];
+ nid = numa_cpu_node(cpu);
if (nid == NUMA_NO_NODE)
nid = early_cpu_to_node(cpu);
BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));
@@ -816,53 +704,17 @@ void __cpuinit numa_remove_cpu(int cpu)
for_each_online_node(i)
cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
}
-#endif /* !CONFIG_NUMA_EMU */
-
-#else /* CONFIG_DEBUG_PER_CPU_MAPS */
-static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
-{
- int node = early_cpu_to_node(cpu);
- struct cpumask *mask;
- char buf[64];
-
- mask = node_to_cpumask_map[node];
- if (!mask) {
- pr_err("node_to_cpumask_map[%i] NULL\n", node);
- dump_stack();
- return NULL;
- }
-
- cpulist_scnprintf(buf, sizeof(buf), mask);
- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
- enable ? "numa_add_cpu" : "numa_remove_cpu",
- cpu, node, buf);
- return mask;
-}
-
-/*
- * --------- debug versions of the numa functions ---------
- */
-#ifndef CONFIG_NUMA_EMU
-static void __cpuinit numa_set_cpumask(int cpu, int enable)
-{
- struct cpumask *mask;
-
- mask = debug_cpumask_set_cpu(cpu, enable);
- if (!mask)
- return;
-
- if (enable)
- cpumask_set_cpu(cpu, mask);
- else
- cpumask_clear_cpu(cpu, mask);
-}
-#else
+# else /* !CONFIG_DEBUG_PER_CPU_MAPS */
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
int node = early_cpu_to_node(cpu);
struct cpumask *mask;
int i;
+ if (node == NUMA_NO_NODE) {
+ /* early_cpu_to_node() already emits a warning and trace */
+ return;
+ }
for_each_online_node(i) {
unsigned long addr;
@@ -880,7 +732,6 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable)
cpumask_clear_cpu(cpu, mask);
}
}
-#endif /* CONFIG_NUMA_EMU */
void __cpuinit numa_add_cpu(int cpu)
{
@@ -891,39 +742,5 @@ void __cpuinit numa_remove_cpu(int cpu)
{
numa_set_cpumask(cpu, 0);
}
-
-int __cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
- printk(KERN_WARNING
- "cpu_to_node(%d): usage too early!\n", cpu);
- dump_stack();
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-EXPORT_SYMBOL(__cpu_to_node);
-
-/*
- * Same function as cpu_to_node() but used if called before the
- * per_cpu areas are setup.
- */
-int early_cpu_to_node(int cpu)
-{
- if (early_per_cpu_ptr(x86_cpu_to_node_map))
- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
-
- if (!cpu_possible(cpu)) {
- printk(KERN_WARNING
- "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
- dump_stack();
- return NUMA_NO_NODE;
- }
- return per_cpu(x86_cpu_to_node_map, cpu);
-}
-
-/*
- * --------- end of debug versions of the numa functions ---------
- */
-
-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
+# endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
+#endif /* CONFIG_NUMA_EMU */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index ae96e7b8051d..48651c6f657d 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -57,7 +57,7 @@ struct node_memory_chunk_s {
static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS];
static int __initdata num_memory_chunks; /* total number of memory chunks */
-static u8 __initdata apicid_to_pxm[MAX_APICID];
+static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC];
int acpi_numa __initdata;
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void)
printk(KERN_DEBUG "Number of memory chunks in system = %d\n",
num_memory_chunks);
- for (i = 0; i < MAX_APICID; i++)
- apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]);
+ for (i = 0; i < MAX_LOCAL_APIC; i++)
+ set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i]));
for (j = 0; j < num_memory_chunks; j++){
struct node_memory_chunk_s * chunk = &node_memory_chunk[j];
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 603d285d1daa..23498f8b09a2 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -79,7 +79,7 @@ static __init void bad_srat(void)
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
for (i = 0; i < MAX_LOCAL_APIC; i++)
- apicid_to_node[i] = NUMA_NO_NODE;
+ set_apicid_to_node(i, NUMA_NO_NODE);
for (i = 0; i < MAX_NUMNODES; i++) {
nodes[i].start = nodes[i].end = 0;
nodes_add[i].start = nodes_add[i].end = 0;
@@ -138,7 +138,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
- apicid_to_node[apic_id] = node;
+ set_apicid_to_node(apic_id, node);
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
@@ -178,7 +178,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
return;
}
- apicid_to_node[apic_id] = node;
+ set_apicid_to_node(apic_id, node);
node_set(node, cpu_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
@@ -444,6 +444,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return -1;
}
+ init_memory_mapping_high();
+
/* Account for nodes with cpus and no memory */
nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
@@ -521,7 +523,7 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
* node, it must now point to the fake node ID.
*/
for (j = 0; j < MAX_LOCAL_APIC; j++)
- if (apicid_to_node[j] == nid &&
+ if (__apicid_to_node[j] == nid &&
fake_apicid_to_node[j] == NUMA_NO_NODE)
fake_apicid_to_node[j] = i;
}
@@ -532,13 +534,13 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
* value.
*/
for (i = 0; i < MAX_LOCAL_APIC; i++)
- if (apicid_to_node[i] != NUMA_NO_NODE &&
+ if (__apicid_to_node[i] != NUMA_NO_NODE &&
fake_apicid_to_node[i] == NUMA_NO_NODE)
fake_apicid_to_node[i] = 0;
for (i = 0; i < num_nodes; i++)
__acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
- memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+ memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
nodes_clear(nodes_parsed);
for (i = 0; i < num_nodes; i++)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8f..55272d7c3b0b 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
sender = this_cpu_read(tlb_vector_offset);
f = &flush_state[sender];
- /*
- * Could avoid this lock when
- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
- * probably not worth checking this for a cache-hot lock.
- */
- raw_spin_lock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_lock(&f->tlbstate_lock);
f->flush_mm = mm;
f->flush_va = va;
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask,
f->flush_mm = NULL;
f->flush_va = 0;
- raw_spin_unlock(&f->tlbstate_lock);
+ if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS)
+ raw_spin_unlock(&f->tlbstate_lock);
}
void native_flush_tlb_others(const struct cpumask *cpumask,
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index e27dffbbb1a7..026e4931d162 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void)
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
-static void enable_pci_io_ecs(void *unused)
+static void __cpuinit enable_pci_io_ecs(void *unused)
{
u64 reg;
rdmsrl(MSR_AMD64_NB_CFG, reg);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index ea6529e93c6f..5c0207bf959b 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -31,6 +31,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/mrst.h>
+#include <asm/mrst-vrtc.h>
#include <asm/io.h>
#include <asm/i8259.h>
#include <asm/intel_scu_ipc.h>
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void)
x86_platform.calibrate_tsc = mrst_calibrate_tsc;
x86_platform.i8042_detect = mrst_i8042_detect;
+ x86_init.timers.wallclock_init = mrst_rtc_init;
x86_init.pci.init = pci_mrst_init;
x86_init.pci.fixup_irqs = x86_init_noop;
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c
index 32cd7edd71a0..04cf645feb92 100644
--- a/arch/x86/platform/mrst/vrtc.c
+++ b/arch/x86/platform/mrst/vrtc.c
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime)
void __init mrst_rtc_init(void)
{
- unsigned long rtc_paddr;
- void __iomem *virt_base;
+ unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr;
sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
- if (!sfi_mrtc_num)
+ if (!sfi_mrtc_num || !vrtc_paddr)
return;
- rtc_paddr = sfi_mrtc_array[0].phys_addr;
-
- /* vRTC's register address may not be page aligned */
- set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr);
-
- virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC);
- virt_base += rtc_paddr & ~PAGE_MASK;
- vrtc_virt_base = virt_base;
-
+ vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
+ vrtc_paddr);
x86_platform.get_wallclock = vrtc_get_time;
x86_platform.set_wallclock = vrtc_set_mmss;
}
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _XTENSA_RWSEM_H */
diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c
index 88ba96e2df25..db18807bb9e7 100644
--- a/arch/xtensa/kernel/time.c
+++ b/arch/xtensa/kernel/time.c
@@ -96,16 +96,12 @@ again:
update_process_times(user_mode(get_irq_regs()));
#endif
- write_seqlock(&xtime_lock);
-
- do_timer(1); /* Linux handler in kernel/timer.c */
+ xtime_update(1); /* Linux handler in kernel/time/timekeeping */
/* Note that writing CCOMPARE clears the interrupt. */
next += CCOUNT_PER_JIFFY;
set_linux_timer(next);
-
- write_sequnlock(&xtime_lock);
}
/* Allow platform to do something useful (Wdog). */
diff --git a/drivers/char/mmtimer.c b/drivers/char/mmtimer.c
index e6d75627c6c8..33dc2298af73 100644
--- a/drivers/char/mmtimer.c
+++ b/drivers/char/mmtimer.c
@@ -53,6 +53,8 @@ MODULE_LICENSE("GPL");
#define RTC_BITS 55 /* 55 bits for this implementation */
+static struct k_clock sgi_clock;
+
extern unsigned long sn_rtc_cycles_per_second;
#define RTC_COUNTER_ADDR ((long *)LOCAL_MMR_ADDR(SH_RTC))
@@ -487,7 +489,7 @@ static int sgi_clock_get(clockid_t clockid, struct timespec *tp)
return 0;
};
-static int sgi_clock_set(clockid_t clockid, struct timespec *tp)
+static int sgi_clock_set(const clockid_t clockid, const struct timespec *tp)
{
u64 nsec;
@@ -763,15 +765,21 @@ static int sgi_timer_set(struct k_itimer *timr, int flags,
return err;
}
+static int sgi_clock_getres(const clockid_t which_clock, struct timespec *tp)
+{
+ tp->tv_sec = 0;
+ tp->tv_nsec = sgi_clock_period;
+ return 0;
+}
+
static struct k_clock sgi_clock = {
- .res = 0,
- .clock_set = sgi_clock_set,
- .clock_get = sgi_clock_get,
- .timer_create = sgi_timer_create,
- .nsleep = do_posix_clock_nonanosleep,
- .timer_set = sgi_timer_set,
- .timer_del = sgi_timer_del,
- .timer_get = sgi_timer_get
+ .clock_set = sgi_clock_set,
+ .clock_get = sgi_clock_get,
+ .clock_getres = sgi_clock_getres,
+ .timer_create = sgi_timer_create,
+ .timer_set = sgi_timer_set,
+ .timer_del = sgi_timer_del,
+ .timer_get = sgi_timer_get
};
/**
@@ -831,8 +839,8 @@ static int __init mmtimer_init(void)
(unsigned long) node);
}
- sgi_clock_period = sgi_clock.res = NSEC_PER_SEC / sn_rtc_cycles_per_second;
- register_posix_clock(CLOCK_SGI_CYCLE, &sgi_clock);
+ sgi_clock_period = NSEC_PER_SEC / sn_rtc_cycles_per_second;
+ posix_timers_register_clock(CLOCK_SGI_CYCLE, &sgi_clock);
printk(KERN_INFO "%s: v%s, %ld MHz\n", MMTIMER_DESC, MMTIMER_VERSION,
sn_rtc_cycles_per_second/(unsigned long)1E6);
diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 1db62db8469d..8d2cf6027120 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -62,6 +62,17 @@ static inline int is_intr(u8 rtc_intr)
return rtc_intr & RTC_IRQMASK;
}
+static inline unsigned char vrtc_is_updating(void)
+{
+ unsigned char uip;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rtc_lock, flags);
+ uip = (vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP);
+ spin_unlock_irqrestore(&rtc_lock, flags);
+ return uip;
+}
+
/*
* rtc_time's year contains the increment over 1900, but vRTC's YEAR
* register can't be programmed to value larger than 0x64, so vRTC
@@ -76,7 +87,7 @@ static int mrst_read_time(struct device *dev, struct rtc_time *time)
{
unsigned long flags;
- if (rtc_is_updating())
+ if (vrtc_is_updating())
mdelay(20);
spin_lock_irqsave(&rtc_lock, flags);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index dcd6a7c3a435..ca29e03c1fac 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -428,6 +428,7 @@ extern void unregister_ftrace_graph(void);
extern void ftrace_graph_init_task(struct task_struct *t);
extern void ftrace_graph_exit_task(struct task_struct *t);
+extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
static inline int task_curr_ret_stack(struct task_struct *t)
{
@@ -451,6 +452,7 @@ static inline void unpause_graph_tracing(void)
static inline void ftrace_graph_init_task(struct task_struct *t) { }
static inline void ftrace_graph_exit_task(struct task_struct *t) { }
+static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { }
static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e89c5c735d1e..5114eb304acf 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -57,7 +57,7 @@
* Used by threaded interrupts which need to keep the
* irq line disabled until the threaded handler has been run.
* IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
- *
+ * IRQF_FORCE_RESUME - Force enable it on resume even if IRQF_NO_SUSPEND is set
*/
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -69,6 +69,7 @@
#define IRQF_IRQPOLL 0x00001000
#define IRQF_ONESHOT 0x00002000
#define IRQF_NO_SUSPEND 0x00004000
+#define IRQF_FORCE_RESUME 0x00008000
#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
};
#define KTHREAD_WORKER_INIT(worker) { \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
.work_list = LIST_HEAD_INIT((worker).work_list), \
}
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 46f23999458d..5aaf45e38b56 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
new file mode 100644
index 000000000000..369e19d3750b
--- /dev/null
+++ b/include/linux/posix-clock.h
@@ -0,0 +1,150 @@
+/*
+ * posix-clock.h - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _LINUX_POSIX_CLOCK_H_
+#define _LINUX_POSIX_CLOCK_H_
+
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/posix-timers.h>
+
+struct posix_clock;
+
+/**
+ * struct posix_clock_operations - functional interface to the clock
+ *
+ * Every posix clock is represented by a character device. Drivers may
+ * optionally offer extended capabilities by implementing the
+ * character device methods. The character device file operations are
+ * first handled by the clock device layer, then passed on to the
+ * driver by calling these functions.
+ *
+ * @owner: The clock driver should set to THIS_MODULE
+ * @clock_adjtime: Adjust the clock
+ * @clock_gettime: Read the current time
+ * @clock_getres: Get the clock resolution
+ * @clock_settime: Set the current time value
+ * @timer_create: Create a new timer
+ * @timer_delete: Remove a previously created timer
+ * @timer_gettime: Get remaining time and interval of a timer
+ * @timer_setttime: Set a timer's initial expiration and interval
+ * @fasync: Optional character device fasync method
+ * @mmap: Optional character device mmap method
+ * @open: Optional character device open method
+ * @release: Optional character device release method
+ * @ioctl: Optional character device ioctl method
+ * @read: Optional character device read method
+ * @poll: Optional character device poll method
+ */
+struct posix_clock_operations {
+ struct module *owner;
+
+ int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+
+ int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_getres) (struct posix_clock *pc, struct timespec *ts);
+
+ int (*clock_settime)(struct posix_clock *pc,
+ const struct timespec *ts);
+
+ int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit);
+
+ int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit);
+
+ void (*timer_gettime)(struct posix_clock *pc,
+ struct k_itimer *kit, struct itimerspec *tsp);
+
+ int (*timer_settime)(struct posix_clock *pc,
+ struct k_itimer *kit, int flags,
+ struct itimerspec *tsp, struct itimerspec *old);
+ /*
+ * Optional character device methods:
+ */
+ int (*fasync) (struct posix_clock *pc,
+ int fd, struct file *file, int on);
+
+ long (*ioctl) (struct posix_clock *pc,
+ unsigned int cmd, unsigned long arg);
+
+ int (*mmap) (struct posix_clock *pc,
+ struct vm_area_struct *vma);
+
+ int (*open) (struct posix_clock *pc, fmode_t f_mode);
+
+ uint (*poll) (struct posix_clock *pc,
+ struct file *file, poll_table *wait);
+
+ int (*release) (struct posix_clock *pc);
+
+ ssize_t (*read) (struct posix_clock *pc,
+ uint flags, char __user *buf, size_t cnt);
+};
+
+/**
+ * struct posix_clock - represents a dynamic posix clock
+ *
+ * @ops: Functional interface to the clock
+ * @cdev: Character device instance for this clock
+ * @kref: Reference count.
+ * @mutex: Protects the 'zombie' field from concurrent access.
+ * @zombie: If 'zombie' is true, then the hardware has disappeared.
+ * @release: A function to free the structure when the reference count reaches
+ * zero. May be NULL if structure is statically allocated.
+ *
+ * Drivers should embed their struct posix_clock within a private
+ * structure, obtaining a reference to it during callbacks using
+ * container_of().
+ */
+struct posix_clock {
+ struct posix_clock_operations ops;
+ struct cdev cdev;
+ struct kref kref;
+ struct mutex mutex;
+ bool zombie;
+ void (*release)(struct posix_clock *clk);
+};
+
+/**
+ * posix_clock_register() - register a new clock
+ * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
+ * @devid: Allocated device id
+ *
+ * A clock driver calls this function to register itself with the
+ * clock device subsystem. If 'clk' points to dynamically allocated
+ * memory, then the caller must provide a 'release' function to free
+ * that memory.
+ *
+ * Returns zero on success, non-zero otherwise.
+ */
+int posix_clock_register(struct posix_clock *clk, dev_t devid);
+
+/**
+ * posix_clock_unregister() - unregister a clock
+ * @clk: Clock instance previously registered via posix_clock_register()
+ *
+ * A clock driver calls this function to remove itself from the clock
+ * device subsystem. The posix_clock itself will remain (in an
+ * inactive state) until its reference count drops to zero, at which
+ * point it will be deallocated with its 'release' method.
+ */
+void posix_clock_unregister(struct posix_clock *clk);
+
+#endif
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3e23844a6990..d51243ae0726 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/sched.h>
+#include <linux/timex.h>
union cpu_time_count {
cputime_t cpu;
@@ -17,10 +18,21 @@ struct cpu_timer_list {
int firing;
};
+/*
+ * Bit fields within a clockid:
+ *
+ * The most significant 29 bits hold either a pid or a file descriptor.
+ *
+ * Bit 2 indicates whether a cpu clock refers to a thread or a process.
+ *
+ * Bits 1 and 0 give the type: PROF=0, VIRT=1, SCHED=2, or FD=3.
+ *
+ * A clockid is invalid if bits 2, 1, and 0 are all set.
+ */
#define CPUCLOCK_PID(clock) ((pid_t) ~((clock) >> 3))
#define CPUCLOCK_PERTHREAD(clock) \
(((clock) & (clockid_t) CPUCLOCK_PERTHREAD_MASK) != 0)
-#define CPUCLOCK_PID_MASK 7
+
#define CPUCLOCK_PERTHREAD_MASK 4
#define CPUCLOCK_WHICH(clock) ((clock) & (clockid_t) CPUCLOCK_CLOCK_MASK)
#define CPUCLOCK_CLOCK_MASK 3
@@ -28,12 +40,17 @@ struct cpu_timer_list {
#define CPUCLOCK_VIRT 1
#define CPUCLOCK_SCHED 2
#define CPUCLOCK_MAX 3
+#define CLOCKFD CPUCLOCK_MAX
+#define CLOCKFD_MASK (CPUCLOCK_PERTHREAD_MASK|CPUCLOCK_CLOCK_MASK)
#define MAKE_PROCESS_CPUCLOCK(pid, clock) \
((~(clockid_t) (pid) << 3) | (clockid_t) (clock))
#define MAKE_THREAD_CPUCLOCK(tid, clock) \
MAKE_PROCESS_CPUCLOCK((tid), (clock) | CPUCLOCK_PERTHREAD_MASK)
+#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
+#define CLOCKID_TO_FD(clk) ((unsigned int) ~((clk) >> 3))
+
/* POSIX.1b interval timer structure. */
struct k_itimer {
struct list_head list; /* free/ allocate list */
@@ -67,10 +84,11 @@ struct k_itimer {
};
struct k_clock {
- int res; /* in nanoseconds */
int (*clock_getres) (const clockid_t which_clock, struct timespec *tp);
- int (*clock_set) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_set) (const clockid_t which_clock,
+ const struct timespec *tp);
int (*clock_get) (const clockid_t which_clock, struct timespec * tp);
+ int (*clock_adj) (const clockid_t which_clock, struct timex *tx);
int (*timer_create) (struct k_itimer *timer);
int (*nsleep) (const clockid_t which_clock, int flags,
struct timespec *, struct timespec __user *);
@@ -84,28 +102,14 @@ struct k_clock {
struct itimerspec * cur_setting);
};
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock);
+extern struct k_clock clock_posix_cpu;
+extern struct k_clock clock_posix_dynamic;
-/* error handlers for timer_create, nanosleep and settime */
-int do_posix_clock_nonanosleep(const clockid_t, int flags, struct timespec *,
- struct timespec __user *);
-int do_posix_clock_nosettime(const clockid_t, struct timespec *tp);
+void posix_timers_register_clock(const clockid_t clock_id, struct k_clock *new_clock);
/* function to call to trigger timer event */
int posix_timer_event(struct k_itimer *timr, int si_private);
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *ts);
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *ts);
-int posix_cpu_timer_create(struct k_itimer *timer);
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp);
-long posix_cpu_nsleep_restart(struct restart_block *restart_block);
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old);
-int posix_cpu_timer_del(struct k_itimer *timer);
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp);
-
void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
RW_DEP_MAP_INIT(lockname) }
#endif
-/*
- * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
- * deprecated.
- *
- * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
- */
-#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
-
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
#endif
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
#ifdef __KERNEL__
-
-#include <linux/types.h>
-
-struct rwsem_waiter;
-
/*
* the rw-semaphore definition
* - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
#endif
};
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
+#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
#include <asm/system.h>
#include <asm/atomic.h>
@@ -19,9 +22,57 @@ struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
#else
-#include <asm/rwsem.h> /* use an arch-specific implementation */
+/* All arch specific implementations share the same struct */
+struct rw_semaphore {
+ long count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
+/* Include the arch specific part */
+#include <asm/rwsem.h>
+
+/* In all implementations count != 0 means locked */
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return sem->count != 0;
+}
+
+#endif
+
+/* Common initializer macros and functions */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
+ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key);
+
+#define init_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __init_rwsem((sem), #sem, &__key); \
+} while (0)
+
/*
* lock for reading
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 755c4764b10d..c15936fe998b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2046,7 +2046,7 @@ extern void release_uids(struct user_namespace *ns);
#include <asm/current.h>
-extern void do_timer(unsigned long ticks);
+extern void xtime_update(unsigned long ticks);
extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
@@ -2575,13 +2575,6 @@ static inline void inc_syscw(struct task_struct *tsk)
#define TASK_SIZE_OF(tsk) TASK_SIZE
#endif
-/*
- * Call the function if the target task is executing on a CPU right now:
- */
-extern void task_oncpu_function_call(struct task_struct *p,
- void (*func) (void *info), void *info);
-
-
#ifdef CONFIG_MM_OWNER
extern void mm_update_next_owner(struct mm_struct *mm);
extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
diff --git a/include/linux/security.h b/include/linux/security.h
index ff2e25dcdeb8..49d875a2d6bc 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -54,7 +54,7 @@ struct audit_krule;
*/
extern int cap_capable(struct task_struct *tsk, const struct cred *cred,
int cap, int audit);
-extern int cap_settime(struct timespec *ts, struct timezone *tz);
+extern int cap_settime(const struct timespec *ts, const struct timezone *tz);
extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
extern int cap_ptrace_traceme(struct task_struct *parent);
extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted);
@@ -1382,7 +1382,7 @@ struct security_operations {
int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
int (*quota_on) (struct dentry *dentry);
int (*syslog) (int type);
- int (*settime) (struct timespec *ts, struct timezone *tz);
+ int (*settime) (const struct timespec *ts, const struct timezone *tz);
int (*vm_enough_memory) (struct mm_struct *mm, long pages);
int (*bprm_set_creds) (struct linux_binprm *bprm);
@@ -1664,7 +1664,7 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap);
int security_quotactl(int cmds, int type, int id, struct super_block *sb);
int security_quota_on(struct dentry *dentry);
int security_syslog(int type);
-int security_settime(struct timespec *ts, struct timezone *tz);
+int security_settime(const struct timespec *ts, const struct timezone *tz);
int security_vm_enough_memory(long pages);
int security_vm_enough_memory_mm(struct mm_struct *mm, long pages);
int security_vm_enough_memory_kern(long pages);
@@ -1895,7 +1895,8 @@ static inline int security_syslog(int type)
return 0;
}
-static inline int security_settime(struct timespec *ts, struct timezone *tz)
+static inline int security_settime(const struct timespec *ts,
+ const struct timezone *tz)
{
return cap_settime(ts, tz);
}
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
-/*
- * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
- * deprecated.
- * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
- * appropriate.
- */
-#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
-
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
#include <linux/rwlock_types.h>
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 98664db1be47..265b2639f0b7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -313,6 +313,8 @@ asmlinkage long sys_clock_settime(clockid_t which_clock,
const struct timespec __user *tp);
asmlinkage long sys_clock_gettime(clockid_t which_clock,
struct timespec __user *tp);
+asmlinkage long sys_clock_adjtime(clockid_t which_clock,
+ struct timex __user *tx);
asmlinkage long sys_clock_getres(clockid_t which_clock,
struct timespec __user *tp);
asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index c90696544176..20fc303947d3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -18,9 +18,6 @@ struct compat_timespec;
struct restart_block {
long (*fn)(struct restart_block *);
union {
- struct {
- unsigned long arg0, arg1, arg2, arg3;
- };
/* For futex_wait and futex_wait_requeue_pi */
struct {
u32 __user *uaddr;
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238d..379b9037b5b4 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -113,8 +113,6 @@ static inline struct timespec timespec_sub(struct timespec lhs,
#define timespec_valid(ts) \
(((ts)->tv_sec >= 0) && (((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC))
-extern seqlock_t xtime_lock;
-
extern void read_persistent_clock(struct timespec *ts);
extern void read_boot_clock(struct timespec *ts);
extern int update_persistent_clock(struct timespec now);
@@ -125,8 +123,8 @@ extern int timekeeping_suspended;
unsigned long get_seconds(void);
struct timespec current_kernel_time(void);
struct timespec __current_kernel_time(void); /* does not take xtime_lock */
-struct timespec __get_wall_to_monotonic(void); /* does not take xtime_lock */
struct timespec get_monotonic_coarse(void);
+void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom);
#define CURRENT_TIME (current_kernel_time())
#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 })
@@ -147,8 +145,9 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
#endif
extern void do_gettimeofday(struct timeval *tv);
-extern int do_settimeofday(struct timespec *tv);
-extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz);
+extern int do_settimeofday(const struct timespec *tv);
+extern int do_sys_settimeofday(const struct timespec *tv,
+ const struct timezone *tz);
#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts)
extern long do_utimes(int dfd, const char __user *filename, struct timespec *times, int flags);
struct itimerval;
@@ -166,8 +165,8 @@ extern void monotonic_to_bootbased(struct timespec *ts);
extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void);
-extern void update_wall_time(void);
extern void timekeeping_leap_insert(int leapsecond);
+extern int timekeeping_inject_offset(struct timespec *ts);
struct tms;
extern void do_sys_times(struct tms *);
diff --git a/include/linux/timex.h b/include/linux/timex.h
index d23999f9499d..aa60fe7b6ed6 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -73,7 +73,7 @@ struct timex {
long tolerance; /* clock frequency tolerance (ppm)
* (read only)
*/
- struct timeval time; /* (read only) */
+ struct timeval time; /* (read only, except for ADJ_SETOFFSET) */
long tick; /* (modified) usecs between clock ticks */
long ppsfreq; /* pps frequency (scaled ppm) (ro) */
@@ -102,6 +102,7 @@ struct timex {
#define ADJ_STATUS 0x0010 /* clock status */
#define ADJ_TIMECONST 0x0020 /* pll time constant */
#define ADJ_TAI 0x0080 /* set TAI offset */
+#define ADJ_SETOFFSET 0x0100 /* add 'time' to current time */
#define ADJ_MICRO 0x1000 /* select microsecond resolution */
#define ADJ_NANO 0x2000 /* select nanosecond resolution */
#define ADJ_TICK 0x4000 /* tick value */
diff --git a/kernel/compat.c b/kernel/compat.c
index c9e2ec0b34a8..38b1d2c1cbe8 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -52,6 +52,64 @@ static int compat_put_timeval(struct compat_timeval __user *o,
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
}
+static int compat_get_timex(struct timex *txc, struct compat_timex __user *utp)
+{
+ memset(txc, 0, sizeof(struct timex));
+
+ if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
+ __get_user(txc->modes, &utp->modes) ||
+ __get_user(txc->offset, &utp->offset) ||
+ __get_user(txc->freq, &utp->freq) ||
+ __get_user(txc->maxerror, &utp->maxerror) ||
+ __get_user(txc->esterror, &utp->esterror) ||
+ __get_user(txc->status, &utp->status) ||
+ __get_user(txc->constant, &utp->constant) ||
+ __get_user(txc->precision, &utp->precision) ||
+ __get_user(txc->tolerance, &utp->tolerance) ||
+ __get_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __get_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __get_user(txc->tick, &utp->tick) ||
+ __get_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __get_user(txc->jitter, &utp->jitter) ||
+ __get_user(txc->shift, &utp->shift) ||
+ __get_user(txc->stabil, &utp->stabil) ||
+ __get_user(txc->jitcnt, &utp->jitcnt) ||
+ __get_user(txc->calcnt, &utp->calcnt) ||
+ __get_user(txc->errcnt, &utp->errcnt) ||
+ __get_user(txc->stbcnt, &utp->stbcnt))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc)
+{
+ if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
+ __put_user(txc->modes, &utp->modes) ||
+ __put_user(txc->offset, &utp->offset) ||
+ __put_user(txc->freq, &utp->freq) ||
+ __put_user(txc->maxerror, &utp->maxerror) ||
+ __put_user(txc->esterror, &utp->esterror) ||
+ __put_user(txc->status, &utp->status) ||
+ __put_user(txc->constant, &utp->constant) ||
+ __put_user(txc->precision, &utp->precision) ||
+ __put_user(txc->tolerance, &utp->tolerance) ||
+ __put_user(txc->time.tv_sec, &utp->time.tv_sec) ||
+ __put_user(txc->time.tv_usec, &utp->time.tv_usec) ||
+ __put_user(txc->tick, &utp->tick) ||
+ __put_user(txc->ppsfreq, &utp->ppsfreq) ||
+ __put_user(txc->jitter, &utp->jitter) ||
+ __put_user(txc->shift, &utp->shift) ||
+ __put_user(txc->stabil, &utp->stabil) ||
+ __put_user(txc->jitcnt, &utp->jitcnt) ||
+ __put_user(txc->calcnt, &utp->calcnt) ||
+ __put_user(txc->errcnt, &utp->errcnt) ||
+ __put_user(txc->stbcnt, &utp->stbcnt) ||
+ __put_user(txc->tai, &utp->tai))
+ return -EFAULT;
+ return 0;
+}
+
asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz)
{
@@ -617,6 +675,29 @@ long compat_sys_clock_gettime(clockid_t which_clock,
return err;
}
+long compat_sys_clock_adjtime(clockid_t which_clock,
+ struct compat_timex __user *utp)
+{
+ struct timex txc;
+ mm_segment_t oldfs;
+ int err, ret;
+
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ ret = sys_clock_adjtime(which_clock, (struct timex __user *) &txc);
+ set_fs(oldfs);
+
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
+
+ return ret;
+}
+
long compat_sys_clock_getres(clockid_t which_clock,
struct compat_timespec __user *tp)
{
@@ -951,58 +1032,17 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat
asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp)
{
struct timex txc;
- int ret;
-
- memset(&txc, 0, sizeof(struct timex));
+ int err, ret;
- if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) ||
- __get_user(txc.modes, &utp->modes) ||
- __get_user(txc.offset, &utp->offset) ||
- __get_user(txc.freq, &utp->freq) ||
- __get_user(txc.maxerror, &utp->maxerror) ||
- __get_user(txc.esterror, &utp->esterror) ||
- __get_user(txc.status, &utp->status) ||
- __get_user(txc.constant, &utp->constant) ||
- __get_user(txc.precision, &utp->precision) ||
- __get_user(txc.tolerance, &utp->tolerance) ||
- __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __get_user(txc.tick, &utp->tick) ||
- __get_user(txc.ppsfreq, &utp->ppsfreq) ||
- __get_user(txc.jitter, &utp->jitter) ||
- __get_user(txc.shift, &utp->shift) ||
- __get_user(txc.stabil, &utp->stabil) ||
- __get_user(txc.jitcnt, &utp->jitcnt) ||
- __get_user(txc.calcnt, &utp->calcnt) ||
- __get_user(txc.errcnt, &utp->errcnt) ||
- __get_user(txc.stbcnt, &utp->stbcnt))
- return -EFAULT;
+ err = compat_get_timex(&txc, utp);
+ if (err)
+ return err;
ret = do_adjtimex(&txc);
- if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) ||
- __put_user(txc.modes, &utp->modes) ||
- __put_user(txc.offset, &utp->offset) ||
- __put_user(txc.freq, &utp->freq) ||
- __put_user(txc.maxerror, &utp->maxerror) ||
- __put_user(txc.esterror, &utp->esterror) ||
- __put_user(txc.status, &utp->status) ||
- __put_user(txc.constant, &utp->constant) ||
- __put_user(txc.precision, &utp->precision) ||
- __put_user(txc.tolerance, &utp->tolerance) ||
- __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
- __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
- __put_user(txc.tick, &utp->tick) ||
- __put_user(txc.ppsfreq, &utp->ppsfreq) ||
- __put_user(txc.jitter, &utp->jitter) ||
- __put_user(txc.shift, &utp->shift) ||
- __put_user(txc.stabil, &utp->stabil) ||
- __put_user(txc.jitcnt, &utp->jitcnt) ||
- __put_user(txc.calcnt, &utp->calcnt) ||
- __put_user(txc.errcnt, &utp->errcnt) ||
- __put_user(txc.stbcnt, &utp->stbcnt) ||
- __put_user(txc.tai, &utp->tai))
- ret = -EFAULT;
+ err = compat_put_timex(utp, &txc);
+ if (err)
+ return err;
return ret;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index 3a9d6dd53a6c..2343c132c5a7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
};
#endif
diff --git a/kernel/futex.c b/kernel/futex.c
index b766d28accd6..64c38115c7b6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1556,10 +1556,10 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
/*
* We are here either because we stole the rtmutex from the
- * pending owner or we are the pending owner which failed to
- * get the rtmutex. We have to replace the pending owner TID
- * in the user space variable. This must be atomic as we have
- * to preserve the owner died bit here.
+ * previous highest priority waiter or we are the highest priority
+ * waiter but failed to get the rtmutex the first time.
+ * We have to replace the newowner TID in the user space variable.
+ * This must be atomic as we have to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the pi_state
* because we can fault here. Imagine swapped out pages or a fork
@@ -1608,8 +1608,8 @@ retry:
/*
* To handle the page fault we need to drop the hash bucket
- * lock here. That gives the other task (either the pending
- * owner itself or the task which stole the rtmutex) the
+ * lock here. That gives the other task (either the highest priority
+ * waiter itself or the task which stole the rtmutex) the
* chance to try the fixup of the pi_state. So once we are
* back from handling the fault we need to check the pi_state
* after reacquiring the hash bucket lock and before trying to
@@ -1685,18 +1685,20 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
/*
* pi_state is incorrect, some other task did a lock steal and
* we returned due to timeout or signal without taking the
- * rt_mutex. Too late. We can access the rt_mutex_owner without
- * locking, as the other task is now blocked on the hash bucket
- * lock. Fix the state up.
+ * rt_mutex. Too late.
*/
+ raw_spin_lock(&q->pi_state->pi_mutex.wait_lock);
owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+ if (!owner)
+ owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
+ raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock);
ret = fixup_pi_state_owner(uaddr, q, owner);
goto out;
}
/*
* Paranoia check. If we did not take the lock, then we should not be
- * the owner, nor the pending owner, of the rt_mutex.
+ * the owner of the rt_mutex.
*/
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..57c4d33c9a9d 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -85,13 +85,8 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
{
ktime_t xtim, tomono;
struct timespec xts, tom;
- unsigned long seq;
- do {
- seq = read_seqbegin(&xtime_lock);
- xts = __current_kernel_time();
- tom = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_offset(&xts, &tom);
xtim = timespec_to_ktime(xts);
tomono = timespec_to_ktime(tom);
@@ -612,15 +607,11 @@ static void retrigger_next_event(void *arg)
{
struct hrtimer_cpu_base *base;
struct timespec realtime_offset, wtm;
- unsigned long seq;
if (!hrtimer_hres_active())
return;
- do {
- seq = read_seqbegin(&xtime_lock);
- wtm = __get_wall_to_monotonic();
- } while (read_seqretry(&xtime_lock, seq));
+ get_xtime_and_monotonic_offset(&realtime_offset, &wtm);
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
base = &__get_cpu_var(hrtimer_bases);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 282f20230e67..a7ac6e1e7074 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -251,7 +251,6 @@ int __init early_irq_init(void)
for (i = 0; i < count; i++) {
desc[i].irq_data.irq = i;
desc[i].irq_data.chip = &no_irq_chip;
- /* TODO : do this allocation on-demand ... */
desc[i].kstat_irqs = alloc_percpu(unsigned int);
alloc_masks(desc + i, GFP_KERNEL, node);
desc_smp_init(desc + i, node);
@@ -277,22 +276,6 @@ static void free_desc(unsigned int irq)
static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
{
-#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
- struct irq_desc *desc;
- unsigned int i;
-
- for (i = 0; i < cnt; i++) {
- desc = irq_to_desc(start + i);
- if (desc && !desc->kstat_irqs) {
- unsigned int __percpu *stats = alloc_percpu(unsigned int);
-
- if (!stats)
- return -1;
- if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
- free_percpu(stats);
- }
- }
-#endif
return start;
}
#endif /* !CONFIG_SPARSE_IRQ */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0587c5ceaed8..47b2bf10afbc 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -179,7 +179,7 @@ static void irq_affinity_notify(struct work_struct *work)
cpumask_copy(cpumask, desc->pending_mask);
else
#endif
- cpumask_copy(cpumask, desc->affinity);
+ cpumask_copy(cpumask, desc->irq_data.affinity);
raw_spin_unlock_irqrestore(&desc->lock, flags);
notify->notify(notify, cpumask);
@@ -359,8 +359,17 @@ EXPORT_SYMBOL(disable_irq);
void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
{
- if (resume)
+ if (resume) {
+ if (!(desc->status & IRQ_SUSPENDED)) {
+ if (!desc->action)
+ return;
+ if (!(desc->action->flags & IRQF_FORCE_RESUME))
+ return;
+ /* Pretend that it got disabled ! */
+ desc->depth++;
+ }
desc->status &= ~IRQ_SUSPENDED;
+ }
switch (desc->depth) {
case 0:
@@ -948,9 +957,14 @@ out_thread:
*/
int setup_irq(unsigned int irq, struct irqaction *act)
{
+ int retval;
struct irq_desc *desc = irq_to_desc(irq);
- return __setup_irq(irq, desc, act);
+ chip_bus_lock(desc);
+ retval = __setup_irq(irq, desc, act);
+ chip_bus_sync_unlock(desc);
+
+ return retval;
}
EXPORT_SYMBOL_GPL(setup_irq);
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..d6bfb89cce91 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
for_each_irq_desc(irq, desc) {
unsigned long flags;
- if (!(desc->status & IRQ_SUSPENDED))
- continue;
-
raw_spin_lock_irqsave(&desc->lock, flags);
__enable_irq(desc, irq, true);
raw_spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..a353a4d6d00d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,79 @@
#include <asm/irq_regs.h>
+struct remote_function_call {
+ struct task_struct *p;
+ int (*func)(void *info);
+ void *info;
+ int ret;
+};
+
+static void remote_function(void *data)
+{
+ struct remote_function_call *tfc = data;
+ struct task_struct *p = tfc->p;
+
+ if (p) {
+ tfc->ret = -EAGAIN;
+ if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+ return;
+ }
+
+ tfc->ret = tfc->func(tfc->info);
+}
+
+/**
+ * task_function_call - call a function on the cpu on which a task runs
+ * @p: the task to evaluate
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ *
+ * returns: @func return value, or
+ * -ESRCH - when the process isn't running
+ * -EAGAIN - when the process moved away
+ */
+static int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+{
+ struct remote_function_call data = {
+ .p = p,
+ .func = func,
+ .info = info,
+ .ret = -ESRCH, /* No such (running) process */
+ };
+
+ if (task_curr(p))
+ smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+
+ return data.ret;
+}
+
+/**
+ * cpu_function_call - call a function on the cpu
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func on the remote cpu.
+ *
+ * returns: @func return value or -ENXIO when the cpu is offline
+ */
+static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+{
+ struct remote_function_call data = {
+ .p = NULL,
+ .func = func,
+ .info = info,
+ .ret = -ENXIO, /* No such CPU */
+ };
+
+ smp_call_function_single(cpu, remote_function, &data, 1);
+
+ return data.ret;
+}
+
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
@@ -254,7 +327,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
raw_spin_lock_irqsave(&ctx->lock, flags);
--ctx->pin_count;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
- put_ctx(ctx);
}
/*
@@ -618,35 +690,24 @@ __get_cpu_context(struct perf_event_context *ctx)
* We disable the event on the hardware level first. After that we
* remove it from the context list.
*/
-static void __perf_event_remove_from_context(void *info)
+static int __perf_remove_from_context(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
- /*
- * If this is a task context, we need to check whether it is
- * the current task context of this cpu. If not it has been
- * scheduled out before the smp call arrived.
- */
- if (ctx->task && cpuctx->task_ctx != ctx)
- return;
-
raw_spin_lock(&ctx->lock);
-
event_sched_out(event, cpuctx, ctx);
-
list_del_event(event, ctx);
-
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
* Remove the event from a task's (or a CPU's) list of events.
*
- * Must be called with ctx->mutex held.
- *
* CPU events are removed with a smp call. For task events we only
* call when the task is on a CPU.
*
@@ -657,49 +718,48 @@ static void __perf_event_remove_from_context(void *info)
* When called from perf_event_exit_task, it's OK because the
* context has been detached from its task.
*/
-static void perf_event_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
+ lockdep_assert_held(&ctx->mutex);
+
if (!task) {
/*
* Per cpu events are removed via an smp call and
* the removal is always successful.
*/
- smp_call_function_single(event->cpu,
- __perf_event_remove_from_context,
- event, 1);
+ cpu_function_call(event->cpu, __perf_remove_from_context, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_event_remove_from_context,
- event);
+ if (!task_function_call(task, __perf_remove_from_context, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
- * If the context is active we need to retry the smp call.
+ * If we failed to find a running task, but find the context active now
+ * that we've acquired the ctx->lock, retry.
*/
- if (ctx->nr_active && !list_empty(&event->group_entry)) {
+ if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
/*
- * The lock prevents that this context is scheduled in so we
- * can remove the event safely, if the call above did not
- * succeed.
+ * Since the task isn't running, its safe to remove the event, us
+ * holding the ctx->lock ensures the task won't get scheduled in.
*/
- if (!list_empty(&event->group_entry))
- list_del_event(event, ctx);
+ list_del_event(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
}
/*
* Cross CPU call to disable a performance event
*/
-static void __perf_event_disable(void *info)
+static int __perf_event_disable(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -708,9 +768,12 @@ static void __perf_event_disable(void *info)
/*
* If this is a per-task event, need to check whether this
* event's task is the current task on this cpu.
+ *
+ * Can trigger due to concurrent perf_event_context_sched_out()
+ * flipping contexts around.
*/
if (ctx->task && cpuctx->task_ctx != ctx)
- return;
+ return -EINVAL;
raw_spin_lock(&ctx->lock);
@@ -729,6 +792,8 @@ static void __perf_event_disable(void *info)
}
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -753,13 +818,13 @@ void perf_event_disable(struct perf_event *event)
/*
* Disable the event on the cpu that it's on
*/
- smp_call_function_single(event->cpu, __perf_event_disable,
- event, 1);
+ cpu_function_call(event->cpu, __perf_event_disable, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_event_disable, event);
+ if (!task_function_call(task, __perf_event_disable, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
@@ -767,6 +832,11 @@ retry:
*/
if (event->state == PERF_EVENT_STATE_ACTIVE) {
raw_spin_unlock_irq(&ctx->lock);
+ /*
+ * Reload the task pointer, it might have been changed by
+ * a concurrent perf_event_context_sched_out().
+ */
+ task = ctx->task;
goto retry;
}
@@ -778,7 +848,6 @@ retry:
update_group_times(event);
event->state = PERF_EVENT_STATE_OFF;
}
-
raw_spin_unlock_irq(&ctx->lock);
}
@@ -928,12 +997,14 @@ static void add_event_to_ctx(struct perf_event *event,
event->tstamp_stopped = tstamp;
}
+static void perf_event_context_sched_in(struct perf_event_context *ctx);
+
/*
* Cross CPU call to install and enable a performance event
*
* Must be called with ctx->mutex held
*/
-static void __perf_install_in_context(void *info)
+static int __perf_install_in_context(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -942,17 +1013,12 @@ static void __perf_install_in_context(void *info)
int err;
/*
- * If this is a task context, we need to check whether it is
- * the current task context of this cpu. If not it has been
- * scheduled out before the smp call arrived.
- * Or possibly this is the right context but it isn't
- * on this cpu because it had no events.
+ * In case we're installing a new context to an already running task,
+ * could also happen before perf_event_task_sched_in() on architectures
+ * which do context switches with IRQs enabled.
*/
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
+ if (ctx->task && !cpuctx->task_ctx)
+ perf_event_context_sched_in(ctx);
raw_spin_lock(&ctx->lock);
ctx->is_active = 1;
@@ -997,6 +1063,8 @@ static void __perf_install_in_context(void *info)
unlock:
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -1008,8 +1076,6 @@ unlock:
* If the event is attached to a task which is on a CPU we use a smp
* call to enable it in the task context. The task might have been
* scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
*/
static void
perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1084,8 @@ perf_install_in_context(struct perf_event_context *ctx,
{
struct task_struct *task = ctx->task;
+ lockdep_assert_held(&ctx->mutex);
+
event->ctx = ctx;
if (!task) {
@@ -1025,31 +1093,29 @@ perf_install_in_context(struct perf_event_context *ctx,
* Per cpu events are installed via an smp call and
* the install is always successful.
*/
- smp_call_function_single(cpu, __perf_install_in_context,
- event, 1);
+ cpu_function_call(cpu, __perf_install_in_context, event);
return;
}
retry:
- task_oncpu_function_call(task, __perf_install_in_context,
- event);
+ if (!task_function_call(task, __perf_install_in_context, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
/*
- * we need to retry the smp call.
+ * If we failed to find a running task, but find the context active now
+ * that we've acquired the ctx->lock, retry.
*/
- if (ctx->is_active && list_empty(&event->group_entry)) {
+ if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto retry;
}
/*
- * The lock prevents that this context is scheduled in so we
- * can add the event safely, if it the call above did not
- * succeed.
+ * Since the task isn't running, its safe to add the event, us holding
+ * the ctx->lock ensures the task won't get scheduled in.
*/
- if (list_empty(&event->group_entry))
- add_event_to_ctx(event, ctx);
+ add_event_to_ctx(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
}
@@ -1078,7 +1144,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
/*
* Cross CPU call to enable a performance event
*/
-static void __perf_event_enable(void *info)
+static int __perf_event_enable(void *info)
{
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
@@ -1086,18 +1152,10 @@ static void __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
- /*
- * If this is a per-task event, need to check whether this
- * event's task is the current task on this cpu.
- */
- if (ctx->task && cpuctx->task_ctx != ctx) {
- if (cpuctx->task_ctx || ctx->task != current)
- return;
- cpuctx->task_ctx = ctx;
- }
+ if (WARN_ON_ONCE(!ctx->is_active))
+ return -EINVAL;
raw_spin_lock(&ctx->lock);
- ctx->is_active = 1;
update_context_time(ctx);
if (event->state >= PERF_EVENT_STATE_INACTIVE)
@@ -1138,6 +1196,8 @@ static void __perf_event_enable(void *info)
unlock:
raw_spin_unlock(&ctx->lock);
+
+ return 0;
}
/*
@@ -1158,8 +1218,7 @@ void perf_event_enable(struct perf_event *event)
/*
* Enable the event on the cpu that it's on
*/
- smp_call_function_single(event->cpu, __perf_event_enable,
- event, 1);
+ cpu_function_call(event->cpu, __perf_event_enable, event);
return;
}
@@ -1178,8 +1237,15 @@ void perf_event_enable(struct perf_event *event)
event->state = PERF_EVENT_STATE_OFF;
retry:
+ if (!ctx->is_active) {
+ __perf_event_mark_enabled(event, ctx);
+ goto out;
+ }
+
raw_spin_unlock_irq(&ctx->lock);
- task_oncpu_function_call(task, __perf_event_enable, event);
+
+ if (!task_function_call(task, __perf_event_enable, event))
+ return;
raw_spin_lock_irq(&ctx->lock);
@@ -1187,15 +1253,14 @@ retry:
* If the context is active and the event is still off,
* we need to retry the cross-call.
*/
- if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
+ if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+ /*
+ * task could have been flipped by a concurrent
+ * perf_event_context_sched_out()
+ */
+ task = ctx->task;
goto retry;
-
- /*
- * Since we have the lock this context can't be scheduled
- * in, so we can change the state safely.
- */
- if (event->state == PERF_EVENT_STATE_OFF)
- __perf_event_mark_enabled(event, ctx);
+ }
out:
raw_spin_unlock_irq(&ctx->lock);
@@ -1339,8 +1404,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
}
}
-void perf_event_context_sched_out(struct task_struct *task, int ctxn,
- struct task_struct *next)
+static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+ struct task_struct *next)
{
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
@@ -1533,7 +1598,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
{
struct perf_cpu_context *cpuctx;
- cpuctx = __get_cpu_context(ctx);
+ cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
return;
@@ -1541,7 +1606,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
cpuctx->task_ctx = ctx;
}
-void perf_event_context_sched_in(struct perf_event_context *ctx)
+static void perf_event_context_sched_in(struct perf_event_context *ctx)
{
struct perf_cpu_context *cpuctx;
@@ -1627,7 +1692,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
* Reduce accuracy by one bit such that @a and @b converge
* to a similar magnitude.
*/
-#define REDUCE_FLS(a, b) \
+#define REDUCE_FLS(a, b) \
do { \
if (a##_fls > b##_fls) { \
a >>= 1; \
@@ -2213,6 +2278,9 @@ errout:
}
+/*
+ * Returns a matching context with refcount and pincount.
+ */
static struct perf_event_context *
find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
{
@@ -2237,6 +2305,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
+ ++ctx->pin_count;
return ctx;
}
@@ -2250,6 +2319,7 @@ retry:
ctx = perf_lock_task_context(task, ctxn, &flags);
if (ctx) {
unclone_ctx(ctx);
+ ++ctx->pin_count;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
@@ -2271,8 +2341,10 @@ retry:
err = -ESRCH;
else if (task->perf_event_ctxp[ctxn])
err = -EAGAIN;
- else
+ else {
+ ++ctx->pin_count;
rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+ }
mutex_unlock(&task->perf_event_mutex);
if (unlikely(err)) {
@@ -5950,10 +6022,10 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event_context *gctx = group_leader->ctx;
mutex_lock(&gctx->mutex);
- perf_event_remove_from_context(group_leader);
+ perf_remove_from_context(group_leader);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
- perf_event_remove_from_context(sibling);
+ perf_remove_from_context(sibling);
put_ctx(gctx);
}
mutex_unlock(&gctx->mutex);
@@ -5976,6 +6048,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
+ perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
event->owner = current;
@@ -6001,6 +6074,7 @@ SYSCALL_DEFINE5(perf_event_open,
return event_fd;
err_context:
+ perf_unpin_context(ctx);
put_ctx(ctx);
err_alloc:
free_event(event);
@@ -6051,6 +6125,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
+ perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
return event;
@@ -6104,7 +6179,7 @@ __perf_event_exit_task(struct perf_event *child_event,
{
struct perf_event *parent_event;
- perf_event_remove_from_context(child_event);
+ perf_remove_from_context(child_event);
parent_event = child_event->parent;
/*
@@ -6411,7 +6486,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
return 0;
}
- child_ctx = child->perf_event_ctxp[ctxn];
+ child_ctx = child->perf_event_ctxp[ctxn];
if (!child_ctx) {
/*
* This is executed from the parent task context, so
@@ -6526,6 +6601,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
+ put_ctx(parent_ctx);
return ret;
}
@@ -6595,9 +6671,9 @@ static void __perf_event_exit_context(void *__info)
perf_pmu_rotate_stop(ctx->pmu);
list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
- __perf_event_remove_from_context(event);
+ __perf_remove_from_context(event);
list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
- __perf_event_remove_from_context(event);
+ __perf_remove_from_context(event);
}
static void perf_event_exit_cpu_context(int cpu)
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 05bb7173850e..67fea9d25d55 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -176,7 +176,8 @@ static inline cputime_t virt_ticks(struct task_struct *p)
return p->utime;
}
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
int error = check_clock(which_clock);
if (!error) {
@@ -194,7 +195,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
return error;
}
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+static int
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
{
/*
* You can never reset a CPU clock, but we check for other errors
@@ -317,7 +319,7 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
}
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
+static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
const pid_t pid = CPUCLOCK_PID(which_clock);
int error = -EINVAL;
@@ -379,7 +381,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
* new timer already all-zeros initialized.
*/
-int posix_cpu_timer_create(struct k_itimer *new_timer)
+static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
int ret = 0;
const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -425,7 +427,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_del(struct k_itimer *timer)
+static int posix_cpu_timer_del(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
int ret = 0;
@@ -665,8 +667,8 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old)
+static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
+ struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
union cpu_time_count old_expires, new_expires, old_incr, val;
@@ -820,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
return ret;
}
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
union cpu_time_count now;
struct task_struct *p = timer->it.cpu.task;
@@ -1481,11 +1483,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
return error;
}
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
+
+static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
+ struct timespec *rqtp, struct timespec __user *rmtp)
{
struct restart_block *restart_block =
- &current_thread_info()->restart_block;
+ &current_thread_info()->restart_block;
struct itimerspec it;
int error;
@@ -1501,56 +1505,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
if (error == -ERESTART_RESTARTBLOCK) {
- if (flags & TIMER_ABSTIME)
+ if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = rqtp->tv_sec;
- restart_block->arg3 = rqtp->tv_nsec;
+ restart_block->nanosleep.index = which_clock;
+ restart_block->nanosleep.rmtp = rmtp;
+ restart_block->nanosleep.expires = timespec_to_ns(rqtp);
}
return error;
}
-long posix_cpu_nsleep_restart(struct restart_block *restart_block)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
- struct timespec __user *rmtp;
+ clockid_t which_clock = restart_block->nanosleep.index;
struct timespec t;
struct itimerspec it;
int error;
- rmtp = (struct timespec __user *) restart_block->arg1;
- t.tv_sec = restart_block->arg2;
- t.tv_nsec = restart_block->arg3;
+ t = ns_to_timespec(restart_block->nanosleep.expires);
- restart_block->fn = do_no_restart_syscall;
error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
if (error == -ERESTART_RESTARTBLOCK) {
+ struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
- restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = t.tv_sec;
- restart_block->arg3 = t.tv_nsec;
+ restart_block->nanosleep.expires = timespec_to_ns(&t);
}
return error;
}
-
#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
@@ -1594,38 +1589,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
timer->it_clock = THREAD_CLOCK;
return posix_cpu_timer_create(timer);
}
-static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
-{
- return -EINVAL;
-}
-static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
-{
- return -EINVAL;
-}
+
+struct k_clock clock_posix_cpu = {
+ .clock_getres = posix_cpu_clock_getres,
+ .clock_set = posix_cpu_clock_set,
+ .clock_get = posix_cpu_clock_get,
+ .timer_create = posix_cpu_timer_create,
+ .nsleep = posix_cpu_nsleep,
+ .nsleep_restart = posix_cpu_nsleep_restart,
+ .timer_set = posix_cpu_timer_set,
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+};
static __init int init_posix_cpu_timers(void)
{
struct k_clock process = {
- .clock_getres = process_cpu_clock_getres,
- .clock_get = process_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = process_cpu_timer_create,
- .nsleep = process_cpu_nsleep,
- .nsleep_restart = process_cpu_nsleep_restart,
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get = process_cpu_clock_get,
+ .timer_create = process_cpu_timer_create,
+ .nsleep = process_cpu_nsleep,
+ .nsleep_restart = process_cpu_nsleep_restart,
};
struct k_clock thread = {
- .clock_getres = thread_cpu_clock_getres,
- .clock_get = thread_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = thread_cpu_timer_create,
- .nsleep = thread_cpu_nsleep,
- .nsleep_restart = thread_cpu_nsleep_restart,
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get = thread_cpu_clock_get,
+ .timer_create = thread_cpu_timer_create,
};
struct timespec ts;
- register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
- register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+ posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+ posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
cputime_to_timespec(cputime_one_jiffy, &ts);
onecputick = ts.tv_nsec;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 93bd2eb2bc53..44fcff131b38 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -41,6 +41,7 @@
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/idr.h>
+#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
@@ -81,6 +82,14 @@ static DEFINE_SPINLOCK(idr_lock);
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
+/*
+ * parisc wants ENOTSUP instead of EOPNOTSUPP
+ */
+#ifndef ENOTSUP
+# define ENANOSLEEP_NOTSUP EOPNOTSUPP
+#else
+# define ENANOSLEEP_NOTSUP ENOTSUP
+#endif
/*
* The timer ID is turned into a timer address by idr_find().
@@ -94,11 +103,7 @@ static DEFINE_SPINLOCK(idr_lock);
/*
* CLOCKs: The POSIX standard calls for a couple of clocks and allows us
* to implement others. This structure defines the various
- * clocks and allows the possibility of adding others. We
- * provide an interface to add clocks to the table and expect
- * the "arch" code to add at least one clock that is high
- * resolution. Here we define the standard CLOCK_REALTIME as a
- * 1/HZ resolution clock.
+ * clocks.
*
* RESOLUTION: Clock resolution is used to round up timer and interval
* times, NOT to report clock times, which are reported with as
@@ -108,20 +113,13 @@ static DEFINE_SPINLOCK(idr_lock);
* necessary code is written. The standard says we should say
* something about this issue in the documentation...
*
- * FUNCTIONS: The CLOCKs structure defines possible functions to handle
- * various clock functions. For clocks that use the standard
- * system timer code these entries should be NULL. This will
- * allow dispatch without the overhead of indirect function
- * calls. CLOCKS that depend on other sources (e.g. WWV or GPS)
- * must supply functions here, even if the function just returns
- * ENOSYS. The standard POSIX timer management code assumes the
- * following: 1.) The k_itimer struct (sched.h) is used for the
- * timer. 2.) The list, it_lock, it_clock, it_id and it_pid
- * fields are not modified by timer code.
+ * FUNCTIONS: The CLOCKs structure defines possible functions to
+ * handle various clock functions.
*
- * At this time all functions EXCEPT clock_nanosleep can be
- * redirected by the CLOCKS structure. Clock_nanosleep is in
- * there, but the code ignores it.
+ * The standard POSIX timer management code assumes the
+ * following: 1.) The k_itimer struct (sched.h) is used for
+ * the timer. 2.) The list, it_lock, it_clock, it_id and
+ * it_pid fields are not modified by timer code.
*
* Permissions: It is assumed that the clock_settime() function defined
* for each clock will take care of permission checks. Some
@@ -138,6 +136,7 @@ static struct k_clock posix_clocks[MAX_CLOCKS];
*/
static int common_nsleep(const clockid_t, int flags, struct timespec *t,
struct timespec __user *rmtp);
+static int common_timer_create(struct k_itimer *new_timer);
static void common_timer_get(struct k_itimer *, struct itimerspec *);
static int common_timer_set(struct k_itimer *, int,
struct itimerspec *, struct itimerspec *);
@@ -158,76 +157,24 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
spin_unlock_irqrestore(&timr->it_lock, flags);
}
-/*
- * Call the k_clock hook function if non-null, or the default function.
- */
-#define CLOCK_DISPATCH(clock, call, arglist) \
- ((clock) < 0 ? posix_cpu_##call arglist : \
- (posix_clocks[clock].call != NULL \
- ? (*posix_clocks[clock].call) arglist : common_##call arglist))
-
-/*
- * Default clock hook functions when the struct k_clock passed
- * to register_posix_clock leaves a function pointer null.
- *
- * The function common_CALL is the default implementation for
- * the function pointer CALL in struct k_clock.
- */
-
-static inline int common_clock_getres(const clockid_t which_clock,
- struct timespec *tp)
-{
- tp->tv_sec = 0;
- tp->tv_nsec = posix_clocks[which_clock].res;
- return 0;
-}
-
-/*
- * Get real time for posix timers
- */
-static int common_clock_get(clockid_t which_clock, struct timespec *tp)
+/* Get clock_realtime */
+static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp)
{
ktime_get_real_ts(tp);
return 0;
}
-static inline int common_clock_set(const clockid_t which_clock,
- struct timespec *tp)
+/* Set clock_realtime */
+static int posix_clock_realtime_set(const clockid_t which_clock,
+ const struct timespec *tp)
{
return do_sys_settimeofday(tp, NULL);
}
-static int common_timer_create(struct k_itimer *new_timer)
-{
- hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
- return 0;
-}
-
-static int no_timer_create(struct k_itimer *new_timer)
-{
- return -EOPNOTSUPP;
-}
-
-static int no_nsleep(const clockid_t which_clock, int flags,
- struct timespec *tsave, struct timespec __user *rmtp)
+static int posix_clock_realtime_adj(const clockid_t which_clock,
+ struct timex *t)
{
- return -EOPNOTSUPP;
-}
-
-/*
- * Return nonzero if we know a priori this clockid_t value is bogus.
- */
-static inline int invalid_clockid(const clockid_t which_clock)
-{
- if (which_clock < 0) /* CPU clock, posix_cpu_* will check it */
- return 0;
- if ((unsigned) which_clock >= MAX_CLOCKS)
- return 1;
- if (posix_clocks[which_clock].clock_getres != NULL)
- return 0;
- if (posix_clocks[which_clock].res != 0)
- return 0;
- return 1;
+ return do_adjtimex(t);
}
/*
@@ -273,40 +220,45 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp
static __init int init_posix_timers(void)
{
struct k_clock clock_realtime = {
- .clock_getres = hrtimer_get_res,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_clock_realtime_get,
+ .clock_set = posix_clock_realtime_set,
+ .clock_adj = posix_clock_realtime_adj,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_ktime_get_ts,
- .clock_set = do_posix_clock_nosettime,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_ktime_get_ts,
+ .nsleep = common_nsleep,
+ .nsleep_restart = hrtimer_nanosleep_restart,
+ .timer_create = common_timer_create,
+ .timer_set = common_timer_set,
+ .timer_get = common_timer_get,
+ .timer_del = common_timer_del,
};
struct k_clock clock_monotonic_raw = {
- .clock_getres = hrtimer_get_res,
- .clock_get = posix_get_monotonic_raw,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = hrtimer_get_res,
+ .clock_get = posix_get_monotonic_raw,
};
struct k_clock clock_realtime_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_realtime_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_realtime_coarse,
};
struct k_clock clock_monotonic_coarse = {
- .clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_monotonic_coarse,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = no_timer_create,
- .nsleep = no_nsleep,
+ .clock_getres = posix_get_coarse_res,
+ .clock_get = posix_get_monotonic_coarse,
};
- register_posix_clock(CLOCK_REALTIME, &clock_realtime);
- register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
- register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
+ posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
+ posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
sizeof (struct k_itimer), 0, SLAB_PANIC,
@@ -482,17 +434,29 @@ static struct pid *good_sigevent(sigevent_t * event)
return task_pid(rtn);
}
-void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock)
+void posix_timers_register_clock(const clockid_t clock_id,
+ struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk("POSIX clock register failed for clock_id %d\n",
+ printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ clock_id);
+ return;
+ }
+
+ if (!new_clock->clock_get) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ clock_id);
+ return;
+ }
+ if (!new_clock->clock_getres) {
+ printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
posix_clocks[clock_id] = *new_clock;
}
-EXPORT_SYMBOL_GPL(register_posix_clock);
+EXPORT_SYMBOL_GPL(posix_timers_register_clock);
static struct k_itimer * alloc_posix_timer(void)
{
@@ -523,19 +487,39 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
kmem_cache_free(posix_timers_cache, tmr);
}
+static struct k_clock *clockid_to_kclock(const clockid_t id)
+{
+ if (id < 0)
+ return (id & CLOCKFD_MASK) == CLOCKFD ?
+ &clock_posix_dynamic : &clock_posix_cpu;
+
+ if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres)
+ return NULL;
+ return &posix_clocks[id];
+}
+
+static int common_timer_create(struct k_itimer *new_timer)
+{
+ hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0);
+ return 0;
+}
+
/* Create a POSIX.1b interval timer. */
SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
struct sigevent __user *, timer_event_spec,
timer_t __user *, created_timer_id)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct k_itimer *new_timer;
int error, new_timer_id;
sigevent_t event;
int it_id_set = IT_ID_NOT_SET;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->timer_create)
+ return -EOPNOTSUPP;
new_timer = alloc_posix_timer();
if (unlikely(!new_timer))
@@ -597,7 +581,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
goto out;
}
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ error = kc->timer_create(new_timer);
if (error)
goto out;
@@ -607,7 +591,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
spin_unlock_irq(&current->sighand->siglock);
return 0;
- /*
+ /*
* In the case of the timer belonging to another task, after
* the task is unlocked, the timer is owned by the other task
* and may cease to exist at any time. Don't use or modify
@@ -709,22 +693,28 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
struct itimerspec __user *, setting)
{
- struct k_itimer *timr;
struct itimerspec cur_setting;
+ struct k_itimer *timr;
+ struct k_clock *kc;
unsigned long flags;
+ int ret = 0;
timr = lock_timer(timer_id, &flags);
if (!timr)
return -EINVAL;
- CLOCK_DISPATCH(timr->it_clock, timer_get, (timr, &cur_setting));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_get))
+ ret = -EINVAL;
+ else
+ kc->timer_get(timr, &cur_setting);
unlock_timer(timr, flags);
- if (copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
return -EFAULT;
- return 0;
+ return ret;
}
/*
@@ -813,6 +803,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
int error = 0;
unsigned long flag;
struct itimerspec *rtn = old_setting ? &old_spec : NULL;
+ struct k_clock *kc;
if (!new_setting)
return -EINVAL;
@@ -828,8 +819,11 @@ retry:
if (!timr)
return -EINVAL;
- error = CLOCK_DISPATCH(timr->it_clock, timer_set,
- (timr, flags, &new_spec, rtn));
+ kc = clockid_to_kclock(timr->it_clock);
+ if (WARN_ON_ONCE(!kc || !kc->timer_set))
+ error = -EINVAL;
+ else
+ error = kc->timer_set(timr, flags, &new_spec, rtn);
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
@@ -844,7 +838,7 @@ retry:
return error;
}
-static inline int common_timer_del(struct k_itimer *timer)
+static int common_timer_del(struct k_itimer *timer)
{
timer->it.real.interval.tv64 = 0;
@@ -855,7 +849,11 @@ static inline int common_timer_del(struct k_itimer *timer)
static inline int timer_delete_hook(struct k_itimer *timer)
{
- return CLOCK_DISPATCH(timer->it_clock, timer_del, (timer));
+ struct k_clock *kc = clockid_to_kclock(timer->it_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->timer_del))
+ return -EINVAL;
+ return kc->timer_del(timer);
}
/* Delete a POSIX.1b interval timer. */
@@ -927,69 +925,76 @@ void exit_itimers(struct signal_struct *sig)
}
}
-/* Not available / possible... functions */
-int do_posix_clock_nosettime(const clockid_t clockid, struct timespec *tp)
-{
- return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nosettime);
-
-int do_posix_clock_nonanosleep(const clockid_t clock, int flags,
- struct timespec *t, struct timespec __user *r)
-{
-#ifndef ENOTSUP
- return -EOPNOTSUPP; /* aka ENOTSUP in userland for POSIX */
-#else /* parisc does define it separately. */
- return -ENOTSUP;
-#endif
-}
-EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep);
-
SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
const struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec new_tp;
- if (invalid_clockid(which_clock))
+ if (!kc || !kc->clock_set)
return -EINVAL;
+
if (copy_from_user(&new_tp, tp, sizeof (*tp)))
return -EFAULT;
- return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp));
+ return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
struct timespec __user *,tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_get,
- (which_clock, &kernel_tp));
+
+ error = kc->clock_get(which_clock, &kernel_tp);
+
if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
error = -EFAULT;
return error;
+}
+
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+ struct timex __user *, utx)
+{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+ struct timex ktx;
+ int err;
+
+ if (!kc)
+ return -EINVAL;
+ if (!kc->clock_adj)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&ktx, utx, sizeof(ktx)))
+ return -EFAULT;
+ err = kc->clock_adj(which_clock, &ktx);
+
+ if (!err && copy_to_user(utx, &ktx, sizeof(ktx)))
+ return -EFAULT;
+
+ return err;
}
SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
struct timespec __user *, tp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec rtn_tp;
int error;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
- error = CLOCK_DISPATCH(which_clock, clock_getres,
- (which_clock, &rtn_tp));
+ error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) {
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
error = -EFAULT;
- }
return error;
}
@@ -1009,10 +1014,13 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
const struct timespec __user *, rqtp,
struct timespec __user *, rmtp)
{
+ struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec t;
- if (invalid_clockid(which_clock))
+ if (!kc)
return -EINVAL;
+ if (!kc->nsleep)
+ return -ENANOSLEEP_NOTSUP;
if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
return -EFAULT;
@@ -1020,27 +1028,20 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!timespec_valid(&t))
return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep,
- (which_clock, flags, &t, rmtp));
-}
-
-/*
- * nanosleep_restart for monotonic and realtime clocks
- */
-static int common_nsleep_restart(struct restart_block *restart_block)
-{
- return hrtimer_nanosleep_restart(restart_block);
+ return kc->nsleep(which_clock, flags, &t, rmtp);
}
/*
* This will restart clock_nanosleep. This is required only by
* compat_clock_nanosleep_restart for now.
*/
-long
-clock_nanosleep_restart(struct restart_block *restart_block)
+long clock_nanosleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
+ clockid_t which_clock = restart_block->nanosleep.index;
+ struct k_clock *kc = clockid_to_kclock(which_clock);
+
+ if (WARN_ON_ONCE(!kc || !kc->nsleep_restart))
+ return -EINVAL;
- return CLOCK_DISPATCH(which_clock, nsleep_restart,
- (restart_block));
+ return kc->nsleep_restart(restart_block);
}
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index ddabb54bb5c8..3c7cbc2c33be 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -215,7 +215,6 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
put_pid(waiter->deadlock_task_pid);
TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry));
TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
- TRACE_WARN_ON(waiter->task);
memset(waiter, 0x22, sizeof(*waiter));
}
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..d5b543506cbc 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/sysdev.h>
#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
- int bkl;
int event;
struct sys_device sysdev;
};
@@ -46,8 +44,8 @@ enum test_opcodes {
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
- RTTEST_LOCKBKL, /* 9 Lock BKL */
- RTTEST_UNLOCKBKL, /* 10 Unlock BKL */
+ RTTEST_LOCKBKL, /* 9 Was: Lock BKL */
+ RTTEST_UNLOCKBKL, /* 10 Was: Unlock BKL */
RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
@@ -74,13 +72,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[i] = 0;
}
}
-
- if (!lockwakeup && td->bkl == 4) {
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- }
return 0;
case RTTEST_RESETEVENT:
@@ -131,25 +122,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[id] = 0;
return 0;
- case RTTEST_LOCKBKL:
- if (td->bkl)
- return 0;
- td->bkl = 1;
-#ifdef CONFIG_LOCK_KERNEL
- lock_kernel();
-#endif
- td->bkl = 4;
- return 0;
-
- case RTTEST_UNLOCKBKL:
- if (td->bkl != 4)
- break;
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- return 0;
-
default:
break;
}
@@ -196,7 +168,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
break;
- case RTTEST_LOCKBKL:
default:
break;
}
@@ -229,8 +200,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
return;
- case RTTEST_LOCKBKL:
- return;
default:
return;
}
@@ -380,11 +349,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
spin_lock(&rttest_lock);
curr += sprintf(curr,
- "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:",
+ "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
- tsk->pi_blocked_on, td->bkl);
+ tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index a9604815786a..ab449117aaf2 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -20,41 +20,34 @@
/*
* lock->owner state tracking:
*
- * lock->owner holds the task_struct pointer of the owner. Bit 0 and 1
- * are used to keep track of the "owner is pending" and "lock has
- * waiters" state.
+ * lock->owner holds the task_struct pointer of the owner. Bit 0
+ * is used to keep track of the "lock has waiters" state.
*
- * owner bit1 bit0
- * NULL 0 0 lock is free (fast acquire possible)
- * NULL 0 1 invalid state
- * NULL 1 0 Transitional State*
- * NULL 1 1 invalid state
- * taskpointer 0 0 lock is held (fast release possible)
- * taskpointer 0 1 task is pending owner
- * taskpointer 1 0 lock is held and has waiters
- * taskpointer 1 1 task is pending owner and lock has more waiters
- *
- * Pending ownership is assigned to the top (highest priority)
- * waiter of the lock, when the lock is released. The thread is woken
- * up and can now take the lock. Until the lock is taken (bit 0
- * cleared) a competing higher priority thread can steal the lock
- * which puts the woken up thread back on the waiters list.
+ * owner bit0
+ * NULL 0 lock is free (fast acquire possible)
+ * NULL 1 lock is free and has waiters and the top waiter
+ * is going to take the lock*
+ * taskpointer 0 lock is held (fast release possible)
+ * taskpointer 1 lock is held and has waiters**
*
* The fast atomic compare exchange based acquire and release is only
- * possible when bit 0 and 1 of lock->owner are 0.
+ * possible when bit 0 of lock->owner is 0.
+ *
+ * (*) It also can be a transitional state when grabbing the lock
+ * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
+ * we need to set the bit0 before looking at the lock, and the owner may be
+ * NULL in this small time, hence this can be a transitional state.
*
- * (*) There's a small time where the owner can be NULL and the
- * "lock has waiters" bit is set. This can happen when grabbing the lock.
- * To prevent a cmpxchg of the owner releasing the lock, we need to set this
- * bit before looking at the lock, hence the reason this is a transitional
- * state.
+ * (**) There is a small time when bit 0 is set but there are no
+ * waiters. This can happen when grabbing the lock in the slow path.
+ * To prevent a cmpxchg of the owner releasing the lock, we need to
+ * set this bit before looking at the lock.
*/
static void
-rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
- unsigned long mask)
+rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
- unsigned long val = (unsigned long)owner | mask;
+ unsigned long val = (unsigned long)owner;
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
@@ -203,15 +196,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* reached or the state of the chain has changed while we
* dropped the locks.
*/
- if (!waiter || !waiter->task)
+ if (!waiter)
goto out_unlock_pi;
/*
* Check the orig_waiter state. After we dropped the locks,
- * the previous owner of the lock might have released the lock
- * and made us the pending owner:
+ * the previous owner of the lock might have released the lock.
*/
- if (orig_waiter && !orig_waiter->task)
+ if (orig_waiter && !rt_mutex_owner(orig_lock))
goto out_unlock_pi;
/*
@@ -254,6 +246,17 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!rt_mutex_owner(lock)) {
+ /*
+ * If the requeue above changed the top waiter, then we need
+ * to wake the new top waiter up to try to get the lock.
+ */
+
+ if (top_waiter != rt_mutex_top_waiter(lock))
+ wake_up_process(rt_mutex_top_waiter(lock)->task);
+ raw_spin_unlock(&lock->wait_lock);
+ goto out_put_task;
+ }
put_task_struct(task);
/* Grab the next task */
@@ -296,78 +299,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
}
/*
- * Optimization: check if we can steal the lock from the
- * assigned pending owner [which might not have taken the
- * lock yet]:
- */
-static inline int try_to_steal_lock(struct rt_mutex *lock,
- struct task_struct *task)
-{
- struct task_struct *pendowner = rt_mutex_owner(lock);
- struct rt_mutex_waiter *next;
- unsigned long flags;
-
- if (!rt_mutex_owner_pending(lock))
- return 0;
-
- if (pendowner == task)
- return 1;
-
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
- if (task->prio >= pendowner->prio) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 0;
- }
-
- /*
- * Check if a waiter is enqueued on the pending owners
- * pi_waiters list. Remove it and readjust pending owners
- * priority.
- */
- if (likely(!rt_mutex_has_waiters(lock))) {
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
- return 1;
- }
-
- /* No chain handling, pending owner is not blocked on anything: */
- next = rt_mutex_top_waiter(lock);
- plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
- __rt_mutex_adjust_prio(pendowner);
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- /*
- * We are going to steal the lock and a waiter was
- * enqueued on the pending owners pi_waiters queue. So
- * we have to enqueue this waiter into
- * task->pi_waiters list. This covers the case,
- * where task is boosted because it holds another
- * lock and gets unboosted because the booster is
- * interrupted, so we would delay a waiter with higher
- * priority as task->normal_prio.
- *
- * Note: in the rare case of a SCHED_OTHER task changing
- * its priority and thus stealing the lock, next->task
- * might be task:
- */
- if (likely(next->task != task)) {
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- plist_add(&next->pi_list_entry, &task->pi_waiters);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- }
- return 1;
-}
-
-/*
* Try to take an rt-mutex
*
- * This fails
- * - when the lock has a real owner
- * - when a different pending owner exists and has higher priority than current
- *
* Must be called with lock->wait_lock held.
+ *
+ * @lock: the lock to be acquired.
+ * @task: the task which wants to acquire the lock
+ * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock)
+static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
+ struct rt_mutex_waiter *waiter)
{
/*
* We have to be careful here if the atomic speedups are
@@ -390,15 +331,52 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
*/
mark_rt_mutex_waiters(lock);
- if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
+ if (rt_mutex_owner(lock))
return 0;
+ /*
+ * It will get the lock because of one of these conditions:
+ * 1) there is no waiter
+ * 2) higher priority than waiters
+ * 3) it is top waiter
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
+ if (!waiter || waiter != rt_mutex_top_waiter(lock))
+ return 0;
+ }
+ }
+
+ if (waiter || rt_mutex_has_waiters(lock)) {
+ unsigned long flags;
+ struct rt_mutex_waiter *top;
+
+ raw_spin_lock_irqsave(&task->pi_lock, flags);
+
+ /* remove the queued waiter. */
+ if (waiter) {
+ plist_del(&waiter->list_entry, &lock->wait_list);
+ task->pi_blocked_on = NULL;
+ }
+
+ /*
+ * We have to enqueue the top waiter(if it exists) into
+ * task->pi_waiters list.
+ */
+ if (rt_mutex_has_waiters(lock)) {
+ top = rt_mutex_top_waiter(lock);
+ top->pi_list_entry.prio = top->list_entry.prio;
+ plist_add(&top->pi_list_entry, &task->pi_waiters);
+ }
+ raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ }
+
/* We got the lock. */
debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, current, 0);
+ rt_mutex_set_owner(lock, task);
- rt_mutex_deadlock_account_lock(lock, current);
+ rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
@@ -436,6 +414,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ if (!owner)
+ return 0;
+
if (waiter == rt_mutex_top_waiter(lock)) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
@@ -472,21 +453,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
/*
* Wake up the next waiter on the lock.
*
- * Remove the top waiter from the current tasks waiter list and from
- * the lock waiter list. Set it as pending owner. Then wake it up.
+ * Remove the top waiter from the current tasks waiter list and wake it up.
*
* Called with lock->wait_lock held.
*/
static void wakeup_next_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
- struct task_struct *pendowner;
unsigned long flags;
raw_spin_lock_irqsave(&current->pi_lock, flags);
waiter = rt_mutex_top_waiter(lock);
- plist_del(&waiter->list_entry, &lock->wait_list);
/*
* Remove it from current->pi_waiters. We do not adjust a
@@ -495,43 +473,19 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* lock->wait_lock.
*/
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
- pendowner = waiter->task;
- waiter->task = NULL;
- rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);
+ rt_mutex_set_owner(lock, NULL);
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- /*
- * Clear the pi_blocked_on variable and enqueue a possible
- * waiter into the pi_waiters list of the pending owner. This
- * prevents that in case the pending owner gets unboosted a
- * waiter with higher priority than pending-owner->normal_prio
- * is blocked on the unboosted (pending) owner.
- */
- raw_spin_lock_irqsave(&pendowner->pi_lock, flags);
-
- WARN_ON(!pendowner->pi_blocked_on);
- WARN_ON(pendowner->pi_blocked_on != waiter);
- WARN_ON(pendowner->pi_blocked_on->lock != lock);
-
- pendowner->pi_blocked_on = NULL;
-
- if (rt_mutex_has_waiters(lock)) {
- struct rt_mutex_waiter *next;
-
- next = rt_mutex_top_waiter(lock);
- plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
- }
- raw_spin_unlock_irqrestore(&pendowner->pi_lock, flags);
-
- wake_up_process(pendowner);
+ wake_up_process(waiter->task);
}
/*
- * Remove a waiter from a lock
+ * Remove a waiter from a lock and give up
*
- * Must be called with lock->wait_lock held
+ * Must be called with lock->wait_lock held and
+ * have just failed to try_to_take_rt_mutex().
*/
static void remove_waiter(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter)
@@ -543,11 +497,13 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
- waiter->task = NULL;
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
- if (first && owner != current) {
+ if (!owner)
+ return;
+
+ if (first) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
@@ -614,21 +570,19 @@ void rt_mutex_adjust_pi(struct task_struct *task)
* or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
- * @detect_deadlock: passed to task_blocks_on_rt_mutex
*
* lock->wait_lock must be held by the caller.
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- struct rt_mutex_waiter *waiter,
- int detect_deadlock)
+ struct rt_mutex_waiter *waiter)
{
int ret = 0;
for (;;) {
/* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock))
+ if (try_to_take_rt_mutex(lock, current, waiter))
break;
/*
@@ -645,39 +599,11 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- /*
- * waiter->task is NULL the first time we come here and
- * when we have been woken up by the previous owner
- * but the lock got stolen by a higher prio task.
- */
- if (!waiter->task) {
- ret = task_blocks_on_rt_mutex(lock, waiter, current,
- detect_deadlock);
- /*
- * If we got woken up by the owner then start loop
- * all over without going into schedule to try
- * to get the lock now:
- */
- if (unlikely(!waiter->task)) {
- /*
- * Reset the return value. We might
- * have returned with -EDEADLK and the
- * owner released the lock while we
- * were walking the pi chain.
- */
- ret = 0;
- continue;
- }
- if (unlikely(ret))
- break;
- }
-
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
- if (waiter->task)
- schedule_rt_mutex(lock);
+ schedule_rt_mutex(lock);
raw_spin_lock(&lock->wait_lock);
set_current_state(state);
@@ -698,12 +624,11 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
- waiter.task = NULL;
raw_spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock)) {
+ if (try_to_take_rt_mutex(lock, current, NULL)) {
raw_spin_unlock(&lock->wait_lock);
return 0;
}
@@ -717,12 +642,14 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
timeout->task = NULL;
}
- ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
- detect_deadlock);
+ ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
+
+ if (likely(!ret))
+ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter.task))
+ if (unlikely(ret))
remove_waiter(lock, &waiter);
/*
@@ -737,14 +664,6 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
if (unlikely(timeout))
hrtimer_cancel(&timeout->timer);
- /*
- * Readjust priority, when we did not get the lock. We might
- * have been the pending owner and boosted. Since we did not
- * take the lock, the PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
debug_rt_mutex_free_waiter(&waiter);
return ret;
@@ -762,7 +681,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock)
if (likely(rt_mutex_owner(lock) != current)) {
- ret = try_to_take_rt_mutex(lock);
+ ret = try_to_take_rt_mutex(lock, current, NULL);
/*
* try_to_take_rt_mutex() sets the lock waiters
* bit unconditionally. Clean this up.
@@ -992,7 +911,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
{
__rt_mutex_init(lock, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
- rt_mutex_set_owner(lock, proxy_owner, 0);
+ rt_mutex_set_owner(lock, proxy_owner);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
@@ -1008,7 +927,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL, 0);
+ rt_mutex_set_owner(lock, NULL);
rt_mutex_deadlock_account_unlock(proxy_owner);
}
@@ -1034,20 +953,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
raw_spin_lock(&lock->wait_lock);
- mark_rt_mutex_waiters(lock);
-
- if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
- /* We got the lock for task. */
- debug_rt_mutex_lock(lock);
- rt_mutex_set_owner(lock, task, 0);
+ if (try_to_take_rt_mutex(lock, task, NULL)) {
raw_spin_unlock(&lock->wait_lock);
- rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
- if (ret && !waiter->task) {
+ if (ret && !rt_mutex_owner(lock)) {
/*
* Reset the return value. We might have
* returned with -EDEADLK and the owner
@@ -1056,6 +969,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
*/
ret = 0;
}
+
+ if (unlikely(ret))
+ remove_waiter(lock, waiter);
+
raw_spin_unlock(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1110,12 +1027,11 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
set_current_state(TASK_INTERRUPTIBLE);
- ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
- detect_deadlock);
+ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
set_current_state(TASK_RUNNING);
- if (unlikely(waiter->task))
+ if (unlikely(ret))
remove_waiter(lock, waiter);
/*
@@ -1126,13 +1042,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
raw_spin_unlock(&lock->wait_lock);
- /*
- * Readjust priority, when we did not get the lock. We might have been
- * the pending owner and boosted. Since we did not take the lock, the
- * PI boost has to go.
- */
- if (unlikely(ret))
- rt_mutex_adjust_prio(current);
-
return ret;
}
diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h
index 97a2f81866af..53a66c85261b 100644
--- a/kernel/rtmutex_common.h
+++ b/kernel/rtmutex_common.h
@@ -91,9 +91,8 @@ task_top_pi_waiter(struct task_struct *p)
/*
* lock->owner state tracking:
*/
-#define RT_MUTEX_OWNER_PENDING 1UL
-#define RT_MUTEX_HAS_WAITERS 2UL
-#define RT_MUTEX_OWNER_MASKALL 3UL
+#define RT_MUTEX_HAS_WAITERS 1UL
+#define RT_MUTEX_OWNER_MASKALL 1UL
static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
{
@@ -101,17 +100,6 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock)
((unsigned long)lock->owner & ~RT_MUTEX_OWNER_MASKALL);
}
-static inline struct task_struct *rt_mutex_real_owner(struct rt_mutex *lock)
-{
- return (struct task_struct *)
- ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
-}
-
-static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock)
-{
- return (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING;
-}
-
/*
* PI-futex support (proxy locking functions, etc.):
*/
diff --git a/kernel/sched.c b/kernel/sched.c
index 2effcb71a478..859982e477ec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2330,27 +2330,6 @@ void kick_process(struct task_struct *p)
EXPORT_SYMBOL_GPL(kick_process);
#endif /* CONFIG_SMP */
-/**
- * task_oncpu_function_call - call a function on the cpu on which a task runs
- * @p: the task to evaluate
- * @func: the function to be called
- * @info: the function call argument
- *
- * Calls the function @func when the task is currently running. This might
- * be on the current CPU, which just calls the function directly
- */
-void task_oncpu_function_call(struct task_struct *p,
- void (*func) (void *info), void *info)
-{
- int cpu;
-
- preempt_disable();
- cpu = task_cpu(p);
- if (task_curr(p))
- smp_call_function_single(cpu, func, info, 1);
- preempt_enable();
-}
-
#ifdef CONFIG_SMP
/*
* ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
@@ -2842,9 +2821,12 @@ static inline void
prepare_task_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
+ sched_info_switch(prev, next);
+ perf_event_task_sched_out(prev, next);
fire_sched_out_preempt_notifiers(prev, next);
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
+ trace_sched_switch(prev, next);
}
/**
@@ -2977,7 +2959,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
struct mm_struct *mm, *oldmm;
prepare_task_switch(rq, prev, next);
- trace_sched_switch(prev, next);
+
mm = next->mm;
oldmm = prev->active_mm;
/*
@@ -4149,9 +4131,6 @@ need_resched_nonpreemptible:
rq->skip_clock_update = 0;
if (likely(prev != next)) {
- sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next);
-
rq->nr_switches++;
rq->curr = next;
++*switch_count;
@@ -5781,7 +5760,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
- ftrace_graph_init_task(idle);
+ ftrace_graph_init_idle_task(idle, cpu);
}
/*
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0cee50487629..3e757e60f45d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -737,7 +737,10 @@ static int run_ksoftirqd(void * __bind_cpu)
don't process */
if (cpu_is_offline((long)__bind_cpu))
goto wait_to_die;
- do_softirq();
+ local_irq_disable();
+ if (local_softirq_pending())
+ __do_softirq();
+ local_irq_enable();
preempt_enable_no_resched();
cond_resched();
preempt_disable();
diff --git a/kernel/time.c b/kernel/time.c
index 55337a816b20..6430a7528fe4 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -150,7 +150,7 @@ static inline void warp_clock(void)
* various programs will get confused when the clock gets warped.
*/
-int do_sys_settimeofday(struct timespec *tv, struct timezone *tz)
+int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
{
static int firsttime = 1;
int error = 0;
@@ -644,8 +644,9 @@ u64 nsec_to_clock_t(u64 x)
#endif
}
+
/**
- * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
*
* @n: nsecs in u64
*
@@ -657,26 +658,13 @@ u64 nsec_to_clock_t(u64 x)
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
-u64 nsecs_to_jiffies64(u64 n)
+unsigned long nsecs_to_jiffies(u64 n)
{
-#if (NSEC_PER_SEC % HZ) == 0
- /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
- return div_u64(n, NSEC_PER_SEC / HZ);
-#elif (HZ % 512) == 0
- /* overflow after 292 years if HZ = 1024 */
- return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
-#else
- /*
- * Generic case - optimized for cases where HZ is a multiple of 3.
- * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
- */
- return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
-#endif
+ return (unsigned long)nsecs_to_jiffies64(n);
}
-
/**
- * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
* @n: nsecs in u64
*
@@ -688,27 +676,22 @@ u64 nsecs_to_jiffies64(u64 n)
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
-unsigned long nsecs_to_jiffies(u64 n)
-{
- return (unsigned long)nsecs_to_jiffies64(n);
-}
-
-#if (BITS_PER_LONG < 64)
-u64 get_jiffies_64(void)
+u64 nsecs_to_jiffies64(u64 n)
{
- unsigned long seq;
- u64 ret;
-
- do {
- seq = read_seqbegin(&xtime_lock);
- ret = jiffies_64;
- } while (read_seqretry(&xtime_lock, seq));
- return ret;
-}
-EXPORT_SYMBOL(get_jiffies_64);
+#if (NSEC_PER_SEC % HZ) == 0
+ /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
+ return div_u64(n, NSEC_PER_SEC / HZ);
+#elif (HZ % 512) == 0
+ /* overflow after 292 years if HZ = 1024 */
+ return div_u64(n * HZ / 512, NSEC_PER_SEC / 512);
+#else
+ /*
+ * Generic case - optimized for cases where HZ is a multiple of 3.
+ * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc.
+ */
+ return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ);
#endif
-
-EXPORT_SYMBOL(jiffies);
+}
/*
* Add two timespec values and do a safety check for overflow.
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06c..b0425991e9ac 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,4 +1,5 @@
-obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o timeconv.o
+obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
+obj-y += timeconv.o posix-clock.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d7395fdfb9f3..0d74b9ba90c8 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,7 +18,6 @@
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 5404a8456909..b2fa506667c0 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -22,8 +22,11 @@
************************************************************************/
#include <linux/clocksource.h>
#include <linux/jiffies.h>
+#include <linux/module.h>
#include <linux/init.h>
+#include "tick-internal.h"
+
/* The Jiffies based clocksource is the lowest common
* denominator clock source which should function on
* all systems. It has the same coarse resolution as
@@ -64,6 +67,23 @@ struct clocksource clocksource_jiffies = {
.shift = JIFFIES_SHIFT,
};
+#if (BITS_PER_LONG < 64)
+u64 get_jiffies_64(void)
+{
+ unsigned long seq;
+ u64 ret;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ ret = jiffies_64;
+ } while (read_seqretry(&xtime_lock, seq));
+ return ret;
+}
+EXPORT_SYMBOL(get_jiffies_64);
+#endif
+
+EXPORT_SYMBOL(jiffies);
+
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5c00242fa921..5ac593267a26 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -16,6 +16,8 @@
#include <linux/mm.h>
#include <linux/module.h>
+#include "tick-internal.h"
+
/*
* NTP timekeeping variables:
*/
@@ -646,6 +648,17 @@ int do_adjtimex(struct timex *txc)
hrtimer_cancel(&leap_timer);
}
+ if (txc->modes & ADJ_SETOFFSET) {
+ struct timespec delta;
+ if ((unsigned long)txc->time.tv_usec >= NSEC_PER_SEC)
+ return -EINVAL;
+ delta.tv_sec = txc->time.tv_sec;
+ delta.tv_nsec = txc->time.tv_usec;
+ if (!(txc->modes & ADJ_NANO))
+ delta.tv_nsec *= 1000;
+ timekeeping_inject_offset(&delta);
+ }
+
getnstimeofday(&ts);
write_seqlock_irq(&xtime_lock);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
new file mode 100644
index 000000000000..04498cbf6002
--- /dev/null
+++ b/kernel/time/posix-clock.c
@@ -0,0 +1,441 @@
+/*
+ * posix-clock.c - support for dynamic clock devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/mutex.h>
+#include <linux/posix-clock.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+static void delete_clock(struct kref *kref);
+
+/*
+ * Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
+ */
+static struct posix_clock *get_posix_clock(struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+
+ mutex_lock(&clk->mutex);
+
+ if (!clk->zombie)
+ return clk;
+
+ mutex_unlock(&clk->mutex);
+
+ return NULL;
+}
+
+static void put_posix_clock(struct posix_clock *clk)
+{
+ mutex_unlock(&clk->mutex);
+}
+
+static ssize_t posix_clock_read(struct file *fp, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -EINVAL;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.read)
+ err = clk->ops.read(clk, fp->f_flags, buf, count);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int result = 0;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.poll)
+ result = clk->ops.poll(clk, fp, wait);
+
+ put_posix_clock(clk);
+
+ return result;
+}
+
+static int posix_clock_fasync(int fd, struct file *fp, int on)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = 0;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.fasync)
+ err = clk->ops.fasync(clk, fd, fp, on);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENODEV;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.mmap)
+ err = clk->ops.mmap(clk, vma);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+static long posix_clock_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+
+#ifdef CONFIG_COMPAT
+static long posix_clock_compat_ioctl(struct file *fp,
+ unsigned int cmd, unsigned long arg)
+{
+ struct posix_clock *clk = get_posix_clock(fp);
+ int err = -ENOTTY;
+
+ if (!clk)
+ return -ENODEV;
+
+ if (clk->ops.ioctl)
+ err = clk->ops.ioctl(clk, cmd, arg);
+
+ put_posix_clock(clk);
+
+ return err;
+}
+#endif
+
+static int posix_clock_open(struct inode *inode, struct file *fp)
+{
+ int err;
+ struct posix_clock *clk =
+ container_of(inode->i_cdev, struct posix_clock, cdev);
+
+ mutex_lock(&clk->mutex);
+
+ if (clk->zombie) {
+ err = -ENODEV;
+ goto out;
+ }
+ if (clk->ops.open)
+ err = clk->ops.open(clk, fp->f_mode);
+ else
+ err = 0;
+
+ if (!err) {
+ kref_get(&clk->kref);
+ fp->private_data = clk;
+ }
+out:
+ mutex_unlock(&clk->mutex);
+ return err;
+}
+
+static int posix_clock_release(struct inode *inode, struct file *fp)
+{
+ struct posix_clock *clk = fp->private_data;
+ int err = 0;
+
+ if (clk->ops.release)
+ err = clk->ops.release(clk);
+
+ kref_put(&clk->kref, delete_clock);
+
+ fp->private_data = NULL;
+
+ return err;
+}
+
+static const struct file_operations posix_clock_file_operations = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .read = posix_clock_read,
+ .poll = posix_clock_poll,
+ .unlocked_ioctl = posix_clock_ioctl,
+ .open = posix_clock_open,
+ .release = posix_clock_release,
+ .fasync = posix_clock_fasync,
+ .mmap = posix_clock_mmap,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = posix_clock_compat_ioctl,
+#endif
+};
+
+int posix_clock_register(struct posix_clock *clk, dev_t devid)
+{
+ int err;
+
+ kref_init(&clk->kref);
+ mutex_init(&clk->mutex);
+
+ cdev_init(&clk->cdev, &posix_clock_file_operations);
+ clk->cdev.owner = clk->ops.owner;
+ err = cdev_add(&clk->cdev, devid, 1);
+ if (err)
+ goto no_cdev;
+
+ return err;
+no_cdev:
+ mutex_destroy(&clk->mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(posix_clock_register);
+
+static void delete_clock(struct kref *kref)
+{
+ struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
+ mutex_destroy(&clk->mutex);
+ if (clk->release)
+ clk->release(clk);
+}
+
+void posix_clock_unregister(struct posix_clock *clk)
+{
+ cdev_del(&clk->cdev);
+
+ mutex_lock(&clk->mutex);
+ clk->zombie = true;
+ mutex_unlock(&clk->mutex);
+
+ kref_put(&clk->kref, delete_clock);
+}
+EXPORT_SYMBOL_GPL(posix_clock_unregister);
+
+struct posix_clock_desc {
+ struct file *fp;
+ struct posix_clock *clk;
+};
+
+static int get_clock_desc(const clockid_t id, struct posix_clock_desc *cd)
+{
+ struct file *fp = fget(CLOCKID_TO_FD(id));
+ int err = -EINVAL;
+
+ if (!fp)
+ return err;
+
+ if (fp->f_op->open != posix_clock_open || !fp->private_data)
+ goto out;
+
+ cd->fp = fp;
+ cd->clk = get_posix_clock(fp);
+
+ err = cd->clk ? 0 : -ENODEV;
+out:
+ if (err)
+ fput(fp);
+ return err;
+}
+
+static void put_clock_desc(struct posix_clock_desc *cd)
+{
+ put_posix_clock(cd->clk);
+ fput(cd->fp);
+}
+
+static int pc_clock_adjtime(clockid_t id, struct timex *tx)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_adjtime)
+ err = cd.clk->ops.clock_adjtime(cd.clk, tx);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_gettime)
+ err = cd.clk->ops.clock_gettime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_getres(clockid_t id, struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_getres)
+ err = cd.clk->ops.clock_getres(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_clock_settime(clockid_t id, const struct timespec *ts)
+{
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.clock_settime)
+ err = cd.clk->ops.clock_settime(cd.clk, ts);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_create(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_create)
+ err = cd.clk->ops.timer_create(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static int pc_timer_delete(struct k_itimer *kit)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_delete)
+ err = cd.clk->ops.timer_delete(cd.clk, kit);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+
+ if (get_clock_desc(id, &cd))
+ return;
+
+ if (cd.clk->ops.timer_gettime)
+ cd.clk->ops.timer_gettime(cd.clk, kit, ts);
+
+ put_clock_desc(&cd);
+}
+
+static int pc_timer_settime(struct k_itimer *kit, int flags,
+ struct itimerspec *ts, struct itimerspec *old)
+{
+ clockid_t id = kit->it_clock;
+ struct posix_clock_desc cd;
+ int err;
+
+ err = get_clock_desc(id, &cd);
+ if (err)
+ return err;
+
+ if (cd.clk->ops.timer_settime)
+ err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old);
+ else
+ err = -EOPNOTSUPP;
+
+ put_clock_desc(&cd);
+
+ return err;
+}
+
+struct k_clock clock_posix_dynamic = {
+ .clock_getres = pc_clock_getres,
+ .clock_set = pc_clock_settime,
+ .clock_get = pc_clock_gettime,
+ .clock_adj = pc_clock_adjtime,
+ .timer_create = pc_timer_create,
+ .timer_set = pc_timer_settime,
+ .timer_del = pc_timer_delete,
+ .timer_get = pc_timer_gettime,
+};
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 48b2761b5668..92ef9a54f0a4 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 051bc80a0c43..0e98fac3d479 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <asm/irq_regs.h>
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 290eefbc1f60..f77b93df0006 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,6 +1,10 @@
/*
* tick internal variable and functions used by low/high res code
*/
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
#define TICK_DO_TIMER_NONE -1
#define TICK_DO_TIMER_BOOT -2
@@ -132,3 +136,8 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
+
+#endif
+
+extern void do_timer(unsigned long ticks);
+extern seqlock_t xtime_lock;
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 5cbc101f908b..2d04411a5f05 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -18,7 +18,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include "tick-internal.h"
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c55ea2433471..d5097c44b407 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -19,7 +19,6 @@
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
-#include <linux/tick.h>
#include <linux/module.h>
#include <asm/irq_regs.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d27c7562902c..6262c1d18397 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -353,7 +353,7 @@ EXPORT_SYMBOL(do_gettimeofday);
*
* Sets the time of day to the new time and update NTP and notify hrtimers
*/
-int do_settimeofday(struct timespec *tv)
+int do_settimeofday(const struct timespec *tv)
{
struct timespec ts_delta;
unsigned long flags;
@@ -387,6 +387,42 @@ int do_settimeofday(struct timespec *tv)
EXPORT_SYMBOL(do_settimeofday);
+
+/**
+ * timekeeping_inject_offset - Adds or subtracts from the current time.
+ * @tv: pointer to the timespec variable containing the offset
+ *
+ * Adds or subtracts an offset value from the current time.
+ */
+int timekeeping_inject_offset(struct timespec *ts)
+{
+ unsigned long flags;
+
+ if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
+ return -EINVAL;
+
+ write_seqlock_irqsave(&xtime_lock, flags);
+
+ timekeeping_forward_now();
+
+ xtime = timespec_add(xtime, *ts);
+ wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
+
+ timekeeper.ntp_error = 0;
+ ntp_clear();
+
+ update_vsyscall(&xtime, &wall_to_monotonic, timekeeper.clock,
+ timekeeper.mult);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ /* signal hrtimers about time change */
+ clock_was_set();
+
+ return 0;
+}
+EXPORT_SYMBOL(timekeeping_inject_offset);
+
/**
* change_clocksource - Swaps clocksources if a new one is available
*
@@ -779,7 +815,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
*
* Called from the timer interrupt, must hold a write on xtime_lock.
*/
-void update_wall_time(void)
+static void update_wall_time(void)
{
struct clocksource *clock;
cycle_t offset;
@@ -910,11 +946,6 @@ struct timespec __current_kernel_time(void)
return xtime;
}
-struct timespec __get_wall_to_monotonic(void)
-{
- return wall_to_monotonic;
-}
-
struct timespec current_kernel_time(void)
{
struct timespec now;
@@ -946,3 +977,44 @@ struct timespec get_monotonic_coarse(void)
now.tv_nsec + mono.tv_nsec);
return now;
}
+
+/*
+ * The 64-bit jiffies value is not atomic - you MUST NOT read it
+ * without sampling the sequence number in xtime_lock.
+ * jiffies is defined in the linker script...
+ */
+void do_timer(unsigned long ticks)
+{
+ jiffies_64 += ticks;
+ update_wall_time();
+ calc_global_load(ticks);
+}
+
+/**
+ * get_xtime_and_monotonic_offset() - get xtime and wall_to_monotonic
+ * @xtim: pointer to timespec to be set with xtime
+ * @wtom: pointer to timespec to be set with wall_to_monotonic
+ */
+void get_xtime_and_monotonic_offset(struct timespec *xtim, struct timespec *wtom)
+{
+ unsigned long seq;
+
+ do {
+ seq = read_seqbegin(&xtime_lock);
+ *xtim = xtime;
+ *wtom = wall_to_monotonic;
+ } while (read_seqretry(&xtime_lock, seq));
+}
+
+/**
+ * xtime_update() - advances the timekeeping infrastructure
+ * @ticks: number of ticks, that have elapsed since the last call.
+ *
+ * Must be called with interrupts disabled.
+ */
+void xtime_update(unsigned long ticks)
+{
+ write_seqlock(&xtime_lock);
+ do_timer(ticks);
+ write_sequnlock(&xtime_lock);
+}
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d245..c848cd8abe2a 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1295,19 +1295,6 @@ void run_local_timers(void)
raise_softirq(TIMER_SOFTIRQ);
}
-/*
- * The 64-bit jiffies value is not atomic - you MUST NOT read it
- * without sampling the sequence number in xtime_lock.
- * jiffies is defined in the linker script...
- */
-
-void do_timer(unsigned long ticks)
-{
- jiffies_64 += ticks;
- update_wall_time();
- calc_global_load(ticks);
-}
-
#ifdef __ARCH_WANT_SYS_ALARM
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3dadae83883..888b611897d3 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3328,7 +3328,7 @@ static int start_graph_tracing(void)
/* The cpu_boot init_task->ret_stack will never be freed */
for_each_online_cpu(cpu) {
if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_task(idle_task(cpu));
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
}
do {
@@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void)
mutex_unlock(&ftrace_lock);
}
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+
+static void
+graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+ /* make curr_ret_stack visable before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
+
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ graph_init_task(t, ret_stack);
+ }
+}
+
/* Allocate a return stack for newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
@@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t)
GFP_KERNEL);
if (!ret_stack)
return;
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visable before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
+ graph_init_task(t, ret_stack);
}
}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2dec9bcde8b4..8435b43b1782 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -353,6 +353,43 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
kfree(data);
}
+/* Bitfield fetch function */
+struct bitfield_fetch_param {
+ struct fetch_param orig;
+ unsigned char hi_shift;
+ unsigned char low_shift;
+};
+
+#define DEFINE_FETCH_bitfield(type) \
+static __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs,\
+ void *data, void *dest) \
+{ \
+ struct bitfield_fetch_param *bprm = data; \
+ type buf = 0; \
+ call_fetch(&bprm->orig, regs, &buf); \
+ if (buf) { \
+ buf <<= bprm->hi_shift; \
+ buf >>= bprm->low_shift; \
+ } \
+ *(type *)dest = buf; \
+}
+DEFINE_BASIC_FETCH_FUNCS(bitfield)
+#define fetch_bitfield_string NULL
+#define fetch_bitfield_string_size NULL
+
+static __kprobes void
+free_bitfield_fetch_param(struct bitfield_fetch_param *data)
+{
+ /*
+ * Don't check the bitfield itself, because this must be the
+ * last fetch function.
+ */
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ free_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ free_symbol_cache(data->orig.data);
+ kfree(data);
+}
/* Default (unsigned long) fetch type */
#define __DEFAULT_FETCH_TYPE(t) u##t
#define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
@@ -367,6 +404,7 @@ enum {
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
+ FETCH_MTD_bitfield,
FETCH_MTD_END,
};
@@ -387,6 +425,7 @@ ASSIGN_FETCH_FUNC(retval, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
+ASSIGN_FETCH_FUNC(bitfield, ftype), \
} \
}
@@ -430,9 +469,33 @@ static const struct fetch_type *find_fetch_type(const char *type)
if (!type)
type = DEFAULT_FETCH_TYPE_STR;
+ /* Special case: bitfield */
+ if (*type == 'b') {
+ unsigned long bs;
+ type = strchr(type, '/');
+ if (!type)
+ goto fail;
+ type++;
+ if (strict_strtoul(type, 0, &bs))
+ goto fail;
+ switch (bs) {
+ case 8:
+ return find_fetch_type("u8");
+ case 16:
+ return find_fetch_type("u16");
+ case 32:
+ return find_fetch_type("u32");
+ case 64:
+ return find_fetch_type("u64");
+ default:
+ goto fail;
+ }
+ }
+
for (i = 0; i < ARRAY_SIZE(fetch_type_table); i++)
if (strcmp(type, fetch_type_table[i].name) == 0)
return &fetch_type_table[i];
+fail:
return NULL;
}
@@ -586,7 +649,9 @@ error:
static void free_probe_arg(struct probe_arg *arg)
{
- if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
+ if (CHECK_FETCH_FUNCS(bitfield, arg->fetch.fn))
+ free_bitfield_fetch_param(arg->fetch.data);
+ else if (CHECK_FETCH_FUNCS(deref, arg->fetch.fn))
free_deref_fetch_param(arg->fetch.data);
else if (CHECK_FETCH_FUNCS(symbol, arg->fetch.fn))
free_symbol_cache(arg->fetch.data);
@@ -767,16 +832,15 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
}
break;
case '+': /* deref memory */
+ arg++; /* Skip '+', because strict_strtol() rejects it. */
case '-':
tmp = strchr(arg, '(');
if (!tmp)
break;
*tmp = '\0';
- ret = strict_strtol(arg + 1, 0, &offset);
+ ret = strict_strtol(arg, 0, &offset);
if (ret)
break;
- if (arg[0] == '-')
- offset = -offset;
arg = tmp + 1;
tmp = strrchr(arg, ')');
if (tmp) {
@@ -807,6 +871,41 @@ static int __parse_probe_arg(char *arg, const struct fetch_type *t,
return ret;
}
+#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long))
+
+/* Bitfield type needs to be parsed into a fetch function */
+static int __parse_bitfield_probe_arg(const char *bf,
+ const struct fetch_type *t,
+ struct fetch_param *f)
+{
+ struct bitfield_fetch_param *bprm;
+ unsigned long bw, bo;
+ char *tail;
+
+ if (*bf != 'b')
+ return 0;
+
+ bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
+ if (!bprm)
+ return -ENOMEM;
+ bprm->orig = *f;
+ f->fn = t->fetch[FETCH_MTD_bitfield];
+ f->data = (void *)bprm;
+
+ bw = simple_strtoul(bf + 1, &tail, 0); /* Use simple one */
+ if (bw == 0 || *tail != '@')
+ return -EINVAL;
+
+ bf = tail + 1;
+ bo = simple_strtoul(bf, &tail, 0);
+ if (tail == bf || *tail != '/')
+ return -EINVAL;
+
+ bprm->hi_shift = BYTES_TO_BITS(t->size) - (bw + bo);
+ bprm->low_shift = bprm->hi_shift + bo;
+ return (BYTES_TO_BITS(t->size) < (bw + bo)) ? -EINVAL : 0;
+}
+
/* String length checking wrapper */
static int parse_probe_arg(char *arg, struct trace_probe *tp,
struct probe_arg *parg, int is_return)
@@ -836,6 +935,8 @@ static int parse_probe_arg(char *arg, struct trace_probe *tp,
parg->offset = tp->size;
tp->size += parg->type->size;
ret = __parse_probe_arg(arg, parg->type, &parg->fetch, is_return);
+ if (ret >= 0 && t != NULL)
+ ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
if (ret >= 0) {
parg->fetch_size.fn = get_fetch_size_function(parg->type,
parg->fetch.fn);
@@ -1130,7 +1231,7 @@ static int command_trace_probe(const char *buf)
return ret;
}
-#define WRITE_BUFSIZE 128
+#define WRITE_BUFSIZE 4096
static ssize_t probes_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/*
* wait for the read lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_read_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
-RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
/*
* wait for the write lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_write_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
-RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
-asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a873e61e312e..887ce3bd823d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3616,6 +3616,34 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
return -1;
}
+/*
+ * Basic iterator support. Return the last range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns last region regardless of node
+ */
+static int __meminit last_active_region_index_in_nid(int nid)
+{
+ int i;
+
+ for (i = nr_nodemap_entries - 1; i >= 0; i--)
+ if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+ return i;
+
+ return -1;
+}
+
+/*
+ * Basic iterator support. Return the previous active range of PFNs for a node
+ * Note: nid == MAX_NUMNODES returns next region regardless of node
+ */
+static int __meminit previous_active_region_index_in_nid(int index, int nid)
+{
+ for (index = index - 1; index >= 0; index--)
+ if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+ return index;
+
+ return -1;
+}
+
#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
/*
* Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3667,6 +3695,10 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
for (i = first_active_region_index_in_nid(nid); i != -1; \
i = next_active_region_index_in_nid(i, nid))
+#define for_each_active_range_index_in_nid_reverse(i, nid) \
+ for (i = last_active_region_index_in_nid(nid); i != -1; \
+ i = previous_active_region_index_in_nid(i, nid))
+
/**
* free_bootmem_with_active_regions - Call free_bootmem_node for each active range
* @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3705,7 +3737,7 @@ u64 __init find_memory_core_early(int nid, u64 size, u64 align,
int i;
/* Need to go over early_node_map to find out good range for node */
- for_each_active_range_index_in_nid(i, nid) {
+ for_each_active_range_index_in_nid_reverse(i, nid) {
u64 addr;
u64 ei_start, ei_last;
u64 final_start, final_end;
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
}
-# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
- if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
- ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
- }
-
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
CHK("if this code is redundant consider removing it\n" .
diff --git a/security/commoncap.c b/security/commoncap.c
index 64c2ed9c9015..dbfdaed4cc66 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -93,7 +93,7 @@ int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
* Determine whether the current process may set the system clock and timezone
* information, returning 0 if permission granted, -ve if denied.
*/
-int cap_settime(struct timespec *ts, struct timezone *tz)
+int cap_settime(const struct timespec *ts, const struct timezone *tz)
{
if (!capable(CAP_SYS_TIME))
return -EPERM;
diff --git a/security/security.c b/security/security.c
index 08ca967eaa3d..5fee5dd00ff2 100644
--- a/security/security.c
+++ b/security/security.c
@@ -196,7 +196,7 @@ int security_syslog(int type)
return security_ops->syslog(type);
}
-int security_settime(struct timespec *ts, struct timezone *tz)
+int security_settime(const struct timespec *ts, const struct timezone *tz)
{
return security_ops->settime(ts, tz);
}
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 86b797a35aa6..81c3220e04f3 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -73,6 +73,17 @@ OPTIONS
(Only for --vars) Show external defined variables in addition to local
variables.
+-F::
+--funcs::
+ Show available functions in given module or kernel.
+
+--filter=FILTER::
+ (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
+ pattern, see FILTER PATTERN for detail.
+ Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
+ for --funcs.
+ If several filters are specified, only the last filter is used.
+
-f::
--force::
Forcibly add events with existing name.
@@ -135,6 +146,14 @@ e.g.
This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
+FILTER PATTERN
+--------------
+ The filter pattern is a glob matching pattern(s) to filter variables.
+ In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
+
+e.g.
+ With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
+ With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
EXAMPLES
--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7141c42e1469..94f73abdc56a 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -315,6 +315,7 @@ COMPAT_CFLAGS =
COMPAT_OBJS =
LIB_H =
LIB_OBJS =
+PYRF_OBJS =
SCRIPT_PERL =
SCRIPT_SH =
TEST_PROGRAMS =
@@ -324,6 +325,9 @@ SCRIPT_SH += perf-archive.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
+$(OUTPUT)python/perf.so: $(PYRF_OBJS)
+ $(QUIET_GEN)python util/setup.py --quiet build_ext --build-lib='$(OUTPUT)python' \
+ --build-temp='$(OUTPUT)python/temp'
#
# No Perl scripts right now:
#
@@ -344,6 +348,8 @@ PROGRAMS += $(EXTRA_PROGRAMS)
#
PROGRAMS += $(OUTPUT)perf
+LANG_BINDINGS =
+
# List built-in command $C whose implementation cmd_$C() is not in
# builtin-$C.o but is linked in as part of some other command.
#
@@ -395,6 +401,7 @@ LIB_H += util/include/dwarf-regs.h
LIB_H += util/include/asm/dwarf2.h
LIB_H += util/include/asm/cpufeature.h
LIB_H += perf.h
+LIB_H += util/annotate.h
LIB_H += util/cache.h
LIB_H += util/callchain.h
LIB_H += util/build-id.h
@@ -402,6 +409,7 @@ LIB_H += util/debug.h
LIB_H += util/debugfs.h
LIB_H += util/event.h
LIB_H += util/evsel.h
+LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
@@ -416,6 +424,7 @@ LIB_H += util/help.h
LIB_H += util/session.h
LIB_H += util/strbuf.h
LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
LIB_H += util/svghelper.h
LIB_H += util/run-command.h
LIB_H += util/sigchain.h
@@ -425,21 +434,25 @@ LIB_H += util/values.h
LIB_H += util/sort.h
LIB_H += util/hist.h
LIB_H += util/thread.h
+LIB_H += util/thread_map.h
LIB_H += util/trace-event.h
LIB_H += util/probe-finder.h
LIB_H += util/probe-event.h
LIB_H += util/pstack.h
LIB_H += util/cpumap.h
+LIB_H += util/top.h
LIB_H += $(ARCH_INCLUDE)
LIB_OBJS += $(OUTPUT)util/abspath.o
LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
LIB_OBJS += $(OUTPUT)util/build-id.o
LIB_OBJS += $(OUTPUT)util/config.o
LIB_OBJS += $(OUTPUT)util/ctype.o
LIB_OBJS += $(OUTPUT)util/debugfs.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
LIB_OBJS += $(OUTPUT)util/evsel.o
LIB_OBJS += $(OUTPUT)util/exec_cmd.o
LIB_OBJS += $(OUTPUT)util/help.o
@@ -455,6 +468,8 @@ LIB_OBJS += $(OUTPUT)util/quote.o
LIB_OBJS += $(OUTPUT)util/strbuf.o
LIB_OBJS += $(OUTPUT)util/string.o
LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
LIB_OBJS += $(OUTPUT)util/usage.o
LIB_OBJS += $(OUTPUT)util/wrapper.o
LIB_OBJS += $(OUTPUT)util/sigchain.o
@@ -469,6 +484,7 @@ LIB_OBJS += $(OUTPUT)util/map.o
LIB_OBJS += $(OUTPUT)util/pstack.o
LIB_OBJS += $(OUTPUT)util/session.o
LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
LIB_OBJS += $(OUTPUT)util/trace-event-read.o
LIB_OBJS += $(OUTPUT)util/trace-event-info.o
@@ -514,6 +530,20 @@ BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
PERFLIBS = $(LIB_FILE)
+# Files needed for the python binding, perf.so
+# pyrf is just an internal name needed for all those wrappers.
+# This has to be in sync with what is in the 'sources' variable in
+# tools/perf/util/setup.py
+
+PYRF_OBJS += $(OUTPUT)util/cpumap.o
+PYRF_OBJS += $(OUTPUT)util/ctype.o
+PYRF_OBJS += $(OUTPUT)util/evlist.o
+PYRF_OBJS += $(OUTPUT)util/evsel.o
+PYRF_OBJS += $(OUTPUT)util/python.o
+PYRF_OBJS += $(OUTPUT)util/thread_map.o
+PYRF_OBJS += $(OUTPUT)util/util.o
+PYRF_OBJS += $(OUTPUT)util/xyarray.o
+
#
# Platform specific tweaks
#
@@ -595,6 +625,7 @@ else
LIB_OBJS += $(OUTPUT)util/ui/browsers/annotate.o
LIB_OBJS += $(OUTPUT)util/ui/browsers/hists.o
LIB_OBJS += $(OUTPUT)util/ui/browsers/map.o
+ LIB_OBJS += $(OUTPUT)util/ui/browsers/top.o
LIB_OBJS += $(OUTPUT)util/ui/helpline.o
LIB_OBJS += $(OUTPUT)util/ui/progress.o
LIB_OBJS += $(OUTPUT)util/ui/util.o
@@ -635,12 +666,14 @@ else
PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null`
FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
+ msg := $(warning No Python.h found, install python-dev[el] to have python support in 'perf script' and to build the python bindings)
BASIC_CFLAGS += -DNO_LIBPYTHON
else
ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
EXTLIBS += $(PYTHON_EMBED_LIBADD)
LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+ LANG_BINDINGS += $(OUTPUT)python/perf.so
endif
endif
@@ -927,7 +960,7 @@ export TAR INSTALL DESTDIR SHELL_PATH
SHELL = $(SHELL_PATH)
-all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
+all:: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(BUILT_INS) $(OTHER_PROGRAMS) $(OUTPUT)PERF-BUILD-OPTIONS
ifneq (,$X)
$(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';)
endif
@@ -1024,6 +1057,9 @@ $(OUTPUT)util/ui/browser.o: util/ui/browser.c $(OUTPUT)PERF-CFLAGS
$(OUTPUT)util/ui/browsers/annotate.o: util/ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
+$(OUTPUT)util/ui/browsers/top.o: util/ui/browsers/top.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
$(OUTPUT)util/ui/browsers/hists.o: util/ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
@@ -1257,17 +1293,17 @@ distclean: clean
# $(RM) configure
clean:
- $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE)
+ $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
$(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X
$(RM) $(TEST_PROGRAMS)
$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
- $(RM) -r autom4te.cache
- $(RM) config.log config.mak.autogen config.mak.append config.status config.cache
$(RM) -r $(PERF_TARNAME) .doc-tmp-dir
$(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz
$(RM) $(htmldocs).tar.gz $(manpages).tar.gz
$(MAKE) -C Documentation/ clean
$(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-BUILD-OPTIONS
+ @python util/setup.py clean --build-lib='$(OUTPUT)python' \
+ --build-temp='$(OUTPUT)python/temp'
.PHONY: all install clean strip
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index d9ab3ce446ac..0c7454f8b8a9 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -55,7 +55,7 @@ int bench_sched_pipe(int argc, const char **argv,
* discarding returned value of read(), write()
* causes error in building environment for perf
*/
- int ret, wait_stat;
+ int __used ret, wait_stat;
pid_t pid, retpid;
argc = parse_options(argc, argv, options,
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 8879463807e4..427182953fd7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -9,6 +9,7 @@
#include "util/util.h"
+#include "util/util.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
@@ -18,6 +19,7 @@
#include "perf.h"
#include "util/debug.h"
+#include "util/annotate.h"
#include "util/event.h"
#include "util/parse-options.h"
#include "util/parse-events.h"
@@ -55,15 +57,29 @@ static int hists__add_entry(struct hists *self, struct addr_location *al)
if (he == NULL)
return -ENOMEM;
- return hist_entry__inc_addr_samples(he, al->addr);
+ if (he->ms.sym != NULL) {
+ /*
+ * All aggregated on the first sym_hist.
+ */
+ struct annotation *notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL &&
+ symbol__alloc_hist(he->ms.sym, 1) < 0)
+ return -ENOMEM;
+
+ return hist_entry__inc_addr_samples(he, 0, al->addr);
+ }
+
+ return 0;
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ symbol__annotate_init) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -78,245 +94,10 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
return 0;
}
-static int objdump_line__print(struct objdump_line *self,
- struct list_head *head,
- struct hist_entry *he, u64 len)
-{
- struct symbol *sym = he->ms.sym;
- static const char *prev_line;
- static const char *prev_color;
-
- if (self->offset != -1) {
- const char *path = NULL;
- unsigned int hits = 0;
- double percent = 0.0;
- const char *color;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_ext *sym_ext = priv->ext;
- struct sym_hist *h = priv->hist;
- s64 offset = self->offset;
- struct objdump_line *next = objdump__get_next_ip_line(head, self);
-
- while (offset < (s64)len &&
- (next == NULL || offset < next->offset)) {
- if (sym_ext) {
- if (path == NULL)
- path = sym_ext[offset].path;
- percent += sym_ext[offset].percent;
- } else
- hits += h->ip[offset];
-
- ++offset;
- }
-
- if (sym_ext == NULL && h->sum)
- percent = 100.0 * hits / h->sum;
-
- color = get_percent_color(percent);
-
- /*
- * Also color the filename and line if needed, with
- * the same color than the percentage. Don't print it
- * twice for close colored ip with the same filename:line
- */
- if (path) {
- if (!prev_line || strcmp(prev_line, path)
- || color != prev_color) {
- color_fprintf(stdout, color, " %s", path);
- prev_line = path;
- prev_color = color;
- }
- }
-
- color_fprintf(stdout, color, " %7.2f", percent);
- printf(" : ");
- color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", self->line);
- } else {
- if (!*self->line)
- printf(" :\n");
- else
- printf(" : %s\n", self->line);
- }
-
- return 0;
-}
-
-static struct rb_root root_sym_ext;
-
-static void insert_source_line(struct sym_ext *sym_ext)
-{
- struct sym_ext *iter;
- struct rb_node **p = &root_sym_ext.rb_node;
- struct rb_node *parent = NULL;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct sym_ext, node);
-
- if (sym_ext->percent > iter->percent)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&sym_ext->node, parent, p);
- rb_insert_color(&sym_ext->node, &root_sym_ext);
-}
-
-static void free_source_line(struct hist_entry *he, int len)
-{
- struct sym_priv *priv = symbol__priv(he->ms.sym);
- struct sym_ext *sym_ext = priv->ext;
- int i;
-
- if (!sym_ext)
- return;
-
- for (i = 0; i < len; i++)
- free(sym_ext[i].path);
- free(sym_ext);
-
- priv->ext = NULL;
- root_sym_ext = RB_ROOT;
-}
-
-/* Get the filename:line for the colored entries */
-static void
-get_source_line(struct hist_entry *he, int len, const char *filename)
-{
- struct symbol *sym = he->ms.sym;
- u64 start;
- int i;
- char cmd[PATH_MAX * 2];
- struct sym_ext *sym_ext;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_hist *h = priv->hist;
-
- if (!h->sum)
- return;
-
- sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext));
- if (!priv->ext)
- return;
-
- start = he->ms.map->unmap_ip(he->ms.map, sym->start);
-
- for (i = 0; i < len; i++) {
- char *path = NULL;
- size_t line_len;
- u64 offset;
- FILE *fp;
-
- sym_ext[i].percent = 100.0 * h->ip[i] / h->sum;
- if (sym_ext[i].percent <= 0.5)
- continue;
-
- offset = start + i;
- sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
- fp = popen(cmd, "r");
- if (!fp)
- continue;
-
- if (getline(&path, &line_len, fp) < 0 || !line_len)
- goto next;
-
- sym_ext[i].path = malloc(sizeof(char) * line_len + 1);
- if (!sym_ext[i].path)
- goto next;
-
- strcpy(sym_ext[i].path, path);
- insert_source_line(&sym_ext[i]);
-
- next:
- pclose(fp);
- }
-}
-
-static void print_summary(const char *filename)
+static int hist_entry__tty_annotate(struct hist_entry *he, int evidx)
{
- struct sym_ext *sym_ext;
- struct rb_node *node;
-
- printf("\nSorted summary for file %s\n", filename);
- printf("----------------------------------------------\n\n");
-
- if (RB_EMPTY_ROOT(&root_sym_ext)) {
- printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
- return;
- }
-
- node = rb_first(&root_sym_ext);
- while (node) {
- double percent;
- const char *color;
- char *path;
-
- sym_ext = rb_entry(node, struct sym_ext, node);
- percent = sym_ext->percent;
- color = get_percent_color(percent);
- path = sym_ext->path;
-
- color_fprintf(stdout, color, " %7.2f %s", percent, path);
- node = rb_next(node);
- }
-}
-
-static void hist_entry__print_hits(struct hist_entry *self)
-{
- struct symbol *sym = self->ms.sym;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_hist *h = priv->hist;
- u64 len = sym->end - sym->start, offset;
-
- for (offset = 0; offset < len; ++offset)
- if (h->ip[offset] != 0)
- printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
- sym->start + offset, h->ip[offset]);
- printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
-}
-
-static int hist_entry__tty_annotate(struct hist_entry *he)
-{
- struct map *map = he->ms.map;
- struct dso *dso = map->dso;
- struct symbol *sym = he->ms.sym;
- const char *filename = dso->long_name, *d_filename;
- u64 len;
- LIST_HEAD(head);
- struct objdump_line *pos, *n;
-
- if (hist_entry__annotate(he, &head, 0) < 0)
- return -1;
-
- if (full_paths)
- d_filename = filename;
- else
- d_filename = basename(filename);
-
- len = sym->end - sym->start;
-
- if (print_line) {
- get_source_line(he, len, filename);
- print_summary(filename);
- }
-
- printf("\n\n------------------------------------------------\n");
- printf(" Percent | Source code & Disassembly of %s\n", d_filename);
- printf("------------------------------------------------\n");
-
- if (verbose)
- hist_entry__print_hits(he);
-
- list_for_each_entry_safe(pos, n, &head, node) {
- objdump_line__print(pos, &head, he, len);
- list_del(&pos->node);
- objdump_line__free(pos);
- }
-
- if (print_line)
- free_source_line(he, len);
-
- return 0;
+ return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
+ print_line, full_paths, 0, 0);
}
static void hists__find_annotations(struct hists *self)
@@ -326,13 +107,13 @@ static void hists__find_annotations(struct hists *self)
while (nd) {
struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
- struct sym_priv *priv;
+ struct annotation *notes;
if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
goto find_next;
- priv = symbol__priv(he->ms.sym);
- if (priv->hist == NULL) {
+ notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL) {
find_next:
if (key == KEY_LEFT)
nd = rb_prev(nd);
@@ -342,7 +123,8 @@ find_next:
}
if (use_browser > 0) {
- key = hist_entry__tui_annotate(he);
+ /* For now all is aggregated on the first */
+ key = hist_entry__tui_annotate(he, 0);
switch (key) {
case KEY_RIGHT:
next = rb_next(nd);
@@ -357,24 +139,25 @@ find_next:
if (next != NULL)
nd = next;
} else {
- hist_entry__tty_annotate(he);
+ /* For now all is aggregated on the first */
+ hist_entry__tty_annotate(he, 0);
nd = rb_next(nd);
/*
* Since we have a hist_entry per IP for the same
- * symbol, free he->ms.sym->hist to signal we already
+ * symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
- free(priv->hist);
- priv->hist = NULL;
+ free(notes->src);
+ notes->src = NULL;
}
}
}
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .fork = event__process_task,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .fork = perf_event__process_task,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
@@ -451,9 +234,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used)
else if (use_tui)
use_browser = 1;
- setup_browser();
+ setup_browser(true);
- symbol_conf.priv_size = sizeof(struct sym_priv);
+ symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = true;
if (symbol__init() < 0)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 3153e492dbcc..6b7d91160ecb 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -30,13 +30,13 @@ static int hists__add_entry(struct hists *self,
return -ENOMEM;
}
-static int diff__process_sample_event(event_t *event,
- struct sample_data *sample,
+static int diff__process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -56,11 +56,11 @@ static int diff__process_sample_event(event_t *event,
static struct perf_event_ops event_ops = {
.sample = diff__process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .exit = event__process_task,
- .fork = event__process_task,
- .lost = event__process_lost,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_task,
+ .fork = perf_event__process_task,
+ .lost = perf_event__process_lost,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 0c78ffa7bf67..e29f04ed3396 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,8 +16,8 @@
static char const *input_name = "-";
static bool inject_build_ids;
-static int event__repipe_synth(event_t *event,
- struct perf_session *session __used)
+static int perf_event__repipe_synth(union perf_event *event,
+ struct perf_session *session __used)
{
uint32_t size;
void *buf = event;
@@ -36,41 +36,44 @@ static int event__repipe_synth(event_t *event,
return 0;
}
-static int event__repipe(event_t *event, struct sample_data *sample __used,
- struct perf_session *session)
+static int perf_event__repipe(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- return event__repipe_synth(event, session);
+ return perf_event__repipe_synth(event, session);
}
-static int event__repipe_mmap(event_t *self, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__repipe_mmap(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_mmap(self, sample, session);
- event__repipe(self, sample, session);
+ err = perf_event__process_mmap(event, sample, session);
+ perf_event__repipe(event, sample, session);
return err;
}
-static int event__repipe_task(event_t *self, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__repipe_task(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
int err;
- err = event__process_task(self, sample, session);
- event__repipe(self, sample, session);
+ err = perf_event__process_task(event, sample, session);
+ perf_event__repipe(event, sample, session);
return err;
}
-static int event__repipe_tracing_data(event_t *self,
- struct perf_session *session)
+static int perf_event__repipe_tracing_data(union perf_event *event,
+ struct perf_session *session)
{
int err;
- event__repipe_synth(self, session);
- err = event__process_tracing_data(self, session);
+ perf_event__repipe_synth(event, session);
+ err = perf_event__process_tracing_data(event, session);
return err;
}
@@ -109,8 +112,8 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
if (self->kernel)
misc = PERF_RECORD_MISC_KERNEL;
- err = event__synthesize_build_id(self, misc, event__repipe,
- machine, session);
+ err = perf_event__synthesize_build_id(self, misc, perf_event__repipe,
+ machine, session);
if (err) {
pr_err("Can't synthesize build_id event for %s\n", self->long_name);
return -1;
@@ -119,8 +122,9 @@ static int dso__inject_build_id(struct dso *self, struct perf_session *session)
return 0;
}
-static int event__inject_buildid(event_t *event, struct sample_data *sample,
- struct perf_session *session)
+static int perf_event__inject_buildid(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
struct addr_location al;
struct thread *thread;
@@ -155,24 +159,24 @@ static int event__inject_buildid(event_t *event, struct sample_data *sample,
}
repipe:
- event__repipe(event, sample, session);
+ perf_event__repipe(event, sample, session);
return 0;
}
struct perf_event_ops inject_ops = {
- .sample = event__repipe,
- .mmap = event__repipe,
- .comm = event__repipe,
- .fork = event__repipe,
- .exit = event__repipe,
- .lost = event__repipe,
- .read = event__repipe,
- .throttle = event__repipe,
- .unthrottle = event__repipe,
- .attr = event__repipe_synth,
- .event_type = event__repipe_synth,
- .tracing_data = event__repipe_synth,
- .build_id = event__repipe_synth,
+ .sample = perf_event__repipe,
+ .mmap = perf_event__repipe,
+ .comm = perf_event__repipe,
+ .fork = perf_event__repipe,
+ .exit = perf_event__repipe,
+ .lost = perf_event__repipe,
+ .read = perf_event__repipe,
+ .throttle = perf_event__repipe,
+ .unthrottle = perf_event__repipe,
+ .attr = perf_event__repipe_synth,
+ .event_type = perf_event__repipe_synth,
+ .tracing_data = perf_event__repipe_synth,
+ .build_id = perf_event__repipe_synth,
};
extern volatile int session_done;
@@ -190,10 +194,10 @@ static int __cmd_inject(void)
signal(SIGINT, sig_handler);
if (inject_build_ids) {
- inject_ops.sample = event__inject_buildid;
- inject_ops.mmap = event__repipe_mmap;
- inject_ops.fork = event__repipe_task;
- inject_ops.tracing_data = event__repipe_tracing_data;
+ inject_ops.sample = perf_event__inject_buildid;
+ inject_ops.mmap = perf_event__repipe_mmap;
+ inject_ops.fork = perf_event__repipe_task;
+ inject_ops.tracing_data = perf_event__repipe_tracing_data;
}
session = perf_session__new(input_name, O_RDONLY, false, true, &inject_ops);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index d97256d65980..7f618f4e7b79 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -275,9 +275,8 @@ static void process_free_event(void *data,
s_alloc->alloc_cpu = -1;
}
-static void
-process_raw_event(event_t *raw_event __used, void *data,
- int cpu, u64 timestamp, struct thread *thread)
+static void process_raw_event(union perf_event *raw_event __used, void *data,
+ int cpu, u64 timestamp, struct thread *thread)
{
struct event *event;
int type;
@@ -304,7 +303,8 @@ process_raw_event(event_t *raw_event __used, void *data,
}
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -325,7 +325,7 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .comm = event__process_comm,
+ .comm = perf_event__process_comm,
.ordered_samples = true,
};
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2b36defc5d73..e00d93847c44 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -834,14 +834,14 @@ static void dump_info(void)
die("Unknown type of information\n");
}
-static int process_sample_event(event_t *self, struct sample_data *sample,
+static int process_sample_event(union perf_event *event, struct perf_sample *sample,
struct perf_session *s)
{
struct thread *thread = perf_session__findnew(s, sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
- self->header.type);
+ event->header.type);
return -1;
}
@@ -852,7 +852,7 @@ static int process_sample_event(event_t *self, struct sample_data *sample,
static struct perf_event_ops eops = {
.sample = process_sample_event,
- .comm = event__process_comm,
+ .comm = perf_event__process_comm,
.ordered_samples = true,
};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index add163c9f0e7..2c0e64d0b4aa 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -36,6 +36,7 @@
#include "builtin.h"
#include "util/util.h"
#include "util/strlist.h"
+#include "util/strfilter.h"
#include "util/symbol.h"
#include "util/debug.h"
#include "util/debugfs.h"
@@ -43,6 +44,8 @@
#include "util/probe-finder.h"
#include "util/probe-event.h"
+#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
+#define DEFAULT_FUNC_FILTER "!_*"
#define MAX_PATH_LEN 256
/* Session management structure */
@@ -52,6 +55,7 @@ static struct {
bool show_lines;
bool show_vars;
bool show_ext_vars;
+ bool show_funcs;
bool mod_events;
int nevents;
struct perf_probe_event events[MAX_PROBES];
@@ -59,6 +63,7 @@ static struct {
struct line_range line_range;
const char *target_module;
int max_probe_points;
+ struct strfilter *filter;
} params;
/* Parse an event definition. Note that any error must die. */
@@ -157,6 +162,27 @@ static int opt_show_vars(const struct option *opt __used,
}
#endif
+static int opt_set_filter(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ const char *err;
+
+ if (str) {
+ pr_debug2("Set filter: %s\n", str);
+ if (params.filter)
+ strfilter__delete(params.filter);
+ params.filter = strfilter__new(str, &err);
+ if (!params.filter) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static const char * const probe_usage[] = {
"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
@@ -221,6 +247,13 @@ static const struct option options[] = {
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
"Set how many probe points can be found for a probe."),
+ OPT_BOOLEAN('F', "funcs", &params.show_funcs,
+ "Show potential probe-able functions."),
+ OPT_CALLBACK('\0', "filter", NULL,
+ "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
+ "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
+ "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
+ opt_set_filter),
OPT_END()
};
@@ -246,7 +279,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
params.max_probe_points = MAX_PROBES;
if ((!params.nevents && !params.dellist && !params.list_events &&
- !params.show_lines))
+ !params.show_lines && !params.show_funcs))
usage_with_options(probe_usage, options);
/*
@@ -267,12 +300,41 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
pr_err(" Error: Don't use --list with --vars.\n");
usage_with_options(probe_usage, options);
}
+ if (params.show_funcs) {
+ pr_err(" Error: Don't use --list with --funcs.\n");
+ usage_with_options(probe_usage, options);
+ }
ret = show_perf_probe_events();
if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n",
ret);
return ret;
}
+ if (params.show_funcs) {
+ if (params.nevents != 0 || params.dellist) {
+ pr_err(" Error: Don't use --funcs with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_lines) {
+ pr_err(" Error: Don't use --funcs with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_vars) {
+ pr_err(" Error: Don't use --funcs with --vars.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (!params.filter)
+ params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
+ NULL);
+ ret = show_available_funcs(params.target_module,
+ params.filter);
+ strfilter__delete(params.filter);
+ if (ret < 0)
+ pr_err(" Error: Failed to show functions."
+ " (%d)\n", ret);
+ return ret;
+ }
#ifdef DWARF_SUPPORT
if (params.show_lines) {
@@ -297,10 +359,16 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (!params.filter)
+ params.filter = strfilter__new(DEFAULT_VAR_FILTER,
+ NULL);
+
ret = show_available_vars(params.events, params.nevents,
params.max_probe_points,
params.target_module,
+ params.filter,
params.show_ext_vars);
+ strfilter__delete(params.filter);
if (ret < 0)
pr_err(" Error: Failed to show vars. (%d)\n", ret);
return ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 60cac6f92e8b..12e0e41696d9 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,17 +18,20 @@
#include "util/header.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
+#include "util/thread_map.h"
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->id, x, y)
enum write_mode_t {
WRITE_FORCE,
@@ -39,14 +42,13 @@ static u64 user_interval = ULLONG_MAX;
static u64 default_interval = 0;
static u64 sample_type;
-static struct cpu_map *cpus;
static unsigned int page_size;
static unsigned int mmap_pages = 128;
static unsigned int user_freq = UINT_MAX;
static int freq = 1000;
static int output;
static int pipe_output = 0;
-static const char *output_name = "perf.data";
+static const char *output_name = NULL;
static int group = 0;
static int realtime_prio = 0;
static bool nodelay = false;
@@ -55,7 +57,6 @@ static bool sample_id_all_avail = true;
static bool system_wide = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
-static struct thread_map *threads;
static pid_t child_pid = -1;
static bool no_inherit = false;
static enum write_mode_t write_mode = WRITE_FORCE;
@@ -66,51 +67,17 @@ static bool sample_address = false;
static bool sample_time = false;
static bool no_buildid = false;
static bool no_buildid_cache = false;
+static struct perf_evlist *evsel_list;
static long samples = 0;
static u64 bytes_written = 0;
-static struct pollfd *event_array;
-
-static int nr_poll = 0;
-static int nr_cpu = 0;
-
static int file_new = 1;
static off_t post_processing_offset;
static struct perf_session *session;
static const char *cpu_list;
-struct mmap_data {
- void *base;
- unsigned int mask;
- unsigned int prev;
-};
-
-static struct mmap_data mmap_array[MAX_NR_CPUS];
-
-static unsigned long mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- long head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
-{
- struct perf_event_mmap_page *pc = md->base;
-
- /*
- * ensure all reads are done before we write the tail out.
- */
- /* mb(); */
- pc->data_tail = tail;
-}
-
static void advance_output(size_t size)
{
bytes_written += size;
@@ -131,42 +98,26 @@ static void write_output(void *buf, size_t size)
}
}
-static int process_synthesized_event(event_t *event,
- struct sample_data *sample __used,
+static int process_synthesized_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *self __used)
{
write_output(event, event->header.size);
return 0;
}
-static void mmap_read(struct mmap_data *md)
+static void mmap_read(struct perf_mmap *md)
{
- unsigned int head = mmap_read_head(md);
+ unsigned int head = perf_mmap__read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
unsigned long size;
void *buf;
- int diff;
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data\n");
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
+ if (old == head)
+ return;
- if (old != head)
- samples++;
+ samples++;
size = head - old;
@@ -185,7 +136,7 @@ static void mmap_read(struct mmap_data *md)
write_output(buf, size);
md->prev = old;
- mmap_write_tail(md, old);
+ perf_mmap__write_tail(md, old);
}
static volatile int done = 0;
@@ -209,8 +160,6 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static int group_fd;
-
static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
{
struct perf_header_attr *h_attr;
@@ -234,28 +183,47 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
char *filter = evsel->filter;
struct perf_event_attr *attr = &evsel->attr;
struct perf_header_attr *h_attr;
- int track = !evsel->idx; /* only the first counter needs these */
+ struct perf_sample_id *sid;
int thread_index;
int ret;
- struct {
- u64 count;
- u64 time_enabled;
- u64 time_running;
- u64 id;
- } read_data;
- /*
- * Check if parse_single_tracepoint_event has already asked for
- * PERF_SAMPLE_TIME.
- *
- * XXX this is kludgy but short term fix for problems introduced by
- * eac23d1c that broke 'perf script' by having different sample_types
- * when using multiple tracepoint events when we use a perf binary
- * that tries to use sample_id_all on an older kernel.
- *
- * We need to move counter creation to perf_session, support
- * different sample_types, etc.
- */
- bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
+
+ for (thread_index = 0; thread_index < evsel_list->threads->nr; thread_index++) {
+ h_attr = get_header_attr(attr, evsel->idx);
+ if (h_attr == NULL)
+ die("nomem\n");
+
+ if (!file_new) {
+ if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
+ fprintf(stderr, "incompatible append\n");
+ exit(-1);
+ }
+ }
+
+ sid = SID(evsel, cpu, thread_index);
+ if (perf_header_attr__add_id(h_attr, sid->id) < 0) {
+ pr_warning("Not enough memory to add id\n");
+ exit(-1);
+ }
+
+ if (filter != NULL) {
+ ret = ioctl(FD(evsel, cpu, thread_index),
+ PERF_EVENT_IOC_SET_FILTER, filter);
+ if (ret) {
+ error("failed to set filter with %d (%s)\n", errno,
+ strerror(errno));
+ exit(-1);
+ }
+ }
+ }
+
+ if (!sample_type)
+ sample_type = attr->sample_type;
+}
+
+static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ int track = !evsel->idx; /* only the first counter needs these */
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -263,7 +231,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (nr_counters > 1)
+ if (evlist->nr_entries > 1)
attr->sample_type |= PERF_SAMPLE_ID;
/*
@@ -315,19 +283,40 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->mmap = track;
attr->comm = track;
- attr->inherit = !no_inherit;
+
if (target_pid == -1 && target_tid == -1 && !system_wide) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
-retry_sample_id:
- attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+}
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
-try_again:
- FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
+static void open_counters(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+ int cpu;
+
+ list_for_each_entry(pos, &evlist->entries, node) {
+ struct perf_event_attr *attr = &pos->attr;
+ /*
+ * Check if parse_single_tracepoint_event has already asked for
+ * PERF_SAMPLE_TIME.
+ *
+ * XXX this is kludgy but short term fix for problems introduced by
+ * eac23d1c that broke 'perf script' by having different sample_types
+ * when using multiple tracepoint events when we use a perf binary
+ * that tries to use sample_id_all on an older kernel.
+ *
+ * We need to move counter creation to perf_session, support
+ * different sample_types, etc.
+ */
+ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
- if (FD(evsel, nr_cpu, thread_index) < 0) {
+ config_attr(pos, evlist);
+retry_sample_id:
+ attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+try_again:
+ if (perf_evsel__open(pos, evlist->cpus, evlist->threads, group,
+ !no_inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -364,7 +353,7 @@ try_again:
}
printf("\n");
error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, nr_cpu, thread_index), strerror(err));
+ err, strerror(err));
#if defined(__i386__) || defined(__x86_64__)
if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -375,90 +364,16 @@ try_again:
#endif
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
- exit(-1);
- }
-
- h_attr = get_header_attr(attr, evsel->idx);
- if (h_attr == NULL)
- die("nomem\n");
-
- if (!file_new) {
- if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
- fprintf(stderr, "incompatible append\n");
- exit(-1);
- }
- }
-
- if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
- perror("Unable to read perf file descriptor");
- exit(-1);
- }
-
- if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
- pr_warning("Not enough memory to add id\n");
- exit(-1);
- }
-
- assert(FD(evsel, nr_cpu, thread_index) >= 0);
- fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, nr_cpu, thread_index);
-
- if (evsel->idx || thread_index) {
- struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_OUTPUT,
- FD(first, nr_cpu, 0));
- if (ret) {
- error("failed to set output: %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
- } else {
- mmap_array[nr_cpu].prev = 0;
- mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
- if (mmap_array[nr_cpu].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
-
- event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
- }
-
- if (filter != NULL) {
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_FILTER, filter);
- if (ret) {
- error("failed to set filter with %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
}
}
- if (!sample_type)
- sample_type = attr->sample_type;
-}
-
-static void open_counters(int cpu)
-{
- struct perf_evsel *pos;
+ if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
- group_fd = -1;
-
- list_for_each_entry(pos, &evsel_list, node)
- create_counter(pos, cpu);
-
- nr_cpu++;
+ for (cpu = 0; cpu < evsel_list->cpus->nr; ++cpu) {
+ list_for_each_entry(pos, &evlist->entries, node)
+ create_counter(pos, cpu);
+ }
}
static int process_buildids(void)
@@ -481,14 +396,14 @@ static void atexit_header(void)
if (!no_buildid)
process_buildids();
- perf_header__write(&session->header, output, true);
+ perf_header__write(&session->header, evsel_list, output, true);
perf_session__delete(session);
- perf_evsel_list__delete();
+ perf_evlist__delete(evsel_list);
symbol__exit();
}
}
-static void event__synthesize_guest_os(struct machine *machine, void *data)
+static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
struct perf_session *psession = data;
@@ -504,8 +419,8 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
*method is used to avoid symbol missing when the first addr is
*in module instead of in guest kernel.
*/
- err = event__synthesize_modules(process_synthesized_event,
- psession, machine);
+ err = perf_event__synthesize_modules(process_synthesized_event,
+ psession, machine);
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -514,11 +429,12 @@ static void event__synthesize_guest_os(struct machine *machine, void *data)
* We use _stext for guest kernel because guest kernel's /proc/kallsyms
* have no _text sometimes.
*/
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ psession, machine, "_text");
if (err < 0)
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- psession, machine, "_stext");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ psession, machine,
+ "_stext");
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -533,9 +449,9 @@ static void mmap_read_all(void)
{
int i;
- for (i = 0; i < nr_cpu; i++) {
- if (mmap_array[i].base)
- mmap_read(&mmap_array[i]);
+ for (i = 0; i < evsel_list->cpus->nr; i++) {
+ if (evsel_list->mmap[i].base)
+ mmap_read(&evsel_list->mmap[i]);
}
if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -566,18 +482,26 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
- if (!strcmp(output_name, "-"))
- pipe_output = 1;
- else if (!stat(output_name, &st) && st.st_size) {
- if (write_mode == WRITE_FORCE) {
- char oldname[PATH_MAX];
- snprintf(oldname, sizeof(oldname), "%s.old",
- output_name);
- unlink(oldname);
- rename(output_name, oldname);
+ if (!output_name) {
+ if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
+ pipe_output = 1;
+ else
+ output_name = "perf.data";
+ }
+ if (output_name) {
+ if (!strcmp(output_name, "-"))
+ pipe_output = 1;
+ else if (!stat(output_name, &st) && st.st_size) {
+ if (write_mode == WRITE_FORCE) {
+ char oldname[PATH_MAX];
+ snprintf(oldname, sizeof(oldname), "%s.old",
+ output_name);
+ unlink(oldname);
+ rename(output_name, oldname);
+ }
+ } else if (write_mode == WRITE_APPEND) {
+ write_mode = WRITE_FORCE;
}
- } else if (write_mode == WRITE_APPEND) {
- write_mode = WRITE_FORCE;
}
flags = O_CREAT|O_RDWR;
@@ -611,7 +535,7 @@ static int __cmd_record(int argc, const char **argv)
goto out_delete_session;
}
- if (have_tracepoints(&evsel_list))
+ if (have_tracepoints(&evsel_list->entries))
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
/*
@@ -659,7 +583,7 @@ static int __cmd_record(int argc, const char **argv)
}
if (!system_wide && target_tid == -1 && target_pid == -1)
- threads->map[0] = child_pid;
+ evsel_list->threads->map[0] = child_pid;
close(child_ready_pipe[1]);
close(go_pipe[0]);
@@ -673,12 +597,7 @@ static int __cmd_record(int argc, const char **argv)
close(child_ready_pipe[0]);
}
- if (!system_wide && no_inherit && !cpu_list) {
- open_counters(-1);
- } else {
- for (i = 0; i < cpus->nr; i++)
- open_counters(cpus->map[i]);
- }
+ open_counters(evsel_list);
perf_session__set_sample_type(session, sample_type);
@@ -687,7 +606,8 @@ static int __cmd_record(int argc, const char **argv)
if (err < 0)
return err;
} else if (file_new) {
- err = perf_header__write(&session->header, output, false);
+ err = perf_header__write(&session->header, evsel_list,
+ output, false);
if (err < 0)
return err;
}
@@ -697,22 +617,22 @@ static int __cmd_record(int argc, const char **argv)
perf_session__set_sample_id_all(session, sample_id_all_avail);
if (pipe_output) {
- err = event__synthesize_attrs(&session->header,
- process_synthesized_event,
- session);
+ err = perf_event__synthesize_attrs(&session->header,
+ process_synthesized_event,
+ session);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
return err;
}
- err = event__synthesize_event_types(process_synthesized_event,
- session);
+ err = perf_event__synthesize_event_types(process_synthesized_event,
+ session);
if (err < 0) {
pr_err("Couldn't synthesize event_types.\n");
return err;
}
- if (have_tracepoints(&evsel_list)) {
+ if (have_tracepoints(&evsel_list->entries)) {
/*
* FIXME err <= 0 here actually means that
* there were no tracepoints so its not really
@@ -721,9 +641,9 @@ static int __cmd_record(int argc, const char **argv)
* return this more properly and also
* propagate errors that now are calling die()
*/
- err = event__synthesize_tracing_data(output, &evsel_list,
- process_synthesized_event,
- session);
+ err = perf_event__synthesize_tracing_data(output, evsel_list,
+ process_synthesized_event,
+ session);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
return err;
@@ -738,31 +658,34 @@ static int __cmd_record(int argc, const char **argv)
return -1;
}
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_text");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ session, machine, "_text");
if (err < 0)
- err = event__synthesize_kernel_mmap(process_synthesized_event,
- session, machine, "_stext");
+ err = perf_event__synthesize_kernel_mmap(process_synthesized_event,
+ session, machine, "_stext");
if (err < 0)
pr_err("Couldn't record kernel reference relocation symbol\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/kallsyms permission or run as root.\n");
- err = event__synthesize_modules(process_synthesized_event,
- session, machine);
+ err = perf_event__synthesize_modules(process_synthesized_event,
+ session, machine);
if (err < 0)
pr_err("Couldn't record kernel module information.\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
"Check /proc/modules permission or run as root.\n");
if (perf_guest)
- perf_session__process_machines(session, event__synthesize_guest_os);
+ perf_session__process_machines(session,
+ perf_event__synthesize_guest_os);
if (!system_wide)
- event__synthesize_thread_map(threads, process_synthesized_event,
- session);
+ perf_event__synthesize_thread_map(evsel_list->threads,
+ process_synthesized_event,
+ session);
else
- event__synthesize_threads(process_synthesized_event, session);
+ perf_event__synthesize_threads(process_synthesized_event,
+ session);
if (realtime_prio) {
struct sched_param param;
@@ -789,17 +712,17 @@ static int __cmd_record(int argc, const char **argv)
if (hits == samples) {
if (done)
break;
- err = poll(event_array, nr_poll, -1);
+ err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
waking++;
}
if (done) {
- for (i = 0; i < nr_cpu; i++) {
+ for (i = 0; i < evsel_list->cpus->nr; i++) {
struct perf_evsel *pos;
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
for (thread = 0;
- thread < threads->nr;
+ thread < evsel_list->threads->nr;
thread++)
ioctl(FD(pos, i, thread),
PERF_EVENT_IOC_DISABLE);
@@ -838,10 +761,10 @@ static const char * const record_usage[] = {
static bool force, append_file;
const struct option record_options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
- OPT_CALLBACK(0, "filter", NULL, "filter",
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter",
"event filter", parse_filter),
OPT_INTEGER('p', "pid", &target_pid,
"record events on existing process id"),
@@ -892,6 +815,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
int err = -ENOMEM;
struct perf_evsel *pos;
+ evsel_list = perf_evlist__new(NULL, NULL);
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target_pid == -1 && target_tid == -1 &&
@@ -913,7 +840,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (no_buildid_cache || no_buildid)
disable_buildid_cache();
- if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
+ if (evsel_list->nr_entries == 0 &&
+ perf_evlist__add_default(evsel_list) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_symbol_exit;
}
@@ -921,27 +849,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (target_pid != -1)
target_tid = target_pid;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
- pr_err("Problems finding threads of monitor\n");
+ if (perf_evlist__create_maps(evsel_list, target_pid,
+ target_tid, cpu_list) < 0)
usage_with_options(record_usage, record_options);
- }
-
- cpus = cpu_map__new(cpu_list);
- if (cpus == NULL) {
- perror("failed to parse CPUs map");
- return -1;
- }
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_fd(pos, evsel_list->cpus->nr,
+ evsel_list->threads->nr) < 0)
goto out_free_fd;
if (perf_header__push_event(pos->attr.config, event_name(pos)))
goto out_free_fd;
}
- event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
- MAX_COUNTERS * threads->nr));
- if (!event_array)
+
+ if (perf_evlist__alloc_pollfd(evsel_list) < 0)
goto out_free_fd;
if (user_interval != ULLONG_MAX)
@@ -959,16 +879,12 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
err = -EINVAL;
- goto out_free_event_array;
+ goto out_free_fd;
}
err = __cmd_record(argc, argv);
-
-out_free_event_array:
- free(event_array);
out_free_fd:
- thread_map__delete(threads);
- threads = NULL;
+ perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
symbol__exit();
return err;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c27e31f289e6..f9a99a1ce609 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -9,6 +9,7 @@
#include "util/util.h"
+#include "util/annotate.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/cache.h"
@@ -43,6 +44,7 @@ static const char default_pretty_printing_style[] = "normal";
static const char *pretty_printing_style = default_pretty_printing_style;
static char callchain_default_opt[] = "fractal,0.5";
+static symbol_filter_t annotate_init;
static struct hists *perf_session__hists_findnew(struct perf_session *self,
u64 event_stream, u32 type,
@@ -77,86 +79,96 @@ static struct hists *perf_session__hists_findnew(struct perf_session *self,
return new;
}
-static int perf_session__add_hist_entry(struct perf_session *self,
+static int perf_session__add_hist_entry(struct perf_session *session,
struct addr_location *al,
- struct sample_data *data)
+ struct perf_sample *sample)
{
- struct map_symbol *syms = NULL;
struct symbol *parent = NULL;
- int err = -ENOMEM;
+ int err = 0;
struct hist_entry *he;
struct hists *hists;
struct perf_event_attr *attr;
- if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) {
- syms = perf_session__resolve_callchain(self, al->thread,
- data->callchain, &parent);
- if (syms == NULL)
- return -ENOMEM;
+ if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
+ err = perf_session__resolve_callchain(session, al->thread,
+ sample->callchain, &parent);
+ if (err)
+ return err;
}
- attr = perf_header__find_attr(data->id, &self->header);
+ attr = perf_header__find_attr(sample->id, &session->header);
if (attr)
- hists = perf_session__hists_findnew(self, data->id, attr->type, attr->config);
+ hists = perf_session__hists_findnew(session, sample->id, attr->type, attr->config);
else
- hists = perf_session__hists_findnew(self, data->id, 0, 0);
+ hists = perf_session__hists_findnew(session, sample->id, 0, 0);
if (hists == NULL)
- goto out_free_syms;
- he = __hists__add_entry(hists, al, parent, data->period);
+ return -ENOMEM;
+
+ he = __hists__add_entry(hists, al, parent, sample->period);
if (he == NULL)
- goto out_free_syms;
- err = 0;
+ return -ENOMEM;
+
if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain, data->callchain, syms,
- data->period);
+ err = callchain_append(he->callchain, &session->callchain_cursor,
+ sample->period);
if (err)
- goto out_free_syms;
+ return err;
}
/*
* Only in the newt browser we are doing integrated annotation,
* so we don't allocated the extra space needed because the stdio
* code will not use it.
*/
- if (use_browser > 0)
- err = hist_entry__inc_addr_samples(he, al->addr);
-out_free_syms:
- free(syms);
+ if (al->sym != NULL && use_browser > 0) {
+ /*
+ * All aggregated on the first sym_hist.
+ */
+ struct annotation *notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL &&
+ symbol__alloc_hist(he->ms.sym, 1) < 0)
+ err = -ENOMEM;
+ else
+ err = hist_entry__inc_addr_samples(he, 0, al->addr);
+ }
+
return err;
}
static int add_event_total(struct perf_session *session,
- struct sample_data *data,
+ struct perf_sample *sample,
struct perf_event_attr *attr)
{
struct hists *hists;
if (attr)
- hists = perf_session__hists_findnew(session, data->id,
+ hists = perf_session__hists_findnew(session, sample->id,
attr->type, attr->config);
else
- hists = perf_session__hists_findnew(session, data->id, 0, 0);
+ hists = perf_session__hists_findnew(session, sample->id, 0, 0);
if (!hists)
return -ENOMEM;
- hists->stats.total_period += data->period;
+ hists->stats.total_period += sample->period;
/*
* FIXME: add_event_total should be moved from here to
* perf_session__process_event so that the proper hist is passed to
* the event_op methods.
*/
hists__inc_nr_events(hists, PERF_RECORD_SAMPLE);
- session->hists.stats.total_period += data->period;
+ session->hists.stats.total_period += sample->period;
return 0;
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct addr_location al;
struct perf_event_attr *attr;
- if (event__preprocess_sample(event, session, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ annotate_init) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -180,7 +192,8 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
return 0;
}
-static int process_read_event(event_t *event, struct sample_data *sample __used,
+static int process_read_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
struct perf_event_attr *attr;
@@ -222,7 +235,7 @@ static int perf_session__setup_sample_type(struct perf_session *self)
} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
return -EINVAL;
@@ -233,17 +246,17 @@ static int perf_session__setup_sample_type(struct perf_session *self)
}
static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .mmap = event__process_mmap,
- .comm = event__process_comm,
- .exit = event__process_task,
- .fork = event__process_task,
- .lost = event__process_lost,
- .read = process_read_event,
- .attr = event__process_attr,
- .event_type = event__process_event_type,
- .tracing_data = event__process_tracing_data,
- .build_id = event__process_build_id,
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_task,
+ .fork = perf_event__process_task,
+ .lost = perf_event__process_lost,
+ .read = process_read_event,
+ .attr = perf_event__process_attr,
+ .event_type = perf_event__process_event_type,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
@@ -347,7 +360,7 @@ static int __cmd_report(void)
}
if (use_browser > 0)
- hists__tui_browse_tree(&session->hists_tree, help);
+ hists__tui_browse_tree(&session->hists_tree, help, 0);
else
hists__tty_browse_tree(&session->hists_tree, help);
@@ -424,7 +437,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
if (tok2)
callchain_param.print_limit = strtod(tok2, &endptr);
setup:
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain params\n");
return -1;
}
@@ -498,7 +511,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
use_browser = 1;
if (strcmp(input_name, "-") != 0)
- setup_browser();
+ setup_browser(true);
else
use_browser = 0;
/*
@@ -507,7 +520,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
* implementation.
*/
if (use_browser > 0) {
- symbol_conf.priv_size = sizeof(struct sym_priv);
+ symbol_conf.priv_size = sizeof(struct annotation);
+ annotate_init = symbol__annotate_init;
/*
* For searching by name on the "Browse map details".
* providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 29acb894e035..a32f411faeac 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -369,11 +369,6 @@ static void
process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom)
{
int ret = 0;
- u64 now;
- long long delta;
-
- now = get_nsecs();
- delta = start_time + atom->timestamp - now;
switch (atom->type) {
case SCHED_EVENT_RUN:
@@ -562,7 +557,7 @@ static void wait_for_tasks(void)
static void run_one_test(void)
{
- u64 T0, T1, delta, avg_delta, fluct, std_dev;
+ u64 T0, T1, delta, avg_delta, fluct;
T0 = get_nsecs();
wait_for_tasks();
@@ -578,7 +573,6 @@ static void run_one_test(void)
else
fluct = delta - avg_delta;
sum_fluct += fluct;
- std_dev = sum_fluct / nr_runs / sqrt(nr_runs);
if (!run_avg)
run_avg = delta;
run_avg = (run_avg*9 + delta)/10;
@@ -799,7 +793,7 @@ replay_switch_event(struct trace_switch_event *switch_event,
u64 timestamp,
struct thread *thread __used)
{
- struct task_desc *prev, *next;
+ struct task_desc *prev, __used *next;
u64 timestamp0;
s64 delta;
@@ -1404,7 +1398,7 @@ map_switch_event(struct trace_switch_event *switch_event,
u64 timestamp,
struct thread *thread __used)
{
- struct thread *sched_out, *sched_in;
+ struct thread *sched_out __used, *sched_in;
int new_shortname;
u64 timestamp0;
s64 delta;
@@ -1580,9 +1574,9 @@ process_sched_migrate_task_event(void *data, struct perf_session *session,
event, cpu, timestamp, thread);
}
-static void
-process_raw_event(event_t *raw_event __used, struct perf_session *session,
- void *data, int cpu, u64 timestamp, struct thread *thread)
+static void process_raw_event(union perf_event *raw_event __used,
+ struct perf_session *session, void *data, int cpu,
+ u64 timestamp, struct thread *thread)
{
struct event *event;
int type;
@@ -1607,7 +1601,8 @@ process_raw_event(event_t *raw_event __used, struct perf_session *session,
process_sched_migrate_task_event(data, session, event, cpu, timestamp, thread);
}
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread;
@@ -1635,9 +1630,9 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
static struct perf_event_ops event_ops = {
.sample = process_sample_event,
- .comm = event__process_comm,
- .lost = event__process_lost,
- .fork = event__process_task,
+ .comm = perf_event__process_comm,
+ .lost = perf_event__process_lost,
+ .fork = perf_event__process_task,
.ordered_samples = true,
};
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b766c2a9ac97..5f40df635dcb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,8 @@ static int cleanup_scripting(void)
static char const *input_name = "perf.data";
-static int process_sample_event(event_t *event, struct sample_data *sample,
+static int process_sample_event(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct thread *thread = perf_session__findnew(session, event->ip.pid);
@@ -100,14 +101,14 @@ static int process_sample_event(event_t *event, struct sample_data *sample,
}
static struct perf_event_ops event_ops = {
- .sample = process_sample_event,
- .comm = event__process_comm,
- .attr = event__process_attr,
- .event_type = event__process_event_type,
- .tracing_data = event__process_tracing_data,
- .build_id = event__process_build_id,
- .ordering_requires_timestamps = true,
+ .sample = process_sample_event,
+ .comm = perf_event__process_comm,
+ .attr = perf_event__process_attr,
+ .event_type = perf_event__process_event_type,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
.ordered_samples = true,
+ .ordering_requires_timestamps = true,
};
extern volatile int session_done;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a482a191a0ca..806a9998fcd5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,11 +43,13 @@
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
+#include "util/thread_map.h"
#include <sys/prctl.h>
#include <math.h>
@@ -71,8 +73,9 @@ static struct perf_event_attr default_attrs[] = {
};
+struct perf_evlist *evsel_list;
+
static bool system_wide = false;
-static struct cpu_map *cpus;
static int run_idx = 0;
static int run_count = 1;
@@ -81,7 +84,6 @@ static bool scale = true;
static bool no_aggr = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
-static struct thread_map *threads;
static pid_t child_pid = -1;
static bool null_run = false;
static bool big_num = true;
@@ -166,7 +168,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
PERF_FORMAT_TOTAL_TIME_RUNNING;
if (system_wide)
- return perf_evsel__open_per_cpu(evsel, cpus);
+ return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false, false);
attr->inherit = !no_inherit;
if (target_pid == -1 && target_tid == -1) {
@@ -174,7 +176,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->enable_on_exec = 1;
}
- return perf_evsel__open_per_thread(evsel, threads);
+ return perf_evsel__open_per_thread(evsel, evsel_list->threads, false, false);
}
/*
@@ -199,7 +201,8 @@ static int read_counter_aggr(struct perf_evsel *counter)
u64 *count = counter->counts->aggr.values;
int i;
- if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0)
+ if (__perf_evsel__read(counter, evsel_list->cpus->nr,
+ evsel_list->threads->nr, scale) < 0)
return -1;
for (i = 0; i < 3; i++)
@@ -232,7 +235,7 @@ static int read_counter(struct perf_evsel *counter)
u64 *count;
int cpu;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
return -1;
@@ -297,7 +300,7 @@ static int run_perf_stat(int argc __used, const char **argv)
}
if (target_tid == -1 && target_pid == -1 && !system_wide)
- threads->map[0] = child_pid;
+ evsel_list->threads->map[0] = child_pid;
/*
* Wait for the child to be ready to exec.
@@ -309,7 +312,7 @@ static int run_perf_stat(int argc __used, const char **argv)
close(child_ready_pipe[0]);
}
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
if (errno == -EPERM || errno == -EACCES) {
error("You may not have permission to collect %sstats.\n"
@@ -347,14 +350,15 @@ static int run_perf_stat(int argc __used, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
- perf_evsel__close_fd(counter, cpus->nr, 1);
+ perf_evsel__close_fd(counter, evsel_list->cpus->nr, 1);
}
} else {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter_aggr(counter);
- perf_evsel__close_fd(counter, cpus->nr, threads->nr);
+ perf_evsel__close_fd(counter, evsel_list->cpus->nr,
+ evsel_list->threads->nr);
}
}
@@ -382,7 +386,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep);
+ evsel_list->cpus->map[cpu], csv_sep);
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
@@ -410,7 +414,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep);
+ evsel_list->cpus->map[cpu], csv_sep);
else
cpu = 0;
@@ -496,14 +500,14 @@ static void print_counter(struct perf_evsel *counter)
u64 ena, run, val;
int cpu;
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = 0; cpu < evsel_list->cpus->nr; cpu++) {
val = counter->counts->cpu[cpu].val;
ena = counter->counts->cpu[cpu].ena;
run = counter->counts->cpu[cpu].run;
if (run == 0 || ena == 0) {
fprintf(stderr, "CPU%*d%s%*s%s%-24s",
csv_output ? 0 : -4,
- cpus->map[cpu], csv_sep,
+ evsel_list->cpus->map[cpu], csv_sep,
csv_output ? 0 : 18,
"<not counted>", csv_sep,
event_name(counter));
@@ -555,10 +559,10 @@ static void print_stat(int argc, const char **argv)
}
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter);
} else {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter);
}
@@ -610,7 +614,7 @@ static int stat__set_big_num(const struct option *opt __used,
}
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
@@ -648,6 +652,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
setlocale(LC_ALL, "");
+ evsel_list = perf_evlist__new(NULL, NULL);
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -679,44 +687,41 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
usage_with_options(stat_usage, options);
/* Set attrs and nr_counters if no event is selected and !null_run */
- if (!null_run && !nr_counters) {
+ if (!null_run && !evsel_list->nr_entries) {
size_t c;
- nr_counters = ARRAY_SIZE(default_attrs);
-
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(&default_attrs[c],
- nr_counters);
+ pos = perf_evsel__new(&default_attrs[c], c);
if (pos == NULL)
goto out;
- list_add(&pos->node, &evsel_list);
+ perf_evlist__add(evsel_list, pos);
}
}
if (target_pid != -1)
target_tid = target_pid;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
+ evsel_list->threads = thread_map__new(target_pid, target_tid);
+ if (evsel_list->threads == NULL) {
pr_err("Problems finding threads of monitor\n");
usage_with_options(stat_usage, options);
}
if (system_wide)
- cpus = cpu_map__new(cpu_list);
+ evsel_list->cpus = cpu_map__new(cpu_list);
else
- cpus = cpu_map__dummy_new();
+ evsel_list->cpus = cpu_map__dummy_new();
- if (cpus == NULL) {
+ if (evsel_list->cpus == NULL) {
perror("failed to parse CPUs map");
usage_with_options(stat_usage, options);
return -1;
}
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
- perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ perf_evsel__alloc_counts(pos, evsel_list->cpus->nr) < 0 ||
+ perf_evsel__alloc_fd(pos, evsel_list->cpus->nr, evsel_list->threads->nr) < 0)
goto out_free_fd;
}
@@ -741,11 +746,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (status != -1)
print_stat(argc, argv);
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
+ list_for_each_entry(pos, &evsel_list->entries, node)
perf_evsel__free_stat_priv(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete_maps(evsel_list);
out:
- thread_map__delete(threads);
- threads = NULL;
+ perf_evlist__delete(evsel_list);
return status;
}
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5dcdba653d70..1b2106c58f66 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,10 +7,11 @@
#include "util/cache.h"
#include "util/debug.h"
+#include "util/evlist.h"
#include "util/parse-options.h"
-#include "util/session.h"
+#include "util/parse-events.h"
#include "util/symbol.h"
-#include "util/thread.h"
+#include "util/thread_map.h"
static long page_size;
@@ -238,14 +239,14 @@ out:
#include "util/evsel.h"
#include <sys/types.h>
-static int trace_event__id(const char *event_name)
+static int trace_event__id(const char *evname)
{
char *filename;
int err = -1, fd;
if (asprintf(&filename,
"/sys/kernel/debug/tracing/events/syscalls/%s/id",
- event_name) < 0)
+ evname) < 0)
return -1;
fd = open(filename, O_RDONLY);
@@ -289,7 +290,7 @@ static int test__open_syscall_event(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -347,9 +348,9 @@ static int test__open_syscall_event_on_all_cpus(void)
}
cpus = cpu_map__new(NULL);
- if (threads == NULL) {
- pr_debug("thread_map__new\n");
- return -1;
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_thread_map_delete;
}
@@ -364,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -408,6 +409,8 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_close_fd;
}
+ err = 0;
+
for (cpu = 0; cpu < cpus->nr; ++cpu) {
unsigned int expected;
@@ -416,18 +419,18 @@ static int test__open_syscall_event_on_all_cpus(void)
if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
pr_debug("perf_evsel__open_read_on_cpu\n");
- goto out_close_fd;
+ err = -1;
+ break;
}
expected = nr_open_calls + cpu;
if (evsel->counts->cpu[cpu].val != expected) {
pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
- goto out_close_fd;
+ err = -1;
}
}
- err = 0;
out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr);
out_evsel_delete:
@@ -437,6 +440,159 @@ out_thread_map_delete:
return err;
}
+/*
+ * This test will generate random numbers of calls to some getpid syscalls,
+ * then establish an mmap for a group of events that are created to monitor
+ * the syscalls.
+ *
+ * It will receive the events, using mmap, use its PERF_SAMPLE_ID generated
+ * sample.id field to map back to its respective perf_evsel instance.
+ *
+ * Then it checks if the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls made.
+ */
+static int test__basic_mmap(void)
+{
+ int err = -1;
+ union perf_event *event;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+ struct perf_evlist *evlist;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_TRACEPOINT,
+ .read_format = PERF_FORMAT_ID,
+ .sample_type = PERF_SAMPLE_ID,
+ .watermark = 0,
+ };
+ cpu_set_t cpu_set;
+ const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
+ "getpgid", };
+ pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
+ (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+ int ids[nsyscalls];
+ unsigned int nr_events[nsyscalls],
+ expected_nr_events[nsyscalls], i, j;
+ struct perf_evsel *evsels[nsyscalls], *evsel;
+
+ for (i = 0; i < nsyscalls; ++i) {
+ char name[64];
+
+ snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+ ids[i] = trace_event__id(name);
+ if (ids[i] < 0) {
+ pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+ return -1;
+ }
+ nr_events[i] = 0;
+ expected_nr_events[i] = random() % 257;
+ }
+
+ threads = thread_map__new(-1, getpid());
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_free_threads;
+ }
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpus->map[0], &cpu_set);
+ sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+ pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+ cpus->map[0], strerror(errno));
+ goto out_free_cpus;
+ }
+
+ evlist = perf_evlist__new(cpus, threads);
+ if (evlist == NULL) {
+ pr_debug("perf_evlist__new\n");
+ goto out_free_cpus;
+ }
+
+ /* anonymous union fields, can't be initialized above */
+ attr.wakeup_events = 1;
+ attr.sample_period = 1;
+
+ for (i = 0; i < nsyscalls; ++i) {
+ attr.config = ids[i];
+ evsels[i] = perf_evsel__new(&attr, i);
+ if (evsels[i] == NULL) {
+ pr_debug("perf_evsel__new\n");
+ goto out_free_evlist;
+ }
+
+ perf_evlist__add(evlist, evsels[i]);
+
+ if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ strerror(errno));
+ goto out_close_fd;
+ }
+ }
+
+ if (perf_evlist__mmap(evlist, 128, true) < 0) {
+ pr_debug("failed to mmap events: %d (%s)\n", errno,
+ strerror(errno));
+ goto out_close_fd;
+ }
+
+ for (i = 0; i < nsyscalls; ++i)
+ for (j = 0; j < expected_nr_events[i]; ++j) {
+ int foo = syscalls[i]();
+ ++foo;
+ }
+
+ while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
+ struct perf_sample sample;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ pr_debug("unexpected %s event\n",
+ perf_event__name(event->header.type));
+ goto out_munmap;
+ }
+
+ perf_event__parse_sample(event, attr.sample_type, false, &sample);
+ evsel = perf_evlist__id2evsel(evlist, sample.id);
+ if (evsel == NULL) {
+ pr_debug("event with id %" PRIu64
+ " doesn't map to an evsel\n", sample.id);
+ goto out_munmap;
+ }
+ nr_events[evsel->idx]++;
+ }
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+ pr_debug("expected %d %s events, got %d\n",
+ expected_nr_events[evsel->idx],
+ event_name(evsel), nr_events[evsel->idx]);
+ goto out_munmap;
+ }
+ }
+
+ err = 0;
+out_munmap:
+ perf_evlist__munmap(evlist);
+out_close_fd:
+ for (i = 0; i < nsyscalls; ++i)
+ perf_evsel__close_fd(evsels[i], 1, threads->nr);
+out_free_evlist:
+ perf_evlist__delete(evlist);
+out_free_cpus:
+ cpu_map__delete(cpus);
+out_free_threads:
+ thread_map__delete(threads);
+ return err;
+#undef nsyscalls
+}
+
static struct test {
const char *desc;
int (*func)(void);
@@ -454,6 +610,10 @@ static struct test {
.func = test__open_syscall_event_on_all_cpus,
},
{
+ .desc = "read samples using the mmap interface",
+ .func = test__basic_mmap,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 746cf03cb05d..0801275c500e 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -276,21 +276,24 @@ static int cpus_cstate_state[MAX_CPUS];
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];
-static int process_comm_event(event_t *event, struct sample_data *sample __used,
+static int process_comm_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_set_comm(event->comm.tid, event->comm.comm);
return 0;
}
-static int process_fork_event(event_t *event, struct sample_data *sample __used,
+static int process_fork_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_fork(event->fork.pid, event->fork.ppid, event->fork.time);
return 0;
}
-static int process_exit_event(event_t *event, struct sample_data *sample __used,
+static int process_exit_event(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
pid_exit(event->fork.pid, event->fork.time);
@@ -486,8 +489,8 @@ static void sched_switch(int cpu, u64 timestamp, struct trace_entry *te)
}
-static int process_sample_event(event_t *event __used,
- struct sample_data *sample,
+static int process_sample_event(union perf_event *event __used,
+ struct perf_sample *sample,
struct perf_session *session)
{
struct trace_entry *te;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5a29d9cd9486..c9fd66d4a082 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -20,11 +20,16 @@
#include "perf.h"
+#include "util/annotate.h"
+#include "util/cache.h"
#include "util/color.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/top.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
@@ -45,7 +50,6 @@
#include <errno.h>
#include <time.h>
#include <sched.h>
-#include <pthread.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
@@ -60,85 +64,42 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+static struct perf_top top = {
+ .count_filter = 5,
+ .delay_secs = 2,
+ .display_weighted = -1,
+ .target_pid = -1,
+ .target_tid = -1,
+ .active_symbols = LIST_HEAD_INIT(top.active_symbols),
+ .active_symbols_lock = PTHREAD_MUTEX_INITIALIZER,
+ .freq = 1000, /* 1 KHz */
+};
+
static bool system_wide = false;
-static int default_interval = 0;
+static bool use_tui, use_stdio;
-static int count_filter = 5;
-static int print_entries;
+static int default_interval = 0;
-static int target_pid = -1;
-static int target_tid = -1;
-static struct thread_map *threads;
static bool inherit = false;
-static struct cpu_map *cpus;
static int realtime_prio = 0;
static bool group = false;
static unsigned int page_size;
-static unsigned int mmap_pages = 16;
-static int freq = 1000; /* 1 KHz */
+static unsigned int mmap_pages = 128;
-static int delay_secs = 2;
-static bool zero = false;
static bool dump_symtab = false;
-static bool hide_kernel_symbols = false;
-static bool hide_user_symbols = false;
static struct winsize winsize;
-/*
- * Source
- */
-
-struct source_line {
- u64 eip;
- unsigned long count[MAX_COUNTERS];
- char *line;
- struct source_line *next;
-};
-
static const char *sym_filter = NULL;
struct sym_entry *sym_filter_entry = NULL;
struct sym_entry *sym_filter_entry_sched = NULL;
static int sym_pcnt_filter = 5;
-static int sym_counter = 0;
-static struct perf_evsel *sym_evsel = NULL;
-static int display_weighted = -1;
-static const char *cpu_list;
-
-/*
- * Symbols
- */
-
-struct sym_entry_source {
- struct source_line *source;
- struct source_line *lines;
- struct source_line **lines_tail;
- pthread_mutex_t lock;
-};
-
-struct sym_entry {
- struct rb_node rb_node;
- struct list_head node;
- unsigned long snap_count;
- double weight;
- int skip;
- u16 name_len;
- u8 origin;
- struct map *map;
- struct sym_entry_source *src;
- unsigned long count[0];
-};
/*
* Source functions
*/
-static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
-{
- return ((void *)self) + symbol_conf.priv_size;
-}
-
void get_term_dimensions(struct winsize *ws)
{
char *s = getenv("LINES");
@@ -163,10 +124,10 @@ void get_term_dimensions(struct winsize *ws)
static void update_print_entries(struct winsize *ws)
{
- print_entries = ws->ws_row;
+ top.print_entries = ws->ws_row;
- if (print_entries > 9)
- print_entries -= 9;
+ if (top.print_entries > 9)
+ top.print_entries -= 9;
}
static void sig_winch_handler(int sig __used)
@@ -178,12 +139,9 @@ static void sig_winch_handler(int sig __used)
static int parse_source(struct sym_entry *syme)
{
struct symbol *sym;
- struct sym_entry_source *source;
+ struct annotation *notes;
struct map *map;
- FILE *file;
- char command[PATH_MAX*2];
- const char *path;
- u64 len;
+ int err = -1;
if (!syme)
return -1;
@@ -194,411 +152,136 @@ static int parse_source(struct sym_entry *syme)
/*
* We can't annotate with just /proc/kallsyms
*/
- if (map->dso->origin == DSO__ORIG_KERNEL)
+ if (map->dso->origin == DSO__ORIG_KERNEL) {
+ pr_err("Can't annotate %s: No vmlinux file was found in the "
+ "path\n", sym->name);
+ sleep(1);
return -1;
-
- if (syme->src == NULL) {
- syme->src = zalloc(sizeof(*source));
- if (syme->src == NULL)
- return -1;
- pthread_mutex_init(&syme->src->lock, NULL);
}
- source = syme->src;
-
- if (source->lines) {
- pthread_mutex_lock(&source->lock);
+ notes = symbol__annotation(sym);
+ if (notes->src != NULL) {
+ pthread_mutex_lock(&notes->lock);
goto out_assign;
}
- path = map->dso->long_name;
-
- len = sym->end - sym->start;
-
- sprintf(command,
- "objdump --start-address=%#0*" PRIx64 " --stop-address=%#0*" PRIx64 " -dS %s",
- BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start),
- BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path);
-
- file = popen(command, "r");
- if (!file)
- return -1;
- pthread_mutex_lock(&source->lock);
- source->lines_tail = &source->lines;
- while (!feof(file)) {
- struct source_line *src;
- size_t dummy = 0;
- char *c, *sep;
+ pthread_mutex_lock(&notes->lock);
- src = malloc(sizeof(struct source_line));
- assert(src != NULL);
- memset(src, 0, sizeof(struct source_line));
-
- if (getline(&src->line, &dummy, file) < 0)
- break;
- if (!src->line)
- break;
-
- c = strchr(src->line, '\n');
- if (c)
- *c = 0;
-
- src->next = NULL;
- *source->lines_tail = src;
- source->lines_tail = &src->next;
-
- src->eip = strtoull(src->line, &sep, 16);
- if (*sep == ':')
- src->eip = map__objdump_2ip(map, src->eip);
- else /* this line has no ip info (e.g. source line) */
- src->eip = 0;
+ if (symbol__alloc_hist(sym, top.evlist->nr_entries) < 0) {
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ goto out_unlock;
}
- pclose(file);
+
+ err = symbol__annotate(sym, syme->map, 0);
+ if (err == 0) {
out_assign:
sym_filter_entry = syme;
- pthread_mutex_unlock(&source->lock);
- return 0;
+ }
+out_unlock:
+ pthread_mutex_unlock(&notes->lock);
+ return err;
}
static void __zero_source_counters(struct sym_entry *syme)
{
- int i;
- struct source_line *line;
-
- line = syme->src->lines;
- while (line) {
- for (i = 0; i < nr_counters; i++)
- line->count[i] = 0;
- line = line->next;
- }
+ struct symbol *sym = sym_entry__symbol(syme);
+ symbol__annotate_zero_histograms(sym);
}
static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
{
- struct source_line *line;
+ struct annotation *notes;
+ struct symbol *sym;
if (syme != sym_filter_entry)
return;
- if (pthread_mutex_trylock(&syme->src->lock))
- return;
-
- if (syme->src == NULL || syme->src->source == NULL)
- goto out_unlock;
-
- for (line = syme->src->lines; line; line = line->next) {
- /* skip lines without IP info */
- if (line->eip == 0)
- continue;
- if (line->eip == ip) {
- line->count[counter]++;
- break;
- }
- if (line->eip > ip)
- break;
- }
-out_unlock:
- pthread_mutex_unlock(&syme->src->lock);
-}
-
-#define PATTERN_LEN (BITS_PER_LONG / 4 + 2)
-
-static void lookup_sym_source(struct sym_entry *syme)
-{
- struct symbol *symbol = sym_entry__symbol(syme);
- struct source_line *line;
- char pattern[PATTERN_LEN + 1];
-
- sprintf(pattern, "%0*" PRIx64 " <", BITS_PER_LONG / 4,
- map__rip_2objdump(syme->map, symbol->start));
-
- pthread_mutex_lock(&syme->src->lock);
- for (line = syme->src->lines; line; line = line->next) {
- if (memcmp(line->line, pattern, PATTERN_LEN) == 0) {
- syme->src->source = line;
- break;
- }
- }
- pthread_mutex_unlock(&syme->src->lock);
-}
+ sym = sym_entry__symbol(syme);
+ notes = symbol__annotation(sym);
-static void show_lines(struct source_line *queue, int count, int total)
-{
- int i;
- struct source_line *line;
+ if (pthread_mutex_trylock(&notes->lock))
+ return;
- line = queue;
- for (i = 0; i < count; i++) {
- float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
+ ip = syme->map->map_ip(syme->map, ip);
+ symbol__inc_addr_samples(sym, syme->map, counter, ip);
- printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
- line = line->next;
- }
+ pthread_mutex_unlock(&notes->lock);
}
-#define TRACE_COUNT 3
-
static void show_details(struct sym_entry *syme)
{
+ struct annotation *notes;
struct symbol *symbol;
- struct source_line *line;
- struct source_line *line_queue = NULL;
- int displayed = 0;
- int line_queue_count = 0, total = 0, more = 0;
+ int more;
if (!syme)
return;
- if (!syme->src->source)
- lookup_sym_source(syme);
-
- if (!syme->src->source)
- return;
-
symbol = sym_entry__symbol(syme);
- printf("Showing %s for %s\n", event_name(sym_evsel), symbol->name);
- printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
+ notes = symbol__annotation(symbol);
- pthread_mutex_lock(&syme->src->lock);
- line = syme->src->source;
- while (line) {
- total += line->count[sym_counter];
- line = line->next;
- }
-
- line = syme->src->source;
- while (line) {
- float pcnt = 0.0;
-
- if (!line_queue_count)
- line_queue = line;
- line_queue_count++;
-
- if (line->count[sym_counter])
- pcnt = 100.0 * line->count[sym_counter] / (float)total;
- if (pcnt >= (float)sym_pcnt_filter) {
- if (displayed <= print_entries)
- show_lines(line_queue, line_queue_count, total);
- else more++;
- displayed += line_queue_count;
- line_queue_count = 0;
- line_queue = NULL;
- } else if (line_queue_count > TRACE_COUNT) {
- line_queue = line_queue->next;
- line_queue_count--;
- }
+ pthread_mutex_lock(&notes->lock);
- line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
- line = line->next;
- }
- pthread_mutex_unlock(&syme->src->lock);
- if (more)
- printf("%d lines not displayed, maybe increase display entries [e]\n", more);
-}
-
-/*
- * Symbols will be added here in event__process_sample and will get out
- * after decayed.
- */
-static LIST_HEAD(active_symbols);
-static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
-
-/*
- * Ordering weight: count-1 * count-2 * ... / count-n
- */
-static double sym_weight(const struct sym_entry *sym)
-{
- double weight = sym->snap_count;
- int counter;
-
- if (!display_weighted)
- return weight;
-
- for (counter = 1; counter < nr_counters-1; counter++)
- weight *= sym->count[counter];
+ if (notes->src == NULL)
+ goto out_unlock;
- weight /= (sym->count[counter] + 1);
+ printf("Showing %s for %s\n", event_name(top.sym_evsel), symbol->name);
+ printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
- return weight;
+ more = symbol__annotate_printf(symbol, syme->map, top.sym_evsel->idx,
+ 0, sym_pcnt_filter, top.print_entries, 4);
+ if (top.zero)
+ symbol__annotate_zero_histogram(symbol, top.sym_evsel->idx);
+ else
+ symbol__annotate_decay_histogram(symbol, top.sym_evsel->idx);
+ if (more != 0)
+ printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+out_unlock:
+ pthread_mutex_unlock(&notes->lock);
}
-static long samples;
-static long kernel_samples, us_samples;
-static long exact_samples;
-static long guest_us_samples, guest_kernel_samples;
static const char CONSOLE_CLEAR[] = "";
static void __list_insert_active_sym(struct sym_entry *syme)
{
- list_add(&syme->node, &active_symbols);
-}
-
-static void list_remove_active_sym(struct sym_entry *syme)
-{
- pthread_mutex_lock(&active_symbols_lock);
- list_del_init(&syme->node);
- pthread_mutex_unlock(&active_symbols_lock);
-}
-
-static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
-{
- struct rb_node **p = &tree->rb_node;
- struct rb_node *parent = NULL;
- struct sym_entry *iter;
-
- while (*p != NULL) {
- parent = *p;
- iter = rb_entry(parent, struct sym_entry, rb_node);
-
- if (se->weight > iter->weight)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
-
- rb_link_node(&se->rb_node, parent, p);
- rb_insert_color(&se->rb_node, tree);
+ list_add(&syme->node, &top.active_symbols);
}
-static void print_sym_table(void)
+static void print_sym_table(struct perf_session *session)
{
- int printed = 0, j;
- struct perf_evsel *counter;
- int snap = !display_weighted ? sym_counter : 0;
- float samples_per_sec = samples/delay_secs;
- float ksamples_per_sec = kernel_samples/delay_secs;
- float us_samples_per_sec = (us_samples)/delay_secs;
- float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs;
- float guest_us_samples_per_sec = (guest_us_samples)/delay_secs;
- float esamples_percent = (100.0*exact_samples)/samples;
- float sum_ksamples = 0.0;
- struct sym_entry *syme, *n;
- struct rb_root tmp = RB_ROOT;
+ char bf[160];
+ int printed = 0;
struct rb_node *nd;
- int sym_width = 0, dso_width = 0, dso_short_width = 0;
+ struct sym_entry *syme;
+ struct rb_root tmp = RB_ROOT;
const int win_width = winsize.ws_col - 1;
-
- samples = us_samples = kernel_samples = exact_samples = 0;
- guest_kernel_samples = guest_us_samples = 0;
-
- /* Sort the active symbols */
- pthread_mutex_lock(&active_symbols_lock);
- syme = list_entry(active_symbols.next, struct sym_entry, node);
- pthread_mutex_unlock(&active_symbols_lock);
-
- list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
- syme->snap_count = syme->count[snap];
- if (syme->snap_count != 0) {
-
- if ((hide_user_symbols &&
- syme->origin == PERF_RECORD_MISC_USER) ||
- (hide_kernel_symbols &&
- syme->origin == PERF_RECORD_MISC_KERNEL)) {
- list_remove_active_sym(syme);
- continue;
- }
- syme->weight = sym_weight(syme);
- rb_insert_active_sym(&tmp, syme);
- sum_ksamples += syme->snap_count;
-
- for (j = 0; j < nr_counters; j++)
- syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
- } else
- list_remove_active_sym(syme);
- }
+ int sym_width, dso_width, dso_short_width;
+ float sum_ksamples = perf_top__decay_samples(&top, &tmp);
puts(CONSOLE_CLEAR);
- printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (!perf_guest) {
- printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
- " exact: %4.1f%% [",
- samples_per_sec,
- 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
- samples_per_sec)),
- esamples_percent);
- } else {
- printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
- " guest kernel:%4.1f%% guest us:%4.1f%%"
- " exact: %4.1f%% [",
- samples_per_sec,
- 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec -
- guest_kernel_samples_per_sec) /
- samples_per_sec)),
- 100.0 - (100.0 * ((samples_per_sec -
- guest_us_samples_per_sec) /
- samples_per_sec)),
- esamples_percent);
- }
-
- if (nr_counters == 1 || !display_weighted) {
- struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- printf("%" PRIu64, (uint64_t)first->attr.sample_period);
- if (freq)
- printf("Hz ");
- else
- printf(" ");
- }
+ perf_top__header_snprintf(&top, bf, sizeof(bf));
+ printf("%s\n", bf);
- if (!display_weighted)
- printf("%s", event_name(sym_evsel));
- else list_for_each_entry(counter, &evsel_list, node) {
- if (counter->idx)
- printf("/");
+ perf_top__reset_sample_counters(&top);
- printf("%s", event_name(counter));
- }
-
- printf( "], ");
-
- if (target_pid != -1)
- printf(" (target_pid: %d", target_pid);
- else if (target_tid != -1)
- printf(" (target_tid: %d", target_tid);
- else
- printf(" (all");
+ printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
- if (cpu_list)
- printf(", CPU%s: %s)\n", cpus->nr > 1 ? "s" : "", cpu_list);
- else {
- if (target_tid != -1)
- printf(")\n");
- else
- printf(", %d CPU%s)\n", cpus->nr, cpus->nr > 1 ? "s" : "");
+ if (session->hists.stats.total_lost != 0) {
+ color_fprintf(stdout, PERF_COLOR_RED, "WARNING:");
+ printf(" LOST %" PRIu64 " events, Check IO/CPU overload\n",
+ session->hists.stats.total_lost);
}
- printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
-
if (sym_filter_entry) {
show_details(sym_filter_entry);
return;
}
- /*
- * Find the longest symbol name that will be displayed
- */
- for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
- syme = rb_entry(nd, struct sym_entry, rb_node);
- if (++printed > print_entries ||
- (int)syme->snap_count < count_filter)
- continue;
-
- if (syme->map->dso->long_name_len > dso_width)
- dso_width = syme->map->dso->long_name_len;
-
- if (syme->map->dso->short_name_len > dso_short_width)
- dso_short_width = syme->map->dso->short_name_len;
-
- if (syme->name_len > sym_width)
- sym_width = syme->name_len;
- }
-
- printed = 0;
+ perf_top__find_widths(&top, &tmp, &dso_width, &dso_short_width,
+ &sym_width);
if (sym_width + dso_width > winsize.ws_col - 29) {
dso_width = dso_short_width;
@@ -606,7 +289,7 @@ static void print_sym_table(void)
sym_width = winsize.ws_col - dso_width - 29;
}
putchar('\n');
- if (nr_counters == 1)
+ if (top.evlist->nr_entries == 1)
printf(" samples pcnt");
else
printf(" weight samples pcnt");
@@ -615,7 +298,7 @@ static void print_sym_table(void)
printf(" RIP ");
printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
printf(" %s _______ _____",
- nr_counters == 1 ? " " : "______");
+ top.evlist->nr_entries == 1 ? " " : "______");
if (verbose)
printf(" ________________");
printf(" %-*.*s", sym_width, sym_width, graph_line);
@@ -628,13 +311,14 @@ static void print_sym_table(void)
syme = rb_entry(nd, struct sym_entry, rb_node);
sym = sym_entry__symbol(syme);
- if (++printed > print_entries || (int)syme->snap_count < count_filter)
+ if (++printed > top.print_entries ||
+ (int)syme->snap_count < top.count_filter)
continue;
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
- if (nr_counters == 1 || !display_weighted)
+ if (top.evlist->nr_entries == 1 || !top.display_weighted)
printf("%20.2f ", syme->weight);
else
printf("%9.1f %10ld ", syme->weight, syme->snap_count);
@@ -693,10 +377,8 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
/* zero counters of active symbol */
if (syme) {
- pthread_mutex_lock(&syme->src->lock);
__zero_source_counters(syme);
*target = NULL;
- pthread_mutex_unlock(&syme->src->lock);
}
fprintf(stdout, "\n%s: ", msg);
@@ -707,11 +389,11 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
if (p)
*p = 0;
- pthread_mutex_lock(&active_symbols_lock);
- syme = list_entry(active_symbols.next, struct sym_entry, node);
- pthread_mutex_unlock(&active_symbols_lock);
+ pthread_mutex_lock(&top.active_symbols_lock);
+ syme = list_entry(top.active_symbols.next, struct sym_entry, node);
+ pthread_mutex_unlock(&top.active_symbols_lock);
- list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+ list_for_each_entry_safe_from(syme, n, &top.active_symbols, node) {
struct symbol *sym = sym_entry__symbol(syme);
if (!strcmp(buf, sym->name)) {
@@ -741,28 +423,28 @@ static void print_mapped_keys(void)
}
fprintf(stdout, "\nMapped keys:\n");
- fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
- fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
+ fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top.delay_secs);
+ fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top.print_entries);
- if (nr_counters > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel));
+ if (top.evlist->nr_entries > 1)
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(top.sym_evsel));
- fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
+ fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top.count_filter);
fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
fprintf(stdout, "\t[S] stop annotation.\n");
- if (nr_counters > 1)
- fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
+ if (top.evlist->nr_entries > 1)
+ fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", top.display_weighted ? 1 : 0);
fprintf(stdout,
"\t[K] hide kernel_symbols symbols. \t(%s)\n",
- hide_kernel_symbols ? "yes" : "no");
+ top.hide_kernel_symbols ? "yes" : "no");
fprintf(stdout,
"\t[U] hide user symbols. \t(%s)\n",
- hide_user_symbols ? "yes" : "no");
- fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
+ top.hide_user_symbols ? "yes" : "no");
+ fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top.zero ? 1 : 0);
fprintf(stdout, "\t[qQ] quit.\n");
}
@@ -783,7 +465,7 @@ static int key_mapped(int c)
return 1;
case 'E':
case 'w':
- return nr_counters > 1 ? 1 : 0;
+ return top.evlist->nr_entries > 1 ? 1 : 0;
default:
break;
}
@@ -818,47 +500,47 @@ static void handle_keypress(struct perf_session *session, int c)
switch (c) {
case 'd':
- prompt_integer(&delay_secs, "Enter display delay");
- if (delay_secs < 1)
- delay_secs = 1;
+ prompt_integer(&top.delay_secs, "Enter display delay");
+ if (top.delay_secs < 1)
+ top.delay_secs = 1;
break;
case 'e':
- prompt_integer(&print_entries, "Enter display entries (lines)");
- if (print_entries == 0) {
+ prompt_integer(&top.print_entries, "Enter display entries (lines)");
+ if (top.print_entries == 0) {
sig_winch_handler(SIGWINCH);
signal(SIGWINCH, sig_winch_handler);
} else
signal(SIGWINCH, SIG_DFL);
break;
case 'E':
- if (nr_counters > 1) {
+ if (top.evlist->nr_entries > 1) {
fprintf(stderr, "\nAvailable events:");
- list_for_each_entry(sym_evsel, &evsel_list, node)
- fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
+ list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
+ fprintf(stderr, "\n\t%d %s", top.sym_evsel->idx, event_name(top.sym_evsel));
- prompt_integer(&sym_counter, "Enter details event counter");
+ prompt_integer(&top.sym_counter, "Enter details event counter");
- if (sym_counter >= nr_counters) {
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
- sym_counter = 0;
- fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
+ if (top.sym_counter >= top.evlist->nr_entries) {
+ top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
+ top.sym_counter = 0;
+ fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(top.sym_evsel));
sleep(1);
break;
}
- list_for_each_entry(sym_evsel, &evsel_list, node)
- if (sym_evsel->idx == sym_counter)
+ list_for_each_entry(top.sym_evsel, &top.evlist->entries, node)
+ if (top.sym_evsel->idx == top.sym_counter)
break;
- } else sym_counter = 0;
+ } else top.sym_counter = 0;
break;
case 'f':
- prompt_integer(&count_filter, "Enter display event count filter");
+ prompt_integer(&top.count_filter, "Enter display event count filter");
break;
case 'F':
prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
break;
case 'K':
- hide_kernel_symbols = !hide_kernel_symbols;
+ top.hide_kernel_symbols = !top.hide_kernel_symbols;
break;
case 'q':
case 'Q':
@@ -875,26 +557,32 @@ static void handle_keypress(struct perf_session *session, int c)
else {
struct sym_entry *syme = sym_filter_entry;
- pthread_mutex_lock(&syme->src->lock);
sym_filter_entry = NULL;
__zero_source_counters(syme);
- pthread_mutex_unlock(&syme->src->lock);
}
break;
case 'U':
- hide_user_symbols = !hide_user_symbols;
+ top.hide_user_symbols = !top.hide_user_symbols;
break;
case 'w':
- display_weighted = ~display_weighted;
+ top.display_weighted = ~top.display_weighted;
break;
case 'z':
- zero = !zero;
+ top.zero = !top.zero;
break;
default:
break;
}
}
+static void *display_thread_tui(void *arg __used)
+{
+ perf_top__tui_browser(&top);
+ exit_browser(0);
+ exit(0);
+ return NULL;
+}
+
static void *display_thread(void *arg __used)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
@@ -909,13 +597,13 @@ static void *display_thread(void *arg __used)
tc.c_cc[VTIME] = 0;
repeat:
- delay_msecs = delay_secs * 1000;
+ delay_msecs = top.delay_secs * 1000;
tcsetattr(0, TCSANOW, &tc);
/* trash return*/
getc(stdin);
do {
- print_sym_table();
+ print_sym_table(session);
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
c = getc(stdin);
@@ -930,6 +618,7 @@ repeat:
/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
"default_idle",
+ "native_safe_halt",
"cpu_idle",
"enter_idle",
"exit_idle",
@@ -965,7 +654,7 @@ static int symbol_filter(struct map *map, struct symbol *sym)
syme = symbol__priv(sym);
syme->map = map;
- syme->src = NULL;
+ symbol__annotate_init(map, sym);
if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) {
/* schedule initial sym_filter_entry setup */
@@ -980,44 +669,40 @@ static int symbol_filter(struct map *map, struct symbol *sym)
}
}
- if (!syme->skip)
- syme->name_len = strlen(sym->name);
-
return 0;
}
-static void event__process_sample(const event_t *self,
- struct sample_data *sample,
- struct perf_session *session,
- struct perf_evsel *evsel)
+static void perf_event__process_sample(const union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_session *session)
{
- u64 ip = self->ip.ip;
+ u64 ip = event->ip.ip;
struct sym_entry *syme;
struct addr_location al;
struct machine *machine;
- u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- ++samples;
+ ++top.samples;
switch (origin) {
case PERF_RECORD_MISC_USER:
- ++us_samples;
- if (hide_user_symbols)
+ ++top.us_samples;
+ if (top.hide_user_symbols)
return;
machine = perf_session__find_host_machine(session);
break;
case PERF_RECORD_MISC_KERNEL:
- ++kernel_samples;
- if (hide_kernel_symbols)
+ ++top.kernel_samples;
+ if (top.hide_kernel_symbols)
return;
machine = perf_session__find_host_machine(session);
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
- ++guest_kernel_samples;
- machine = perf_session__find_machine(session, self->ip.pid);
+ ++top.guest_kernel_samples;
+ machine = perf_session__find_machine(session, event->ip.pid);
break;
case PERF_RECORD_MISC_GUEST_USER:
- ++guest_us_samples;
+ ++top.guest_us_samples;
/*
* TODO: we don't process guest user from host side
* except simple counting.
@@ -1029,15 +714,15 @@ static void event__process_sample(const event_t *self,
if (!machine && perf_guest) {
pr_err("Can't find guest [%d]'s kernel information\n",
- self->ip.pid);
+ event->ip.pid);
return;
}
- if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
- exact_samples++;
+ if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
+ top.exact_samples++;
- if (event__preprocess_sample(self, session, &al, sample,
- symbol_filter) < 0 ||
+ if (perf_event__preprocess_sample(event, session, &al, sample,
+ symbol_filter) < 0 ||
al.filtered)
return;
@@ -1082,166 +767,67 @@ static void event__process_sample(const event_t *self,
syme = symbol__priv(al.sym);
if (!syme->skip) {
- syme->count[evsel->idx]++;
+ struct perf_evsel *evsel;
+
syme->origin = origin;
+ evsel = perf_evlist__id2evsel(top.evlist, sample->id);
+ assert(evsel != NULL);
+ syme->count[evsel->idx]++;
record_precise_ip(syme, evsel->idx, ip);
- pthread_mutex_lock(&active_symbols_lock);
+ pthread_mutex_lock(&top.active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
__list_insert_active_sym(syme);
- pthread_mutex_unlock(&active_symbols_lock);
+ pthread_mutex_unlock(&top.active_symbols_lock);
}
}
-struct mmap_data {
- void *base;
- int mask;
- unsigned int prev;
-};
-
-static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
- int ncpus, int nthreads)
+static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
{
- evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
- return evsel->priv != NULL ? 0 : -ENOMEM;
-}
+ struct perf_sample sample;
+ union perf_event *event;
-static void perf_evsel__free_mmap(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->priv);
- evsel->priv = NULL;
-}
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- int head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
+ while ((event = perf_evlist__read_on_cpu(top.evlist, cpu)) != NULL) {
+ perf_session__parse_sample(self, event, &sample);
-static void perf_session__mmap_read_counter(struct perf_session *self,
- struct perf_evsel *evsel,
- int cpu, int thread_idx)
-{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
- unsigned int head = mmap_read_head(md);
- unsigned int old = md->prev;
- unsigned char *data = md->base + page_size;
- struct sample_data sample;
- int diff;
-
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff > md->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
-
- for (; old != head;) {
- event_t *event = (event_t *)&data[old & md->mask];
-
- event_t event_copy;
-
- size_t size = event->header.size;
-
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = &event_copy;
-
- do {
- cpy = min(md->mask + 1 - (offset & md->mask), len);
- memcpy(dst, &data[offset & md->mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = &event_copy;
- }
-
- event__parse_sample(event, self, &sample);
if (event->header.type == PERF_RECORD_SAMPLE)
- event__process_sample(event, &sample, self, evsel);
+ perf_event__process_sample(event, &sample, self);
else
- event__process(event, &sample, self);
- old += size;
+ perf_event__process(event, &sample, self);
}
-
- md->prev = old;
}
-static struct pollfd *event_array;
-
static void perf_session__mmap_read(struct perf_session *self)
{
- struct perf_evsel *counter;
- int i, thread_index;
-
- for (i = 0; i < cpus->nr; i++) {
- list_for_each_entry(counter, &evsel_list, node) {
- for (thread_index = 0;
- thread_index < threads->nr;
- thread_index++) {
- perf_session__mmap_read_counter(self,
- counter, i, thread_index);
- }
- }
- }
-}
+ int i;
-int nr_poll;
-int group_fd;
+ for (i = 0; i < top.evlist->cpus->nr; i++)
+ perf_session__mmap_read_cpu(self, i);
+}
-static void start_counter(int i, struct perf_evsel *evsel)
+static void start_counters(struct perf_evlist *evlist)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *mm;
- struct perf_event_attr *attr;
- int cpu = -1;
- int thread_index;
-
- if (target_tid == -1)
- cpu = cpus->map[i];
+ struct perf_evsel *counter;
- attr = &evsel->attr;
+ list_for_each_entry(counter, &evlist->entries, node) {
+ struct perf_event_attr *attr = &counter->attr;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
- }
+ if (top.freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = top.freq;
+ }
- attr->inherit = (cpu < 0) && inherit;
- attr->mmap = 1;
+ if (evlist->nr_entries > 1) {
+ attr->sample_type |= PERF_SAMPLE_ID;
+ attr->read_format |= PERF_FORMAT_ID;
+ }
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
+ attr->mmap = 1;
try_again:
- FD(evsel, i, thread_index) = sys_perf_event_open(attr,
- threads->map[thread_index], cpu, group_fd, 0);
-
- if (FD(evsel, i, thread_index) < 0) {
+ if (perf_evsel__open(counter, top.evlist->cpus,
+ top.evlist->threads, group, inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -1253,8 +839,8 @@ try_again:
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
- if (attr->type == PERF_TYPE_HARDWARE
- && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+ if (attr->type == PERF_TYPE_HARDWARE &&
+ attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
@@ -1264,39 +850,23 @@ try_again:
goto try_again;
}
printf("\n");
- error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, i, thread_index), strerror(err));
+ error("sys_perf_event_open() syscall returned with %d "
+ "(%s). /bin/dmesg may provide additional information.\n",
+ err, strerror(err));
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
}
- assert(FD(evsel, i, thread_index) >= 0);
- fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, i, thread_index);
-
- event_array[nr_poll].fd = FD(evsel, i, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mm = xyarray__entry(mmap_array, i, thread_index);
- mm->prev = 0;
- mm->mask = mmap_pages*page_size - 1;
- mm->base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
- if (mm->base == MAP_FAILED)
- die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
+
+ if (perf_evlist__mmap(evlist, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
static int __cmd_top(void)
{
pthread_t thread;
- struct perf_evsel *counter;
- int i, ret;
+ struct perf_evsel *first;
+ int ret __used;
/*
* FIXME: perf_session__new should allow passing a O_MMAP, so that all this
* mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
@@ -1305,23 +875,23 @@ static int __cmd_top(void)
if (session == NULL)
return -ENOMEM;
- if (target_tid != -1)
- event__synthesize_thread_map(threads, event__process, session);
+ if (top.target_tid != -1)
+ perf_event__synthesize_thread_map(top.evlist->threads,
+ perf_event__process, session);
else
- event__synthesize_threads(event__process, session);
+ perf_event__synthesize_threads(perf_event__process, session);
- for (i = 0; i < cpus->nr; i++) {
- group_fd = -1;
- list_for_each_entry(counter, &evsel_list, node)
- start_counter(i, counter);
- }
+ start_counters(top.evlist);
+ first = list_entry(top.evlist->entries.next, struct perf_evsel, node);
+ perf_session__set_sample_type(session, first->attr.sample_type);
/* Wait for a minimal set of events before starting the snapshot */
- poll(&event_array[0], nr_poll, 100);
+ poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
perf_session__mmap_read(session);
- if (pthread_create(&thread, NULL, display_thread, session)) {
+ if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
+ display_thread), session)) {
printf("Could not create display thread.\n");
exit(-1);
}
@@ -1337,12 +907,12 @@ static int __cmd_top(void)
}
while (1) {
- int hits = samples;
+ u64 hits = top.samples;
perf_session__mmap_read(session);
- if (hits == samples)
- ret = poll(event_array, nr_poll, 100);
+ if (hits == top.samples)
+ ret = poll(top.evlist->pollfd, top.evlist->nr_fds, 100);
}
return 0;
@@ -1354,31 +924,31 @@ static const char * const top_usage[] = {
};
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &top.evlist, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('c', "count", &default_interval,
"event period to sample"),
- OPT_INTEGER('p', "pid", &target_pid,
+ OPT_INTEGER('p', "pid", &top.target_pid,
"profile events on existing process id"),
- OPT_INTEGER('t', "tid", &target_tid,
+ OPT_INTEGER('t', "tid", &top.target_tid,
"profile events on existing thread id"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
- OPT_STRING('C', "cpu", &cpu_list, "cpu",
+ OPT_STRING('C', "cpu", &top.cpu_list, "cpu",
"list of cpus to monitor"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
- OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols,
+ OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
"hide kernel symbols"),
OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
- OPT_INTEGER('d', "delay", &delay_secs,
+ OPT_INTEGER('d', "delay", &top.delay_secs,
"number of seconds to delay between refreshes"),
OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
"dump the symbol table used for profiling"),
- OPT_INTEGER('f', "count-filter", &count_filter,
+ OPT_INTEGER('f', "count-filter", &top.count_filter,
"only display functions with more events than this"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
@@ -1386,14 +956,16 @@ static const struct option options[] = {
"child tasks inherit counters"),
OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
"symbol to annotate"),
- OPT_BOOLEAN('z', "zero", &zero,
+ OPT_BOOLEAN('z', "zero", &top.zero,
"zero history across updates"),
- OPT_INTEGER('F', "freq", &freq,
+ OPT_INTEGER('F', "freq", &top.freq,
"profile at this frequency"),
- OPT_INTEGER('E', "entries", &print_entries,
+ OPT_INTEGER('E', "entries", &top.print_entries,
"display this many functions"),
- OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols,
+ OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
"hide user symbols"),
+ OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
@@ -1404,64 +976,68 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
struct perf_evsel *pos;
int status = -ENOMEM;
+ top.evlist = perf_evlist__new(NULL, NULL);
+ if (top.evlist == NULL)
+ return -ENOMEM;
+
page_size = sysconf(_SC_PAGE_SIZE);
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
usage_with_options(top_usage, options);
- if (target_pid != -1)
- target_tid = target_pid;
+ /*
+ * XXX For now start disabled, only using TUI if explicitely asked for.
+ * Change that when handle_keys equivalent gets written, live annotation
+ * done, etc.
+ */
+ use_browser = 0;
- threads = thread_map__new(target_pid, target_tid);
- if (threads == NULL) {
- pr_err("Problems finding threads of monitor\n");
- usage_with_options(top_usage, options);
- }
+ if (use_stdio)
+ use_browser = 0;
+ else if (use_tui)
+ use_browser = 1;
- event_array = malloc((sizeof(struct pollfd) *
- MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
- if (!event_array)
- return -ENOMEM;
+ setup_browser(false);
/* CPU and PID are mutually exclusive */
- if (target_tid > 0 && cpu_list) {
+ if (top.target_tid > 0 && top.cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
sleep(1);
- cpu_list = NULL;
+ top.cpu_list = NULL;
}
- if (!nr_counters && perf_evsel_list__create_default() < 0) {
+ if (top.target_pid != -1)
+ top.target_tid = top.target_pid;
+
+ if (perf_evlist__create_maps(top.evlist, top.target_pid,
+ top.target_tid, top.cpu_list) < 0)
+ usage_with_options(top_usage, options);
+
+ if (!top.evlist->nr_entries &&
+ perf_evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
return -ENOMEM;
}
- if (delay_secs < 1)
- delay_secs = 1;
+ if (top.delay_secs < 1)
+ top.delay_secs = 1;
/*
* User specified count overrides default frequency.
*/
if (default_interval)
- freq = 0;
- else if (freq) {
- default_interval = freq;
+ top.freq = 0;
+ else if (top.freq) {
+ default_interval = top.freq;
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
exit(EXIT_FAILURE);
}
- if (target_tid != -1)
- cpus = cpu_map__dummy_new();
- else
- cpus = cpu_map__new(cpu_list);
-
- if (cpus == NULL)
- usage_with_options(top_usage, options);
-
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &top.evlist->entries, node) {
+ if (perf_evsel__alloc_fd(pos, top.evlist->cpus->nr,
+ top.evlist->threads->nr) < 0)
goto out_free_fd;
/*
* Fill in the ones not specifically initialized via -c:
@@ -1472,26 +1048,28 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
pos->attr.sample_period = default_interval;
}
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+ if (perf_evlist__alloc_pollfd(top.evlist) < 0 ||
+ perf_evlist__alloc_mmap(top.evlist) < 0)
+ goto out_free_fd;
+
+ top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node);
- symbol_conf.priv_size = (sizeof(struct sym_entry) +
- (nr_counters + 1) * sizeof(unsigned long));
+ symbol_conf.priv_size = (sizeof(struct sym_entry) + sizeof(struct annotation) +
+ (top.evlist->nr_entries + 1) * sizeof(unsigned long));
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init() < 0)
return -1;
get_term_dimensions(&winsize);
- if (print_entries == 0) {
+ if (top.print_entries == 0) {
update_print_entries(&winsize);
signal(SIGWINCH, sig_winch_handler);
}
status = __cmd_top();
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
- perf_evsel__free_mmap(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete(top.evlist);
return status;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 95aaf565c704..a5fc660c1f12 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -94,6 +94,32 @@ void get_term_dimensions(struct winsize *ws);
#include "util/types.h"
#include <stdbool.h>
+struct perf_mmap {
+ void *base;
+ int mask;
+ unsigned int prev;
+};
+
+static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->base;
+ int head = pc->data_head;
+ rmb();
+ return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md,
+ unsigned long tail)
+{
+ struct perf_event_mmap_page *pc = md->base;
+
+ /*
+ * ensure all reads are done before we write the tail out.
+ */
+ /* mb(); */
+ pc->data_tail = tail;
+}
+
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
new file mode 100755
index 000000000000..df638c438a9f
--- /dev/null
+++ b/tools/perf/python/twatch.py
@@ -0,0 +1,41 @@
+#! /usr/bin/python
+# -*- python -*-
+# -*- coding: utf-8 -*-
+# twatch - Experimental use of the perf python interface
+# Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
+#
+# This application is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2.
+#
+# This application is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+import perf
+
+def main():
+ cpus = perf.cpu_map()
+ threads = perf.thread_map()
+ evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
+ wakeup_events = 1, sample_period = 1,
+ sample_id_all = 1,
+ sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
+ evsel.open(cpus = cpus, threads = threads);
+ evlist = perf.evlist(cpus, threads)
+ evlist.add(evsel)
+ evlist.mmap()
+ while True:
+ evlist.poll(timeout = -1)
+ for cpu in cpus:
+ event = evlist.read_on_cpu(cpu)
+ if not event:
+ continue
+ print "cpu: %2d, pid: %4d, tid: %4d" % (event.sample_cpu,
+ event.sample_pid,
+ event.sample_tid),
+ print event
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000000..02976b895f27
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,593 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "util.h"
+#include "build-id.h"
+#include "color.h"
+#include "cache.h"
+#include "symbol.h"
+#include "debug.h"
+#include "annotate.h"
+#include <pthread.h>
+
+int symbol__annotate_init(struct map *map __used, struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ pthread_mutex_init(&notes->lock, NULL);
+ return 0;
+}
+
+int symbol__alloc_hist(struct symbol *sym, int nevents)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ size_t sizeof_sym_hist = (sizeof(struct sym_hist) +
+ (sym->end - sym->start) * sizeof(u64));
+
+ notes->src = zalloc(sizeof(*notes->src) + nevents * sizeof_sym_hist);
+ if (notes->src == NULL)
+ return -1;
+ notes->src->sizeof_sym_hist = sizeof_sym_hist;
+ notes->src->nr_histograms = nevents;
+ INIT_LIST_HEAD(&notes->src->source);
+ return 0;
+}
+
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ pthread_mutex_lock(&notes->lock);
+ if (notes->src != NULL)
+ memset(notes->src->histograms, 0,
+ notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+ pthread_mutex_unlock(&notes->lock);
+}
+
+int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+ int evidx, u64 addr)
+{
+ unsigned offset;
+ struct annotation *notes;
+ struct sym_hist *h;
+
+ notes = symbol__annotation(sym);
+ if (notes->src == NULL)
+ return -ENOMEM;
+
+ pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
+
+ if (addr >= sym->end)
+ return 0;
+
+ offset = addr - sym->start;
+ h = annotation__histogram(notes, evidx);
+ h->sum++;
+ h->addr[offset]++;
+
+ pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+ ", evidx=%d] => %" PRIu64 "\n", sym->start, sym->name,
+ addr, addr - sym->start, evidx, h->addr[offset]);
+ return 0;
+}
+
+static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
+{
+ struct objdump_line *self = malloc(sizeof(*self) + privsize);
+
+ if (self != NULL) {
+ self->offset = offset;
+ self->line = line;
+ }
+
+ return self;
+}
+
+void objdump_line__free(struct objdump_line *self)
+{
+ free(self->line);
+ free(self);
+}
+
+static void objdump__add_line(struct list_head *head, struct objdump_line *line)
+{
+ list_add_tail(&line->node, head);
+}
+
+struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
+ struct objdump_line *pos)
+{
+ list_for_each_entry_continue(pos, head, node)
+ if (pos->offset >= 0)
+ return pos;
+
+ return NULL;
+}
+
+static int objdump_line__print(struct objdump_line *oline, struct symbol *sym,
+ int evidx, u64 len, int min_pcnt,
+ int printed, int max_lines,
+ struct objdump_line *queue)
+{
+ static const char *prev_line;
+ static const char *prev_color;
+
+ if (oline->offset != -1) {
+ const char *path = NULL;
+ unsigned int hits = 0;
+ double percent = 0.0;
+ const char *color;
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_line *src_line = notes->src->lines;
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ s64 offset = oline->offset;
+ struct objdump_line *next;
+
+ next = objdump__get_next_ip_line(&notes->src->source, oline);
+
+ while (offset < (s64)len &&
+ (next == NULL || offset < next->offset)) {
+ if (src_line) {
+ if (path == NULL)
+ path = src_line[offset].path;
+ percent += src_line[offset].percent;
+ } else
+ hits += h->addr[offset];
+
+ ++offset;
+ }
+
+ if (src_line == NULL && h->sum)
+ percent = 100.0 * hits / h->sum;
+
+ if (percent < min_pcnt)
+ return -1;
+
+ if (max_lines && printed >= max_lines)
+ return 1;
+
+ if (queue != NULL) {
+ list_for_each_entry_from(queue, &notes->src->source, node) {
+ if (queue == oline)
+ break;
+ objdump_line__print(queue, sym, evidx, len,
+ 0, 0, 1, NULL);
+ }
+ }
+
+ color = get_percent_color(percent);
+
+ /*
+ * Also color the filename and line if needed, with
+ * the same color than the percentage. Don't print it
+ * twice for close colored addr with the same filename:line
+ */
+ if (path) {
+ if (!prev_line || strcmp(prev_line, path)
+ || color != prev_color) {
+ color_fprintf(stdout, color, " %s", path);
+ prev_line = path;
+ prev_color = color;
+ }
+ }
+
+ color_fprintf(stdout, color, " %7.2f", percent);
+ printf(" : ");
+ color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", oline->line);
+ } else if (max_lines && printed >= max_lines)
+ return 1;
+ else {
+ if (queue)
+ return -1;
+
+ if (!*oline->line)
+ printf(" :\n");
+ else
+ printf(" : %s\n", oline->line);
+ }
+
+ return 0;
+}
+
+static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
+ FILE *file, size_t privsize)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct objdump_line *objdump_line;
+ char *line = NULL, *tmp, *tmp2, *c;
+ size_t line_len;
+ s64 line_ip, offset = -1;
+
+ if (getline(&line, &line_len, file) < 0)
+ return -1;
+
+ if (!line)
+ return -1;
+
+ while (line_len != 0 && isspace(line[line_len - 1]))
+ line[--line_len] = '\0';
+
+ c = strchr(line, '\n');
+ if (c)
+ *c = 0;
+
+ line_ip = -1;
+
+ /*
+ * Strip leading spaces:
+ */
+ tmp = line;
+ while (*tmp) {
+ if (*tmp != ' ')
+ break;
+ tmp++;
+ }
+
+ if (*tmp) {
+ /*
+ * Parse hexa addresses followed by ':'
+ */
+ line_ip = strtoull(tmp, &tmp2, 16);
+ if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
+ line_ip = -1;
+ }
+
+ if (line_ip != -1) {
+ u64 start = map__rip_2objdump(map, sym->start),
+ end = map__rip_2objdump(map, sym->end);
+
+ offset = line_ip - start;
+ if (offset < 0 || (u64)line_ip > end)
+ offset = -1;
+ }
+
+ objdump_line = objdump_line__new(offset, line, privsize);
+ if (objdump_line == NULL) {
+ free(line);
+ return -1;
+ }
+ objdump__add_line(&notes->src->source, objdump_line);
+
+ return 0;
+}
+
+int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize)
+{
+ struct dso *dso = map->dso;
+ char *filename = dso__build_id_filename(dso, NULL, 0);
+ bool free_filename = true;
+ char command[PATH_MAX * 2];
+ FILE *file;
+ int err = 0;
+ char symfs_filename[PATH_MAX];
+
+ if (filename) {
+ snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+ symbol_conf.symfs, filename);
+ }
+
+ if (filename == NULL) {
+ if (dso->has_build_id) {
+ pr_err("Can't annotate %s: not enough memory\n",
+ sym->name);
+ return -ENOMEM;
+ }
+ goto fallback;
+ } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
+ strstr(command, "[kernel.kallsyms]") ||
+ access(symfs_filename, R_OK)) {
+ free(filename);
+fallback:
+ /*
+ * If we don't have build-ids or the build-id file isn't in the
+ * cache, or is just a kallsyms file, well, lets hope that this
+ * DSO is the same as when 'perf record' ran.
+ */
+ filename = dso->long_name;
+ snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
+ symbol_conf.symfs, filename);
+ free_filename = false;
+ }
+
+ if (dso->origin == DSO__ORIG_KERNEL) {
+ if (dso->annotate_warned)
+ goto out_free_filename;
+ err = -ENOENT;
+ dso->annotate_warned = 1;
+ pr_err("Can't annotate %s: No vmlinux file was found in the "
+ "path\n", sym->name);
+ goto out_free_filename;
+ }
+
+ pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+ filename, sym->name, map->unmap_ip(map, sym->start),
+ map->unmap_ip(map, sym->end));
+
+ pr_debug("annotating [%p] %30s : [%p] %30s\n",
+ dso, dso->long_name, sym, sym->name);
+
+ snprintf(command, sizeof(command),
+ "objdump --start-address=0x%016" PRIx64
+ " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
+ map__rip_2objdump(map, sym->start),
+ map__rip_2objdump(map, sym->end),
+ symfs_filename, filename);
+
+ pr_debug("Executing: %s\n", command);
+
+ file = popen(command, "r");
+ if (!file)
+ goto out_free_filename;
+
+ while (!feof(file))
+ if (symbol__parse_objdump_line(sym, map, file, privsize) < 0)
+ break;
+
+ pclose(file);
+out_free_filename:
+ if (free_filename)
+ free(filename);
+ return err;
+}
+
+static void insert_source_line(struct rb_root *root, struct source_line *src_line)
+{
+ struct source_line *iter;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct source_line, node);
+
+ if (src_line->percent > iter->percent)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&src_line->node, parent, p);
+ rb_insert_color(&src_line->node, root);
+}
+
+static void symbol__free_source_line(struct symbol *sym, int len)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_line *src_line = notes->src->lines;
+ int i;
+
+ for (i = 0; i < len; i++)
+ free(src_line[i].path);
+
+ free(src_line);
+ notes->src->lines = NULL;
+}
+
+/* Get the filename:line for the colored entries */
+static int symbol__get_source_line(struct symbol *sym, struct map *map,
+ int evidx, struct rb_root *root, int len,
+ const char *filename)
+{
+ u64 start;
+ int i;
+ char cmd[PATH_MAX * 2];
+ struct source_line *src_line;
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+
+ if (!h->sum)
+ return 0;
+
+ src_line = notes->src->lines = calloc(len, sizeof(struct source_line));
+ if (!notes->src->lines)
+ return -1;
+
+ start = map->unmap_ip(map, sym->start);
+
+ for (i = 0; i < len; i++) {
+ char *path = NULL;
+ size_t line_len;
+ u64 offset;
+ FILE *fp;
+
+ src_line[i].percent = 100.0 * h->addr[i] / h->sum;
+ if (src_line[i].percent <= 0.5)
+ continue;
+
+ offset = start + i;
+ sprintf(cmd, "addr2line -e %s %016" PRIx64, filename, offset);
+ fp = popen(cmd, "r");
+ if (!fp)
+ continue;
+
+ if (getline(&path, &line_len, fp) < 0 || !line_len)
+ goto next;
+
+ src_line[i].path = malloc(sizeof(char) * line_len + 1);
+ if (!src_line[i].path)
+ goto next;
+
+ strcpy(src_line[i].path, path);
+ insert_source_line(root, &src_line[i]);
+
+ next:
+ pclose(fp);
+ }
+
+ return 0;
+}
+
+static void print_summary(struct rb_root *root, const char *filename)
+{
+ struct source_line *src_line;
+ struct rb_node *node;
+
+ printf("\nSorted summary for file %s\n", filename);
+ printf("----------------------------------------------\n\n");
+
+ if (RB_EMPTY_ROOT(root)) {
+ printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+ return;
+ }
+
+ node = rb_first(root);
+ while (node) {
+ double percent;
+ const char *color;
+ char *path;
+
+ src_line = rb_entry(node, struct source_line, node);
+ percent = src_line->percent;
+ color = get_percent_color(percent);
+ path = src_line->path;
+
+ color_fprintf(stdout, color, " %7.2f %s", percent, path);
+ node = rb_next(node);
+ }
+}
+
+static void symbol__annotate_hits(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ u64 len = sym->end - sym->start, offset;
+
+ for (offset = 0; offset < len; ++offset)
+ if (h->addr[offset] != 0)
+ printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+ sym->start + offset, h->addr[offset]);
+ printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->sum", h->sum);
+}
+
+int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
+ bool full_paths, int min_pcnt, int max_lines,
+ int context)
+{
+ struct dso *dso = map->dso;
+ const char *filename = dso->long_name, *d_filename;
+ struct annotation *notes = symbol__annotation(sym);
+ struct objdump_line *pos, *queue = NULL;
+ int printed = 2, queue_len = 0;
+ int more = 0;
+ u64 len;
+
+ if (full_paths)
+ d_filename = filename;
+ else
+ d_filename = basename(filename);
+
+ len = sym->end - sym->start;
+
+ printf(" Percent | Source code & Disassembly of %s\n", d_filename);
+ printf("------------------------------------------------\n");
+
+ if (verbose)
+ symbol__annotate_hits(sym, evidx);
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ if (context && queue == NULL) {
+ queue = pos;
+ queue_len = 0;
+ }
+
+ switch (objdump_line__print(pos, sym, evidx, len, min_pcnt,
+ printed, max_lines, queue)) {
+ case 0:
+ ++printed;
+ if (context) {
+ printed += queue_len;
+ queue = NULL;
+ queue_len = 0;
+ }
+ break;
+ case 1:
+ /* filtered by max_lines */
+ ++more;
+ break;
+ case -1:
+ default:
+ /*
+ * Filtered by min_pcnt or non IP lines when
+ * context != 0
+ */
+ if (!context)
+ break;
+ if (queue_len == context)
+ queue = list_entry(queue->node.next, typeof(*queue), node);
+ else
+ ++queue_len;
+ break;
+ }
+ }
+
+ return more;
+}
+
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+
+ memset(h, 0, notes->src->sizeof_sym_hist);
+}
+
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ struct objdump_line *pos;
+
+ h->sum = 0;
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ if (pos->offset != -1) {
+ h->addr[pos->offset] = h->addr[pos->offset] * 7 / 8;
+ h->sum += h->addr[pos->offset];
+ }
+ }
+}
+
+void objdump_line_list__purge(struct list_head *head)
+{
+ struct objdump_line *pos, *n;
+
+ list_for_each_entry_safe(pos, n, head, node) {
+ list_del(&pos->node);
+ objdump_line__free(pos);
+ }
+}
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
+ bool print_lines, bool full_paths, int min_pcnt,
+ int max_lines)
+{
+ struct dso *dso = map->dso;
+ const char *filename = dso->long_name;
+ struct rb_root source_line = RB_ROOT;
+ u64 len;
+
+ if (symbol__annotate(sym, map, 0) < 0)
+ return -1;
+
+ len = sym->end - sym->start;
+
+ if (print_lines) {
+ symbol__get_source_line(sym, map, evidx, &source_line,
+ len, filename);
+ print_summary(&source_line, filename);
+ }
+
+ symbol__annotate_printf(sym, map, evidx, full_paths,
+ min_pcnt, max_lines, 0);
+ if (print_lines)
+ symbol__free_source_line(sym, len);
+
+ objdump_line_list__purge(&symbol__annotation(sym)->src->source);
+
+ return 0;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000000..e848803fcd48
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,101 @@
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include "types.h"
+#include "symbol.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct objdump_line {
+ struct list_head node;
+ s64 offset;
+ char *line;
+};
+
+void objdump_line__free(struct objdump_line *self);
+struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
+ struct objdump_line *pos);
+
+struct sym_hist {
+ u64 sum;
+ u64 addr[0];
+};
+
+struct source_line {
+ struct rb_node node;
+ double percent;
+ char *path;
+};
+
+/** struct annotated_source - symbols with hits have this attached as in sannotation
+ *
+ * @histogram: Array of addr hit histograms per event being monitored
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ * @source: source parsed from objdump -dS
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histogram array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+ struct list_head source;
+ struct source_line *lines;
+ int nr_histograms;
+ int sizeof_sym_hist;
+ struct sym_hist histograms[0];
+};
+
+struct annotation {
+ pthread_mutex_t lock;
+ struct annotated_source *src;
+};
+
+struct sannotation {
+ struct annotation annotation;
+ struct symbol symbol;
+};
+
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+ return (((void *)&notes->src->histograms) +
+ (notes->src->sizeof_sym_hist * idx));
+}
+
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+ struct sannotation *a = container_of(sym, struct sannotation, symbol);
+ return &a->annotation;
+}
+
+int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+ int evidx, u64 addr);
+int symbol__alloc_hist(struct symbol *sym, int nevents);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
+int symbol__annotate_init(struct map *map __used, struct symbol *sym);
+int symbol__annotate_printf(struct symbol *sym, struct map *map, int evidx,
+ bool full_paths, int min_pcnt, int max_lines,
+ int context);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void objdump_line_list__purge(struct list_head *head);
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map, int evidx,
+ bool print_lines, bool full_paths, int min_pcnt,
+ int max_lines);
+
+#ifdef NO_NEWT_SUPPORT
+static inline int symbol__tui_annotate(struct symbol *sym __used,
+ struct map *map __used, int evidx __used)
+{
+ return 0;
+}
+#else
+int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx);
+#endif
+
+#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index deffb8c96071..31f934af9861 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -14,8 +14,8 @@
#include <linux/kernel.h>
#include "debug.h"
-static int build_id__mark_dso_hit(event_t *event,
- struct sample_data *sample __used,
+static int build_id__mark_dso_hit(union perf_event *event,
+ struct perf_sample *sample __used,
struct perf_session *session)
{
struct addr_location al;
@@ -37,13 +37,14 @@ static int build_id__mark_dso_hit(event_t *event,
return 0;
}
-static int event__exit_del_thread(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+static int perf_event__exit_del_thread(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->fork.tid);
+ struct thread *thread = perf_session__findnew(session, event->fork.tid);
- dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
- self->fork.ppid, self->fork.ptid);
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
if (thread) {
rb_erase(&thread->rb_node, &session->threads);
@@ -56,9 +57,9 @@ static int event__exit_del_thread(event_t *self, struct sample_data *sample __us
struct perf_event_ops build_id__mark_dso_hit_ops = {
.sample = build_id__mark_dso_hit,
- .mmap = event__process_mmap,
- .fork = event__process_task,
- .exit = event__exit_del_thread,
+ .mmap = perf_event__process_mmap,
+ .fork = perf_event__process_task,
+ .exit = perf_event__exit_del_thread,
};
char *dso__build_id_filename(struct dso *self, char *bf, size_t size)
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index a7729797fd96..fc5e5a09d5b9 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -34,13 +34,14 @@ extern int pager_use_color;
extern int use_browser;
#ifdef NO_NEWT_SUPPORT
-static inline void setup_browser(void)
+static inline void setup_browser(bool fallback_to_pager)
{
- setup_pager();
+ if (fallback_to_pager)
+ setup_pager();
}
static inline void exit_browser(bool wait_for_ok __used) {}
#else
-void setup_browser(void);
+void setup_browser(bool fallback_to_pager);
void exit_browser(bool wait_for_ok);
#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e12d539417b2..9f7106a8d9a4 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
*
* Handle the callchains from the stream in an ad-hoc radix tree and then
* sort them in an rbtree.
@@ -18,7 +18,8 @@
#include "util.h"
#include "callchain.h"
-bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
+bool ip_callchain__valid(struct ip_callchain *chain,
+ const union perf_event *event)
{
unsigned int chain_size = event->header.size;
chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event;
@@ -26,10 +27,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
}
#define chain_for_each_child(child, parent) \
- list_for_each_entry(child, &parent->children, brothers)
+ list_for_each_entry(child, &parent->children, siblings)
#define chain_for_each_child_safe(child, next, parent) \
- list_for_each_entry_safe(child, next, &parent->children, brothers)
+ list_for_each_entry_safe(child, next, &parent->children, siblings)
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
@@ -38,14 +39,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
- u64 chain_cumul = cumul_hits(chain);
+ u64 chain_cumul = callchain_cumul_hits(chain);
while (*p) {
u64 rnode_cumul;
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
- rnode_cumul = cumul_hits(rnode);
+ rnode_cumul = callchain_cumul_hits(rnode);
switch (mode) {
case CHAIN_FLAT:
@@ -104,7 +105,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_ABS);
}
@@ -129,7 +130,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_rel(child, min_percent);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_REL);
}
@@ -143,7 +144,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
-int register_callchain_param(struct callchain_param *param)
+int callchain_register_param(struct callchain_param *param)
{
switch (param->mode) {
case CHAIN_GRAPH_ABS:
@@ -189,32 +190,27 @@ create_child(struct callchain_node *parent, bool inherit_children)
chain_for_each_child(next, new)
next->parent = new;
}
- list_add_tail(&new->brothers, &parent->children);
+ list_add_tail(&new->siblings, &parent->children);
return new;
}
-struct resolved_ip {
- u64 ip;
- struct map_symbol ms;
-};
-
-struct resolved_chain {
- u64 nr;
- struct resolved_ip ips[0];
-};
-
-
/*
* Fill the node with callchain values
*/
static void
-fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
{
- unsigned int i;
+ struct callchain_cursor_node *cursor_node;
+
+ node->val_nr = cursor->nr - cursor->pos;
+ if (!node->val_nr)
+ pr_warning("Warning: empty node in callchain tree\n");
- for (i = start; i < chain->nr; i++) {
+ cursor_node = callchain_cursor_current(cursor);
+
+ while (cursor_node) {
struct callchain_list *call;
call = zalloc(sizeof(*call));
@@ -222,23 +218,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
perror("not enough memory for the code path tree");
return;
}
- call->ip = chain->ips[i].ip;
- call->ms = chain->ips[i].ms;
+ call->ip = cursor_node->ip;
+ call->ms.sym = cursor_node->sym;
+ call->ms.map = cursor_node->map;
list_add_tail(&call->list, &node->val);
+
+ callchain_cursor_advance(cursor);
+ cursor_node = callchain_cursor_current(cursor);
}
- node->val_nr = chain->nr - start;
- if (!node->val_nr)
- pr_warning("Warning: empty node in callchain tree\n");
}
static void
-add_child(struct callchain_node *parent, struct resolved_chain *chain,
- int start, u64 period)
+add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *new;
new = create_child(parent, false);
- fill_node(new, chain, start);
+ fill_node(new, cursor);
new->children_hit = 0;
new->hit = period;
@@ -250,9 +248,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain,
* Then create another child to host the given callchain of new branch
*/
static void
-split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
- struct callchain_list *to_split, int idx_parents, int idx_local,
- u64 period)
+split_add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ struct callchain_list *to_split,
+ u64 idx_parents, u64 idx_local, u64 period)
{
struct callchain_node *new;
struct list_head *old_tail;
@@ -272,14 +271,14 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
/* split the hits */
new->hit = parent->hit;
new->children_hit = parent->children_hit;
- parent->children_hit = cumul_hits(new);
+ parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
/* create a new child for the new branch if any */
- if (idx_total < chain->nr) {
+ if (idx_total < cursor->nr) {
parent->hit = 0;
- add_child(parent, chain, idx_total, period);
+ add_child(parent, cursor, period);
parent->children_hit += period;
} else {
parent->hit = period;
@@ -287,36 +286,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period);
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period);
static void
-append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain_children(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *rnode;
/* lookup in childrens */
chain_for_each_child(rnode, root) {
- unsigned int ret = append_chain(rnode, chain, start, period);
+ unsigned int ret = append_chain(rnode, cursor, period);
if (!ret)
goto inc_children_hit;
}
/* nothing in children, add to the current node */
- add_child(root, chain, start, period);
+ add_child(root, cursor, period);
inc_children_hit:
root->children_hit += period;
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
+ struct callchain_cursor_node *curr_snap = cursor->curr;
struct callchain_list *cnode;
- unsigned int i = start;
+ u64 start = cursor->pos;
bool found = false;
+ u64 matches;
/*
* Lookup in the current node
@@ -324,141 +328,134 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain,
* anywhere inside a function.
*/
list_for_each_entry(cnode, &root->val, list) {
+ struct callchain_cursor_node *node;
struct symbol *sym;
- if (i == chain->nr)
+ node = callchain_cursor_current(cursor);
+ if (!node)
break;
- sym = chain->ips[i].ms.sym;
+ sym = node->sym;
if (cnode->ms.sym && sym) {
if (cnode->ms.sym->start != sym->start)
break;
- } else if (cnode->ip != chain->ips[i].ip)
+ } else if (cnode->ip != node->ip)
break;
if (!found)
found = true;
- i++;
+
+ callchain_cursor_advance(cursor);
}
/* matches not, relay on the parent */
- if (!found)
+ if (!found) {
+ cursor->curr = curr_snap;
+ cursor->pos = start;
return -1;
+ }
+
+ matches = cursor->pos - start;
/* we match only a part of the node. Split it and add the new chain */
- if (i - start < root->val_nr) {
- split_add_child(root, chain, cnode, start, i - start, period);
+ if (matches < root->val_nr) {
+ split_add_child(root, cursor, cnode, start, matches, period);
return 0;
}
/* we match 100% of the path, increment the hit */
- if (i - start == root->val_nr && i == chain->nr) {
+ if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
return 0;
}
/* We match the node and still have a part remaining */
- append_chain_children(root, chain, i, period);
+ append_chain_children(root, cursor, period);
return 0;
}
-static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
- struct map_symbol *syms)
-{
- int i, j = 0;
-
- for (i = 0; i < (int)old->nr; i++) {
- if (old->ips[i] >= PERF_CONTEXT_MAX)
- continue;
-
- new->ips[j].ip = old->ips[i];
- new->ips[j].ms = syms[i];
- j++;
- }
-
- new->nr = j;
-}
-
-
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period)
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
- struct resolved_chain *filtered;
-
- if (!chain->nr)
+ if (!cursor->nr)
return 0;
- filtered = zalloc(sizeof(*filtered) +
- chain->nr * sizeof(struct resolved_ip));
- if (!filtered)
- return -ENOMEM;
-
- filter_context(chain, filtered, syms);
-
- if (!filtered->nr)
- goto end;
+ callchain_cursor_commit(cursor);
- append_chain_children(&root->node, filtered, 0, period);
+ append_chain_children(&root->node, cursor, period);
- if (filtered->nr > root->max_depth)
- root->max_depth = filtered->nr;
-end:
- free(filtered);
+ if (cursor->nr > root->max_depth)
+ root->max_depth = cursor->nr;
return 0;
}
static int
-merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
- struct resolved_chain *chain)
+merge_chain_branch(struct callchain_cursor *cursor,
+ struct callchain_node *dst, struct callchain_node *src)
{
+ struct callchain_cursor_node **old_last = cursor->last;
struct callchain_node *child, *next_child;
struct callchain_list *list, *next_list;
- int old_pos = chain->nr;
+ int old_pos = cursor->nr;
int err = 0;
list_for_each_entry_safe(list, next_list, &src->val, list) {
- chain->ips[chain->nr].ip = list->ip;
- chain->ips[chain->nr].ms = list->ms;
- chain->nr++;
+ callchain_cursor_append(cursor, list->ip,
+ list->ms.map, list->ms.sym);
list_del(&list->list);
free(list);
}
- if (src->hit)
- append_chain_children(dst, chain, 0, src->hit);
+ if (src->hit) {
+ callchain_cursor_commit(cursor);
+ append_chain_children(dst, cursor, src->hit);
+ }
chain_for_each_child_safe(child, next_child, src) {
- err = merge_chain_branch(dst, child, chain);
+ err = merge_chain_branch(cursor, dst, child);
if (err)
break;
- list_del(&child->brothers);
+ list_del(&child->siblings);
free(child);
}
- chain->nr = old_pos;
+ cursor->nr = old_pos;
+ cursor->last = old_last;
return err;
}
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src)
+{
+ return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+ u64 ip, struct map *map, struct symbol *sym)
{
- struct resolved_chain *chain;
- int err;
+ struct callchain_cursor_node *node = *cursor->last;
- chain = malloc(sizeof(*chain) +
- src->max_depth * sizeof(struct resolved_ip));
- if (!chain)
- return -ENOMEM;
+ if (!node) {
+ node = calloc(sizeof(*node), 1);
+ if (!node)
+ return -ENOMEM;
- chain->nr = 0;
+ *cursor->last = node;
+ }
- err = merge_chain_branch(&dst->node, &src->node, chain);
+ node->ip = ip;
+ node->map = map;
+ node->sym = sym;
- free(chain);
+ cursor->nr++;
- return err;
+ cursor->last = &node->next;
+
+ return 0;
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c15fb8c24ad2..1a79df9f739f 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -16,7 +16,7 @@ enum chain_mode {
struct callchain_node {
struct callchain_node *parent;
- struct list_head brothers;
+ struct list_head siblings;
struct list_head children;
struct list_head val;
struct rb_node rb_node; /* to sort nodes in an rbtree */
@@ -49,9 +49,30 @@ struct callchain_list {
struct list_head list;
};
+/*
+ * A callchain cursor is a single linked list that
+ * let one feed a callchain progressively.
+ * It keeps persitent allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+ u64 ip;
+ struct map *map;
+ struct symbol *sym;
+ struct callchain_cursor_node *next;
+};
+
+struct callchain_cursor {
+ u64 nr;
+ struct callchain_cursor_node *first;
+ struct callchain_cursor_node **last;
+ u64 pos;
+ struct callchain_cursor_node *curr;
+};
+
static inline void callchain_init(struct callchain_root *root)
{
- INIT_LIST_HEAD(&root->node.brothers);
+ INIT_LIST_HEAD(&root->node.siblings);
INIT_LIST_HEAD(&root->node.children);
INIT_LIST_HEAD(&root->node.val);
@@ -61,15 +82,54 @@ static inline void callchain_init(struct callchain_root *root)
root->max_depth = 0;
}
-static inline u64 cumul_hits(struct callchain_node *node)
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
{
return node->hit + node->children_hit;
}
-int register_callchain_param(struct callchain_param *param);
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period);
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src);
+
+bool ip_callchain__valid(struct ip_callchain *chain,
+ const union perf_event *event);
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+ cursor->nr = 0;
+ cursor->last = &cursor->first;
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+ struct map *map, struct symbol *sym);
-bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->first;
+ cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+ if (cursor->pos == cursor->nr)
+ return NULL;
+
+ return cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+}
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3ccaa1043383..6893eec693ab 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void)
return cpus;
}
+
+void cpu_map__delete(struct cpu_map *map)
+{
+ free(map);
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f7a4f42f6307..072c0a374794 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,6 +8,6 @@ struct cpu_map {
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__dummy_new(void);
-void *cpu_map__delete(struct cpu_map *map);
+void cpu_map__delete(struct cpu_map *map);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 01bbe8ecec3f..d4536a9e0d8c 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -57,7 +57,7 @@ void ui__warning(const char *format, ...)
}
#endif
-void trace_event(event_t *event)
+void trace_event(union perf_event *event)
{
unsigned char *raw_event = (void *)event;
const char *color = PERF_COLOR_BLUE;
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index ca35fd66b5df..93516cf4682c 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -9,7 +9,7 @@ extern int verbose;
extern bool quiet, dump_trace;
int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
-void trace_event(event_t *event);
+void trace_event(union perf_event *event);
struct ui_progress;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 50d0a931497a..fbf5754c8866 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -6,8 +6,9 @@
#include "string.h"
#include "strlist.h"
#include "thread.h"
+#include "thread_map.h"
-static const char *event__name[] = {
+static const char *perf_event__names[] = {
[0] = "TOTAL",
[PERF_RECORD_MMAP] = "MMAP",
[PERF_RECORD_LOST] = "LOST",
@@ -25,16 +26,16 @@ static const char *event__name[] = {
[PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
};
-const char *event__get_event_name(unsigned int id)
+const char *perf_event__name(unsigned int id)
{
- if (id >= ARRAY_SIZE(event__name))
+ if (id >= ARRAY_SIZE(perf_event__names))
return "INVALID";
- if (!event__name[id])
+ if (!perf_event__names[id])
return "UNKNOWN";
- return event__name[id];
+ return perf_event__names[id];
}
-static struct sample_data synth_sample = {
+static struct perf_sample synth_sample = {
.pid = -1,
.tid = -1,
.time = -1,
@@ -43,9 +44,9 @@ static struct sample_data synth_sample = {
.period = 1,
};
-static pid_t event__synthesize_comm(event_t *event, pid_t pid, int full,
- event__handler_t process,
- struct perf_session *session)
+static pid_t perf_event__synthesize_comm(union perf_event *event, pid_t pid,
+ int full, perf_event__handler_t process,
+ struct perf_session *session)
{
char filename[PATH_MAX];
char bf[BUFSIZ];
@@ -126,9 +127,10 @@ out:
return tgid;
}
-static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
- event__handler_t process,
- struct perf_session *session)
+static int perf_event__synthesize_mmap_events(union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
char filename[PATH_MAX];
FILE *fp;
@@ -199,14 +201,14 @@ static int event__synthesize_mmap_events(event_t *event, pid_t pid, pid_t tgid,
return 0;
}
-int event__synthesize_modules(event__handler_t process,
- struct perf_session *session,
- struct machine *machine)
+int perf_event__synthesize_modules(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine)
{
struct rb_node *nd;
struct map_groups *kmaps = &machine->kmaps;
- event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
-
+ union perf_event *event = zalloc((sizeof(event->mmap) +
+ session->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -251,23 +253,24 @@ int event__synthesize_modules(event__handler_t process,
return 0;
}
-static int __event__synthesize_thread(event_t *comm_event, event_t *mmap_event,
- pid_t pid, event__handler_t process,
+static int __event__synthesize_thread(union perf_event *comm_event,
+ union perf_event *mmap_event,
+ pid_t pid, perf_event__handler_t process,
struct perf_session *session)
{
- pid_t tgid = event__synthesize_comm(comm_event, pid, 1, process,
+ pid_t tgid = perf_event__synthesize_comm(comm_event, pid, 1, process,
session);
if (tgid == -1)
return -1;
- return event__synthesize_mmap_events(mmap_event, pid, tgid,
+ return perf_event__synthesize_mmap_events(mmap_event, pid, tgid,
process, session);
}
-int event__synthesize_thread_map(struct thread_map *threads,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_thread_map(struct thread_map *threads,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t *comm_event, *mmap_event;
+ union perf_event *comm_event, *mmap_event;
int err = -1, thread;
comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -294,12 +297,12 @@ out:
return err;
}
-int event__synthesize_threads(event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_threads(perf_event__handler_t process,
+ struct perf_session *session)
{
DIR *proc;
struct dirent dirent, *next;
- event_t *comm_event, *mmap_event;
+ union perf_event *comm_event, *mmap_event;
int err = -1;
comm_event = malloc(sizeof(comm_event->comm) + session->id_hdr_size);
@@ -357,10 +360,10 @@ static int find_symbol_cb(void *arg, const char *name, char type,
return 1;
}
-int event__synthesize_kernel_mmap(event__handler_t process,
- struct perf_session *session,
- struct machine *machine,
- const char *symbol_name)
+int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine,
+ const char *symbol_name)
{
size_t size;
const char *filename, *mmap_name;
@@ -374,8 +377,8 @@ int event__synthesize_kernel_mmap(event__handler_t process,
* kernels.
*/
struct process_symbol_args args = { .name = symbol_name, };
- event_t *event = zalloc(sizeof(event->mmap) + session->id_hdr_size);
-
+ union perf_event *event = zalloc((sizeof(event->mmap) +
+ session->id_hdr_size));
if (event == NULL) {
pr_debug("Not enough memory synthesizing mmap event "
"for kernel modules\n");
@@ -448,14 +451,15 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm,
return 0;
}
-int event__process_comm(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_comm(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->comm.tid);
+ struct thread *thread = perf_session__findnew(session, event->comm.tid);
- dump_printf(": %s:%d\n", self->comm.comm, self->comm.tid);
+ dump_printf(": %s:%d\n", event->comm.comm, event->comm.tid);
- if (thread == NULL || thread__set_comm_adjust(thread, self->comm.comm,
+ if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm,
&session->hists)) {
dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
return -1;
@@ -464,19 +468,21 @@ int event__process_comm(event_t *self, struct sample_data *sample __used,
return 0;
}
-int event__process_lost(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_lost(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
- self->lost.id, self->lost.lost);
- session->hists.stats.total_lost += self->lost.lost;
+ event->lost.id, event->lost.lost);
+ session->hists.stats.total_lost += event->lost.lost;
return 0;
}
-static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
+static void perf_event__set_kernel_mmap_len(union perf_event *event,
+ struct map **maps)
{
- maps[MAP__FUNCTION]->start = self->mmap.start;
- maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
+ maps[MAP__FUNCTION]->start = event->mmap.start;
+ maps[MAP__FUNCTION]->end = event->mmap.start + event->mmap.len;
/*
* Be a bit paranoid here, some perf.data file came with
* a zero sized synthesized MMAP event for the kernel.
@@ -485,8 +491,8 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
maps[MAP__FUNCTION]->end = ~0ULL;
}
-static int event__process_kernel_mmap(event_t *self,
- struct perf_session *session)
+static int perf_event__process_kernel_mmap(union perf_event *event,
+ struct perf_session *session)
{
struct map *map;
char kmmap_prefix[PATH_MAX];
@@ -494,9 +500,9 @@ static int event__process_kernel_mmap(event_t *self,
enum dso_kernel_type kernel_type;
bool is_kernel_mmap;
- machine = perf_session__findnew_machine(session, self->mmap.pid);
+ machine = perf_session__findnew_machine(session, event->mmap.pid);
if (!machine) {
- pr_err("Can't find id %d's machine\n", self->mmap.pid);
+ pr_err("Can't find id %d's machine\n", event->mmap.pid);
goto out_problem;
}
@@ -506,17 +512,17 @@ static int event__process_kernel_mmap(event_t *self,
else
kernel_type = DSO_TYPE_GUEST_KERNEL;
- is_kernel_mmap = memcmp(self->mmap.filename,
+ is_kernel_mmap = memcmp(event->mmap.filename,
kmmap_prefix,
strlen(kmmap_prefix)) == 0;
- if (self->mmap.filename[0] == '/' ||
- (!is_kernel_mmap && self->mmap.filename[0] == '[')) {
+ if (event->mmap.filename[0] == '/' ||
+ (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
char short_module_name[1024];
char *name, *dot;
- if (self->mmap.filename[0] == '/') {
- name = strrchr(self->mmap.filename, '/');
+ if (event->mmap.filename[0] == '/') {
+ name = strrchr(event->mmap.filename, '/');
if (name == NULL)
goto out_problem;
@@ -528,10 +534,10 @@ static int event__process_kernel_mmap(event_t *self,
"[%.*s]", (int)(dot - name), name);
strxfrchar(short_module_name, '-', '_');
} else
- strcpy(short_module_name, self->mmap.filename);
+ strcpy(short_module_name, event->mmap.filename);
- map = machine__new_module(machine, self->mmap.start,
- self->mmap.filename);
+ map = machine__new_module(machine, event->mmap.start,
+ event->mmap.filename);
if (map == NULL)
goto out_problem;
@@ -541,9 +547,9 @@ static int event__process_kernel_mmap(event_t *self,
map->dso->short_name = name;
map->dso->sname_alloc = 1;
- map->end = map->start + self->mmap.len;
+ map->end = map->start + event->mmap.len;
} else if (is_kernel_mmap) {
- const char *symbol_name = (self->mmap.filename +
+ const char *symbol_name = (event->mmap.filename +
strlen(kmmap_prefix));
/*
* Should be there already, from the build-id table in
@@ -558,10 +564,10 @@ static int event__process_kernel_mmap(event_t *self,
if (__machine__create_kernel_maps(machine, kernel) < 0)
goto out_problem;
- event_set_kernel_mmap_len(machine->vmlinux_maps, self);
+ perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps);
perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps,
symbol_name,
- self->mmap.pgoff);
+ event->mmap.pgoff);
if (machine__is_default_guest(machine)) {
/*
* preload dso of guest kernel and modules
@@ -575,22 +581,23 @@ out_problem:
return -1;
}
-int event__process_mmap(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_mmap(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
struct machine *machine;
struct thread *thread;
struct map *map;
- u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
int ret = 0;
dump_printf(" %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %s\n",
- self->mmap.pid, self->mmap.tid, self->mmap.start,
- self->mmap.len, self->mmap.pgoff, self->mmap.filename);
+ event->mmap.pid, event->mmap.tid, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff, event->mmap.filename);
if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
cpumode == PERF_RECORD_MISC_KERNEL) {
- ret = event__process_kernel_mmap(self, session);
+ ret = perf_event__process_kernel_mmap(event, session);
if (ret < 0)
goto out_problem;
return 0;
@@ -599,12 +606,12 @@ int event__process_mmap(event_t *self, struct sample_data *sample __used,
machine = perf_session__find_host_machine(session);
if (machine == NULL)
goto out_problem;
- thread = perf_session__findnew(session, self->mmap.pid);
+ thread = perf_session__findnew(session, event->mmap.pid);
if (thread == NULL)
goto out_problem;
- map = map__new(&machine->user_dsos, self->mmap.start,
- self->mmap.len, self->mmap.pgoff,
- self->mmap.pid, self->mmap.filename,
+ map = map__new(&machine->user_dsos, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ event->mmap.pid, event->mmap.filename,
MAP__FUNCTION);
if (map == NULL)
goto out_problem;
@@ -617,16 +624,17 @@ out_problem:
return 0;
}
-int event__process_task(event_t *self, struct sample_data *sample __used,
- struct perf_session *session)
+int perf_event__process_task(union perf_event *event,
+ struct perf_sample *sample __used,
+ struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->fork.tid);
- struct thread *parent = perf_session__findnew(session, self->fork.ptid);
+ struct thread *thread = perf_session__findnew(session, event->fork.tid);
+ struct thread *parent = perf_session__findnew(session, event->fork.ptid);
- dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid,
- self->fork.ppid, self->fork.ptid);
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
- if (self->header.type == PERF_RECORD_EXIT) {
+ if (event->header.type == PERF_RECORD_EXIT) {
perf_session__remove_thread(session, thread);
return 0;
}
@@ -640,20 +648,22 @@ int event__process_task(event_t *self, struct sample_data *sample __used,
return 0;
}
-int event__process(event_t *event, struct sample_data *sample,
- struct perf_session *session)
+int perf_event__process(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session)
{
switch (event->header.type) {
case PERF_RECORD_COMM:
- event__process_comm(event, sample, session);
+ perf_event__process_comm(event, sample, session);
break;
case PERF_RECORD_MMAP:
- event__process_mmap(event, sample, session);
+ perf_event__process_mmap(event, sample, session);
break;
case PERF_RECORD_FORK:
case PERF_RECORD_EXIT:
- event__process_task(event, sample, session);
+ perf_event__process_task(event, sample, session);
break;
+ case PERF_RECORD_LOST:
+ perf_event__process_lost(event, sample, session);
default:
break;
}
@@ -762,12 +772,14 @@ static void dso__calc_col_width(struct dso *self, struct hists *hists)
self->slen_calculated = 1;
}
-int event__preprocess_sample(const event_t *self, struct perf_session *session,
- struct addr_location *al, struct sample_data *data,
- symbol_filter_t filter)
+int perf_event__preprocess_sample(const union perf_event *event,
+ struct perf_session *session,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ symbol_filter_t filter)
{
- u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct thread *thread = perf_session__findnew(session, self->ip.pid);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL)
return -1;
@@ -789,12 +801,12 @@ int event__preprocess_sample(const event_t *self, struct perf_session *session,
machine__create_kernel_maps(&session->host_machine);
thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
- self->ip.pid, self->ip.ip, al);
+ event->ip.pid, event->ip.ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
al->sym = NULL;
- al->cpu = data->cpu;
+ al->cpu = sample->cpu;
if (al->map) {
if (symbol_conf.dso_list &&
@@ -834,128 +846,3 @@ out_filtered:
al->filtered = true;
return 0;
}
-
-static int event__parse_id_sample(const event_t *event,
- struct perf_session *session,
- struct sample_data *sample)
-{
- const u64 *array;
- u64 type;
-
- sample->cpu = sample->pid = sample->tid = -1;
- sample->stream_id = sample->id = sample->time = -1ULL;
-
- if (!session->sample_id_all)
- return 0;
-
- array = event->sample.array;
- array += ((event->header.size -
- sizeof(event->header)) / sizeof(u64)) - 1;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- sample->cpu = *p;
- array--;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- sample->stream_id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_ID) {
- sample->id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- sample->time = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- sample->pid = p[0];
- sample->tid = p[1];
- }
-
- return 0;
-}
-
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *data)
-{
- const u64 *array;
- u64 type;
-
- if (event->header.type != PERF_RECORD_SAMPLE)
- return event__parse_id_sample(event, session, data);
-
- array = event->sample.array;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_IP) {
- data->ip = event->ip.ip;
- array++;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- data->pid = p[0];
- data->tid = p[1];
- array++;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- data->time = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_ADDR) {
- data->addr = *array;
- array++;
- }
-
- data->id = -1ULL;
- if (type & PERF_SAMPLE_ID) {
- data->id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- data->stream_id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- data->cpu = *p;
- array++;
- } else
- data->cpu = -1;
-
- if (type & PERF_SAMPLE_PERIOD) {
- data->period = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_READ) {
- pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
- return -1;
- }
-
- if (type & PERF_SAMPLE_CALLCHAIN) {
- data->callchain = (struct ip_callchain *)array;
- array += 1 + data->callchain->nr;
- }
-
- if (type & PERF_SAMPLE_RAW) {
- u32 *p = (u32 *)array;
- data->raw_size = *p;
- p++;
- data->raw_data = p;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cc7b52f9b492..9c35170fb379 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -61,7 +61,7 @@ struct sample_event {
u64 array[];
};
-struct sample_data {
+struct perf_sample {
u64 ip;
u32 pid, tid;
u64 time;
@@ -117,7 +117,7 @@ struct tracing_data_event {
u32 size;
};
-typedef union event_union {
+union perf_event {
struct perf_event_header header;
struct ip_event ip;
struct mmap_event mmap;
@@ -130,50 +130,54 @@ typedef union event_union {
struct event_type_event event_type;
struct tracing_data_event tracing_data;
struct build_id_event build_id;
-} event_t;
+};
-void event__print_totals(void);
+void perf_event__print_totals(void);
struct perf_session;
struct thread_map;
-typedef int (*event__handler_synth_t)(event_t *event,
+typedef int (*perf_event__handler_synth_t)(union perf_event *event,
+ struct perf_session *session);
+typedef int (*perf_event__handler_t)(union perf_event *event,
+ struct perf_sample *sample,
struct perf_session *session);
-typedef int (*event__handler_t)(event_t *event, struct sample_data *sample,
- struct perf_session *session);
-
-int event__synthesize_thread_map(struct thread_map *threads,
- event__handler_t process,
- struct perf_session *session);
-int event__synthesize_threads(event__handler_t process,
- struct perf_session *session);
-int event__synthesize_kernel_mmap(event__handler_t process,
- struct perf_session *session,
- struct machine *machine,
- const char *symbol_name);
-
-int event__synthesize_modules(event__handler_t process,
- struct perf_session *session,
- struct machine *machine);
-
-int event__process_comm(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_lost(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_mmap(event_t *self, struct sample_data *sample,
- struct perf_session *session);
-int event__process_task(event_t *self, struct sample_data *sample,
+
+int perf_event__synthesize_thread_map(struct thread_map *threads,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_threads(perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_kernel_mmap(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine,
+ const char *symbol_name);
+
+int perf_event__synthesize_modules(perf_event__handler_t process,
+ struct perf_session *session,
+ struct machine *machine);
+
+int perf_event__process_comm(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_lost(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_mmap(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process_task(union perf_event *event, struct perf_sample *sample,
+ struct perf_session *session);
+int perf_event__process(union perf_event *event, struct perf_sample *sample,
struct perf_session *session);
-int event__process(event_t *event, struct sample_data *sample,
- struct perf_session *session);
struct addr_location;
-int event__preprocess_sample(const event_t *self, struct perf_session *session,
- struct addr_location *al, struct sample_data *data,
- symbol_filter_t filter);
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *sample);
+int perf_event__preprocess_sample(const union perf_event *self,
+ struct perf_session *session,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ symbol_filter_t filter);
+
+const char *perf_event__name(unsigned int id);
-const char *event__get_event_name(unsigned int id);
+int perf_event__parse_sample(const union perf_event *event, u64 type,
+ bool sample_id_all, struct perf_sample *sample);
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000000..95b21fece2ce
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include <poll.h>
+#include "cpumap.h"
+#include "thread_map.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+
+#include <sys/mman.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->id, x, y)
+
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+ INIT_LIST_HEAD(&evlist->entries);
+ perf_evlist__set_maps(evlist, cpus, threads);
+}
+
+struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ struct perf_evlist *evlist = zalloc(sizeof(*evlist));
+
+ if (evlist != NULL)
+ perf_evlist__init(evlist, cpus, threads);
+
+ return evlist;
+}
+
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+ list_del_init(&pos->node);
+ perf_evsel__delete(pos);
+ }
+
+ evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+ free(evlist->mmap);
+ free(evlist->pollfd);
+ evlist->mmap = NULL;
+ evlist->pollfd = NULL;
+}
+
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+ perf_evlist__purge(evlist);
+ perf_evlist__exit(evlist);
+ free(evlist);
+}
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
+{
+ list_add_tail(&entry->node, &evlist->entries);
+ ++evlist->nr_entries;
+}
+
+int perf_evlist__add_default(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+ int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
+ evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
+ return evlist->pollfd != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ evlist->pollfd[evlist->nr_fds].fd = fd;
+ evlist->pollfd[evlist->nr_fds].events = POLLIN;
+ evlist->nr_fds++;
+}
+
+static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, int fd)
+{
+ struct perf_sample_id *sid;
+ u64 read_data[4] = { 0, };
+ int hash, id_idx = 1; /* The first entry is the counter value */
+
+ if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+ read(fd, &read_data, sizeof(read_data)) == -1)
+ return -1;
+
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ ++id_idx;
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ ++id_idx;
+
+ sid = SID(evsel, cpu, thread);
+ sid->id = read_data[id_idx];
+ sid->evsel = evsel;
+ hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
+ hlist_add_head(&sid->node, &evlist->heads[hash]);
+ return 0;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+ struct hlist_head *head;
+ struct hlist_node *pos;
+ struct perf_sample_id *sid;
+ int hash;
+
+ if (evlist->nr_entries == 1)
+ return list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->heads[hash];
+
+ hlist_for_each_entry(sid, pos, head, node)
+ if (sid->id == id)
+ return sid->evsel;
+ return NULL;
+}
+
+union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
+{
+ /* XXX Move this to perf.c, making it generally available */
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ struct perf_mmap *md = &evlist->mmap[cpu];
+ unsigned int head = perf_mmap__read_head(md);
+ unsigned int old = md->prev;
+ unsigned char *data = md->base + page_size;
+ union perf_event *event = NULL;
+
+ if (evlist->overwrite) {
+ /*
+ * If we're further behind than half the buffer, there's a chance
+ * the writer will bite our tail and mess up the samples under us.
+ *
+ * If we somehow ended up ahead of the head, we got messed up.
+ *
+ * In either case, truncate and restart at head.
+ */
+ int diff = head - old;
+ if (diff > md->mask / 2 || diff < 0) {
+ fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
+
+ /*
+ * head points to a known good entry, start there.
+ */
+ old = head;
+ }
+ }
+
+ if (old != head) {
+ size_t size;
+
+ event = (union perf_event *)&data[old & md->mask];
+ size = event->header.size;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((old & md->mask) + size != ((old + size) & md->mask)) {
+ unsigned int offset = old;
+ unsigned int len = min(sizeof(*event), size), cpy;
+ void *dst = &evlist->event_copy;
+
+ do {
+ cpy = min(md->mask + 1 - (offset & md->mask), len);
+ memcpy(dst, &data[offset & md->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = &evlist->event_copy;
+ }
+
+ old += size;
+ }
+
+ md->prev = old;
+
+ if (!evlist->overwrite)
+ perf_mmap__write_tail(md, old);
+
+ return event;
+}
+
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+}
+
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
+{
+ evlist->mmap = zalloc(evlist->cpus->nr * sizeof(struct perf_mmap));
+ return evlist->mmap != NULL ? 0 : -ENOMEM;
+}
+
+static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
+ int mask, int fd)
+{
+ evlist->mmap[cpu].prev = 0;
+ evlist->mmap[cpu].mask = mask;
+ evlist->mmap[cpu].base = mmap(NULL, evlist->mmap_len, prot,
+ MAP_SHARED, fd, 0);
+ if (evlist->mmap[cpu].base == MAP_FAILED)
+ return -1;
+
+ perf_evlist__add_pollfd(evlist, fd);
+ return 0;
+}
+
+/** perf_evlist__mmap - Create per cpu maps to receive events
+ *
+ * @evlist - list of events
+ * @pages - map length in pages
+ * @overwrite - overwrite older events?
+ *
+ * If overwrite is false the user needs to signal event consuption using:
+ *
+ * struct perf_mmap *m = &evlist->mmap[cpu];
+ * unsigned int head = perf_mmap__read_head(m);
+ *
+ * perf_mmap__write_tail(m, head)
+ *
+ * Using perf_evlist__read_on_cpu does this automatically.
+ */
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite)
+{
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ int mask = pages * page_size - 1, cpu;
+ struct perf_evsel *first_evsel, *evsel;
+ const struct cpu_map *cpus = evlist->cpus;
+ const struct thread_map *threads = evlist->threads;
+ int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE);
+
+ if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
+ return -ENOMEM;
+
+ if (evlist->pollfd == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+ return -ENOMEM;
+
+ evlist->overwrite = overwrite;
+ evlist->mmap_len = (pages + 1) * page_size;
+ first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->id == NULL &&
+ perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ int fd = FD(evsel, cpu, thread);
+
+ if (evsel->idx || thread) {
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+ FD(first_evsel, cpu, 0)) != 0)
+ goto out_unmap;
+ } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
+ goto out_unmap;
+
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0)
+ goto out_unmap;
+ }
+ }
+ }
+
+ return 0;
+
+out_unmap:
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+ return -1;
+}
+
+int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
+ pid_t target_tid, const char *cpu_list)
+{
+ evlist->threads = thread_map__new(target_pid, target_tid);
+
+ if (evlist->threads == NULL)
+ return -1;
+
+ if (target_tid != -1)
+ evlist->cpus = cpu_map__dummy_new();
+ else
+ evlist->cpus = cpu_map__new(cpu_list);
+
+ if (evlist->cpus == NULL)
+ goto out_delete_threads;
+
+ return 0;
+
+out_delete_threads:
+ thread_map__delete(evlist->threads);
+ return -1;
+}
+
+void perf_evlist__delete_maps(struct perf_evlist *evlist)
+{
+ cpu_map__delete(evlist->cpus);
+ thread_map__delete(evlist->threads);
+ evlist->cpus = NULL;
+ evlist->threads = NULL;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000000..c9884056097c
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,64 @@
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/list.h>
+#include "../perf.h"
+#include "event.h"
+
+struct pollfd;
+struct thread_map;
+struct cpu_map;
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+struct perf_evlist {
+ struct list_head entries;
+ struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+ int nr_entries;
+ int nr_fds;
+ int mmap_len;
+ bool overwrite;
+ union perf_event event_copy;
+ struct perf_mmap *mmap;
+ struct pollfd *pollfd;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+};
+
+struct perf_evsel;
+
+struct perf_evlist *perf_evlist__new(struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evlist__exit(struct perf_evlist *evlist);
+void perf_evlist__delete(struct perf_evlist *evlist);
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+int perf_evlist__add_default(struct perf_evlist *evlist);
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+
+union perf_event *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
+
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist);
+int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
+void perf_evlist__munmap(struct perf_evlist *evlist);
+
+static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
+ struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ evlist->cpus = cpus;
+ evlist->threads = threads;
+}
+
+int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
+ pid_t target_tid, const char *cpu_list);
+void perf_evlist__delete_maps(struct perf_evlist *evlist);
+
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d8575d31ee6c..211063eed474 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,20 +1,34 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
#include "evsel.h"
-#include "../perf.h"
+#include "evlist.h"
#include "util.h"
#include "cpumap.h"
-#include "thread.h"
+#include "thread_map.h"
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx)
+{
+ evsel->idx = idx;
+ evsel->attr = *attr;
+ INIT_LIST_HEAD(&evsel->node);
+}
+
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
- if (evsel != NULL) {
- evsel->idx = idx;
- evsel->attr = *attr;
- INIT_LIST_HEAD(&evsel->node);
- }
+ if (evsel != NULL)
+ perf_evsel__init(evsel, attr, idx);
return evsel;
}
@@ -25,6 +39,12 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+ return evsel->id != NULL ? 0 : -ENOMEM;
+}
+
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -38,6 +58,12 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
evsel->fd = NULL;
}
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+ xyarray__delete(evsel->id);
+ evsel->id = NULL;
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -49,10 +75,16 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
-void perf_evsel__delete(struct perf_evsel *evsel)
+void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
xyarray__delete(evsel->fd);
+ xyarray__delete(evsel->id);
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+ perf_evsel__exit(evsel);
free(evsel);
}
@@ -128,7 +160,7 @@ int __perf_evsel__read(struct perf_evsel *evsel,
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
- struct thread_map *threads)
+ struct thread_map *threads, bool group, bool inherit)
{
int cpu, thread;
@@ -137,12 +169,20 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
return -1;
for (cpu = 0; cpu < cpus->nr; cpu++) {
+ int group_fd = -1;
+
+ evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit;
+
for (thread = 0; thread < threads->nr; thread++) {
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
threads->map[thread],
- cpus->map[cpu], -1, 0);
+ cpus->map[cpu],
+ group_fd, 0);
if (FD(evsel, cpu, thread) < 0)
goto out_close;
+
+ if (group && group_fd == -1)
+ group_fd = FD(evsel, cpu, thread);
}
}
@@ -175,10 +215,9 @@ static struct {
.threads = { -1, },
};
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit)
{
-
if (cpus == NULL) {
/* Work around old compiler warnings about strict aliasing */
cpus = &empty_cpu_map.map;
@@ -187,15 +226,135 @@ int perf_evsel__open(struct perf_evsel *evsel,
if (threads == NULL)
threads = &empty_thread_map.map;
- return __perf_evsel__open(evsel, cpus, threads);
+ return __perf_evsel__open(evsel, cpus, threads, group, inherit);
+}
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus, bool group, bool inherit)
+{
+ return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, inherit);
}
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads, bool group, bool inherit)
{
- return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+ return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, inherit);
}
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
+ struct perf_sample *sample)
{
- return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
+ const u64 *array = event->sample.array;
+
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1;
+
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array;
+ sample->cpu = *p;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array;
+ sample->pid = p[0];
+ sample->tid = p[1];
+ }
+
+ return 0;
+}
+
+int perf_event__parse_sample(const union perf_event *event, u64 type,
+ bool sample_id_all, struct perf_sample *data)
+{
+ const u64 *array;
+
+ data->cpu = data->pid = data->tid = -1;
+ data->stream_id = data->id = data->time = -1ULL;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ if (!sample_id_all)
+ return 0;
+ return perf_event__parse_id_sample(event, type, data);
+ }
+
+ array = event->sample.array;
+
+ if (type & PERF_SAMPLE_IP) {
+ data->ip = event->ip.ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array;
+ data->pid = p[0];
+ data->tid = p[1];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ data->time = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ data->addr = *array;
+ array++;
+ }
+
+ data->id = -1ULL;
+ if (type & PERF_SAMPLE_ID) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ data->stream_id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array;
+ data->cpu = *p;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ data->period = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ data->callchain = (struct ip_callchain *)array;
+ array += 1 + data->callchain->nr;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ u32 *p = (u32 *)array;
+ data->raw_size = *p;
+ p++;
+ data->raw_data = p;
+ }
+
+ return 0;
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b2d755fe88a5..eecdc3aabc14 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -24,11 +24,24 @@ struct perf_counts {
struct perf_counts_values cpu[];
};
+struct perf_evsel;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+ struct hlist_node node;
+ u64 id;
+ struct perf_evsel *evsel;
+};
+
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
+ struct xyarray *id;
struct perf_counts *counts;
int idx;
void *priv;
@@ -36,19 +49,27 @@ struct perf_evsel {
struct cpu_map;
struct thread_map;
+struct perf_evlist;
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__free_id(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads);
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus, bool group, bool inherit);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads, bool group, bool inherit);
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit);
#define perf_evsel__match(evsel, t, c) \
(evsel->attr.type == PERF_TYPE_##t && \
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f6a929e74981..72c124dc5781 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
#include <linux/list.h>
#include <linux/kernel.h>
+#include "evlist.h"
#include "util.h"
#include "header.h"
#include "../perf.h"
@@ -428,7 +429,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi
return ret;
}
-static int perf_header__adds_write(struct perf_header *self, int fd)
+static int perf_header__adds_write(struct perf_header *self,
+ struct perf_evlist *evlist, int fd)
{
int nr_sections;
struct perf_session *session;
@@ -463,7 +465,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
/* Write trace info */
trace_sec->offset = lseek(fd, 0, SEEK_CUR);
- read_tracing_data(fd, &evsel_list);
+ read_tracing_data(fd, &evlist->entries);
trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
}
@@ -513,7 +515,8 @@ int perf_header__write_pipe(int fd)
return 0;
}
-int perf_header__write(struct perf_header *self, int fd, bool at_exit)
+int perf_header__write(struct perf_header *self, struct perf_evlist *evlist,
+ int fd, bool at_exit)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
@@ -566,7 +569,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
self->data_offset = lseek(fd, 0, SEEK_CUR);
if (at_exit) {
- err = perf_header__adds_write(self, fd);
+ err = perf_header__adds_write(self, evlist, fd);
if (err < 0)
return err;
}
@@ -997,11 +1000,11 @@ perf_header__find_attr(u64 id, struct perf_header *header)
return NULL;
}
-int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t *ev;
+ union perf_event *ev;
size_t size;
int err;
@@ -1028,8 +1031,9 @@ int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
return err;
}
-int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_attrs(struct perf_header *self,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
struct perf_header_attr *attr;
int i, err = 0;
@@ -1037,8 +1041,8 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
for (i = 0; i < self->attrs; i++) {
attr = self->attr[i];
- err = event__synthesize_attr(&attr->attr, attr->ids, attr->id,
- process, session);
+ err = perf_event__synthesize_attr(&attr->attr, attr->ids,
+ attr->id, process, session);
if (err) {
pr_debug("failed to create perf header attribute\n");
return err;
@@ -1048,21 +1052,22 @@ int event__synthesize_attrs(struct perf_header *self, event__handler_t process,
return err;
}
-int event__process_attr(event_t *self, struct perf_session *session)
+int perf_event__process_attr(union perf_event *event,
+ struct perf_session *session)
{
struct perf_header_attr *attr;
unsigned int i, ids, n_ids;
- attr = perf_header_attr__new(&self->attr.attr);
+ attr = perf_header_attr__new(&event->attr.attr);
if (attr == NULL)
return -ENOMEM;
- ids = self->header.size;
- ids -= (void *)&self->attr.id - (void *)self;
+ ids = event->header.size;
+ ids -= (void *)&event->attr.id - (void *)event;
n_ids = ids / sizeof(u64);
for (i = 0; i < n_ids; i++) {
- if (perf_header_attr__add_id(attr, self->attr.id[i]) < 0) {
+ if (perf_header_attr__add_id(attr, event->attr.id[i]) < 0) {
perf_header_attr__delete(attr);
return -ENOMEM;
}
@@ -1078,11 +1083,11 @@ int event__process_attr(event_t *self, struct perf_session *session)
return 0;
}
-int event__synthesize_event_type(u64 event_id, char *name,
- event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_event_type(u64 event_id, char *name,
+ perf_event__handler_t process,
+ struct perf_session *session)
{
- event_t ev;
+ union perf_event ev;
size_t size = 0;
int err = 0;
@@ -1103,8 +1108,8 @@ int event__synthesize_event_type(u64 event_id, char *name,
return err;
}
-int event__synthesize_event_types(event__handler_t process,
- struct perf_session *session)
+int perf_event__synthesize_event_types(perf_event__handler_t process,
+ struct perf_session *session)
{
struct perf_trace_event_type *type;
int i, err = 0;
@@ -1112,8 +1117,9 @@ int event__synthesize_event_types(event__handler_t process,
for (i = 0; i < event_count; i++) {
type = &events[i];
- err = event__synthesize_event_type(type->event_id, type->name,
- process, session);
+ err = perf_event__synthesize_event_type(type->event_id,
+ type->name, process,
+ session);
if (err) {
pr_debug("failed to create perf header event type\n");
return err;
@@ -1123,28 +1129,28 @@ int event__synthesize_event_types(event__handler_t process,
return err;
}
-int event__process_event_type(event_t *self,
- struct perf_session *session __unused)
+int perf_event__process_event_type(union perf_event *event,
+ struct perf_session *session __unused)
{
- if (perf_header__push_event(self->event_type.event_type.event_id,
- self->event_type.event_type.name) < 0)
+ if (perf_header__push_event(event->event_type.event_type.event_id,
+ event->event_type.event_type.name) < 0)
return -ENOMEM;
return 0;
}
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
- event__handler_t process,
+int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process,
struct perf_session *session __unused)
{
- event_t ev;
+ union perf_event ev;
ssize_t size = 0, aligned_size = 0, padding;
- int err = 0;
+ int err __used = 0;
memset(&ev, 0, sizeof(ev));
ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
- size = read_tracing_data_size(fd, pattrs);
+ size = read_tracing_data_size(fd, &evlist->entries);
if (size <= 0)
return size;
aligned_size = ALIGN(size, sizeof(u64));
@@ -1154,16 +1160,16 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
process(&ev, NULL, session);
- err = read_tracing_data(fd, pattrs);
+ err = read_tracing_data(fd, &evlist->entries);
write_padded(fd, NULL, 0, padding);
return aligned_size;
}
-int event__process_tracing_data(event_t *self,
- struct perf_session *session)
+int perf_event__process_tracing_data(union perf_event *event,
+ struct perf_session *session)
{
- ssize_t size_read, padding, size = self->tracing_data.size;
+ ssize_t size_read, padding, size = event->tracing_data.size;
off_t offset = lseek(session->fd, 0, SEEK_CUR);
char buf[BUFSIZ];
@@ -1189,12 +1195,12 @@ int event__process_tracing_data(event_t *self,
return size_read + padding;
}
-int event__synthesize_build_id(struct dso *pos, u16 misc,
- event__handler_t process,
- struct machine *machine,
- struct perf_session *session)
+int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct perf_session *session)
{
- event_t ev;
+ union perf_event ev;
size_t len;
int err = 0;
@@ -1217,11 +1223,11 @@ int event__synthesize_build_id(struct dso *pos, u16 misc,
return err;
}
-int event__process_build_id(event_t *self,
- struct perf_session *session)
+int perf_event__process_build_id(union perf_event *event,
+ struct perf_session *session)
{
- __event_process_build_id(&self->build_id,
- self->build_id.filename,
+ __event_process_build_id(&event->build_id,
+ event->build_id.filename,
session);
return 0;
}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 33f16be7b72f..f042cebcec1e 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -65,8 +65,11 @@ struct perf_header {
int perf_header__init(struct perf_header *self);
void perf_header__exit(struct perf_header *self);
+struct perf_evlist;
+
int perf_header__read(struct perf_session *session, int fd);
-int perf_header__write(struct perf_header *self, int fd, bool at_exit);
+int perf_header__write(struct perf_header *self, struct perf_evlist *evlist,
+ int fd, bool at_exit);
int perf_header__write_pipe(int fd);
int perf_header__add_attr(struct perf_header *self,
@@ -97,32 +100,32 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir,
const char *name, bool is_kallsyms);
int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir);
-int event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
- event__handler_t process,
- struct perf_session *session);
-int event__synthesize_attrs(struct perf_header *self,
- event__handler_t process,
- struct perf_session *session);
-int event__process_attr(event_t *self, struct perf_session *session);
-
-int event__synthesize_event_type(u64 event_id, char *name,
- event__handler_t process,
+int perf_event__synthesize_attr(struct perf_event_attr *attr, u16 ids, u64 *id,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_attrs(struct perf_header *self,
+ perf_event__handler_t process,
struct perf_session *session);
-int event__synthesize_event_types(event__handler_t process,
- struct perf_session *session);
-int event__process_event_type(event_t *self,
- struct perf_session *session);
-
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
- event__handler_t process,
+int perf_event__process_attr(union perf_event *event, struct perf_session *session);
+
+int perf_event__synthesize_event_type(u64 event_id, char *name,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__synthesize_event_types(perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__process_event_type(union perf_event *event,
struct perf_session *session);
-int event__process_tracing_data(event_t *self,
- struct perf_session *session);
-
-int event__synthesize_build_id(struct dso *pos, u16 misc,
- event__handler_t process,
- struct machine *machine,
- struct perf_session *session);
-int event__process_build_id(event_t *self, struct perf_session *session);
+int perf_event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process,
+ struct perf_session *session);
+int perf_event__process_tracing_data(union perf_event *event,
+ struct perf_session *session);
+
+int perf_event__synthesize_build_id(struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct perf_session *session);
+int perf_event__process_build_id(union perf_event *event,
+ struct perf_session *session);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32f4f1f2f6e4..3f437236f193 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,3 +1,4 @@
+#include "annotate.h"
#include "util.h"
#include "build-id.h"
#include "hist.h"
@@ -211,7 +212,9 @@ void hist_entry__free(struct hist_entry *he)
* collapse the histogram
*/
-static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
+static bool hists__collapse_insert_entry(struct hists *self,
+ struct rb_root *root,
+ struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -226,8 +229,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
if (!cmp) {
iter->period += he->period;
- if (symbol_conf.use_callchain)
- callchain_merge(iter->callchain, he->callchain);
+ if (symbol_conf.use_callchain) {
+ callchain_cursor_reset(&self->callchain_cursor);
+ callchain_merge(&self->callchain_cursor, iter->callchain,
+ he->callchain);
+ }
hist_entry__free(he);
return false;
}
@@ -262,7 +268,7 @@ void hists__collapse_resort(struct hists *self)
next = rb_next(&n->rb_node);
rb_erase(&n->rb_node, &self->entries);
- if (collapse__insert_entry(&tmp, n))
+ if (hists__collapse_insert_entry(self, &tmp, n))
hists__inc_nr_entries(self, n);
}
@@ -425,7 +431,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
u64 cumul;
child = rb_entry(node, struct callchain_node, rb_node);
- cumul = cumul_hits(child);
+ cumul = callchain_cumul_hits(child);
remaining -= cumul;
/*
@@ -944,225 +950,14 @@ void hists__filter_by_thread(struct hists *self, const struct thread *thread)
}
}
-static int symbol__alloc_hist(struct symbol *self)
-{
- struct sym_priv *priv = symbol__priv(self);
- const int size = (sizeof(*priv->hist) +
- (self->end - self->start) * sizeof(u64));
-
- priv->hist = zalloc(size);
- return priv->hist == NULL ? -1 : 0;
-}
-
-int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip)
-{
- unsigned int sym_size, offset;
- struct symbol *sym = self->ms.sym;
- struct sym_priv *priv;
- struct sym_hist *h;
-
- if (!sym || !self->ms.map)
- return 0;
-
- priv = symbol__priv(sym);
- if (priv->hist == NULL && symbol__alloc_hist(sym) < 0)
- return -ENOMEM;
-
- sym_size = sym->end - sym->start;
- offset = ip - sym->start;
-
- pr_debug3("%s: ip=%#" PRIx64 "\n", __func__, self->ms.map->unmap_ip(self->ms.map, ip));
-
- if (offset >= sym_size)
- return 0;
-
- h = priv->hist;
- h->sum++;
- h->ip[offset]++;
-
- pr_debug3("%#" PRIx64 " %s: period++ [ip: %#" PRIx64 ", %#" PRIx64
- "] => %" PRIu64 "\n", self->ms.sym->start, self->ms.sym->name,
- ip, ip - self->ms.sym->start, h->ip[offset]);
- return 0;
-}
-
-static struct objdump_line *objdump_line__new(s64 offset, char *line, size_t privsize)
-{
- struct objdump_line *self = malloc(sizeof(*self) + privsize);
-
- if (self != NULL) {
- self->offset = offset;
- self->line = line;
- }
-
- return self;
-}
-
-void objdump_line__free(struct objdump_line *self)
-{
- free(self->line);
- free(self);
-}
-
-static void objdump__add_line(struct list_head *head, struct objdump_line *line)
-{
- list_add_tail(&line->node, head);
-}
-
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
- struct objdump_line *pos)
-{
- list_for_each_entry_continue(pos, head, node)
- if (pos->offset >= 0)
- return pos;
-
- return NULL;
-}
-
-static int hist_entry__parse_objdump_line(struct hist_entry *self, FILE *file,
- struct list_head *head, size_t privsize)
+int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 ip)
{
- struct symbol *sym = self->ms.sym;
- struct objdump_line *objdump_line;
- char *line = NULL, *tmp, *tmp2, *c;
- size_t line_len;
- s64 line_ip, offset = -1;
-
- if (getline(&line, &line_len, file) < 0)
- return -1;
-
- if (!line)
- return -1;
-
- while (line_len != 0 && isspace(line[line_len - 1]))
- line[--line_len] = '\0';
-
- c = strchr(line, '\n');
- if (c)
- *c = 0;
-
- line_ip = -1;
-
- /*
- * Strip leading spaces:
- */
- tmp = line;
- while (*tmp) {
- if (*tmp != ' ')
- break;
- tmp++;
- }
-
- if (*tmp) {
- /*
- * Parse hexa addresses followed by ':'
- */
- line_ip = strtoull(tmp, &tmp2, 16);
- if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
- line_ip = -1;
- }
-
- if (line_ip != -1) {
- u64 start = map__rip_2objdump(self->ms.map, sym->start),
- end = map__rip_2objdump(self->ms.map, sym->end);
-
- offset = line_ip - start;
- if (offset < 0 || (u64)line_ip > end)
- offset = -1;
- }
-
- objdump_line = objdump_line__new(offset, line, privsize);
- if (objdump_line == NULL) {
- free(line);
- return -1;
- }
- objdump__add_line(head, objdump_line);
-
- return 0;
+ return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
}
-int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
- size_t privsize)
+int hist_entry__annotate(struct hist_entry *he, size_t privsize)
{
- struct symbol *sym = self->ms.sym;
- struct map *map = self->ms.map;
- struct dso *dso = map->dso;
- char *filename = dso__build_id_filename(dso, NULL, 0);
- bool free_filename = true;
- char command[PATH_MAX * 2];
- FILE *file;
- int err = 0;
- u64 len;
- char symfs_filename[PATH_MAX];
-
- if (filename) {
- snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
- symbol_conf.symfs, filename);
- }
-
- if (filename == NULL) {
- if (dso->has_build_id) {
- pr_err("Can't annotate %s: not enough memory\n",
- sym->name);
- return -ENOMEM;
- }
- goto fallback;
- } else if (readlink(symfs_filename, command, sizeof(command)) < 0 ||
- strstr(command, "[kernel.kallsyms]") ||
- access(symfs_filename, R_OK)) {
- free(filename);
-fallback:
- /*
- * If we don't have build-ids or the build-id file isn't in the
- * cache, or is just a kallsyms file, well, lets hope that this
- * DSO is the same as when 'perf record' ran.
- */
- filename = dso->long_name;
- snprintf(symfs_filename, sizeof(symfs_filename), "%s%s",
- symbol_conf.symfs, filename);
- free_filename = false;
- }
-
- if (dso->origin == DSO__ORIG_KERNEL) {
- if (dso->annotate_warned)
- goto out_free_filename;
- err = -ENOENT;
- dso->annotate_warned = 1;
- pr_err("Can't annotate %s: No vmlinux file was found in the "
- "path\n", sym->name);
- goto out_free_filename;
- }
-
- pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
- filename, sym->name, map->unmap_ip(map, sym->start),
- map->unmap_ip(map, sym->end));
-
- len = sym->end - sym->start;
-
- pr_debug("annotating [%p] %30s : [%p] %30s\n",
- dso, dso->long_name, sym, sym->name);
-
- snprintf(command, sizeof(command),
- "objdump --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -dS -C %s|grep -v %s|expand",
- map__rip_2objdump(map, sym->start),
- map__rip_2objdump(map, sym->end),
- symfs_filename, filename);
-
- pr_debug("Executing: %s\n", command);
-
- file = popen(command, "r");
- if (!file)
- goto out_free_filename;
-
- while (!feof(file))
- if (hist_entry__parse_objdump_line(self, file, head, privsize) < 0)
- break;
-
- pclose(file);
-out_free_filename:
- if (free_filename)
- free(filename);
- return err;
+ return symbol__annotate(he->ms.sym, he->ms.map, privsize);
}
void hists__inc_nr_events(struct hists *self, u32 type)
@@ -1177,7 +972,7 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp)
size_t ret = 0;
for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
- const char *name = event__get_event_name(i);
+ const char *name = perf_event__name(i);
if (!strcmp(name, "UNKNOWN"))
continue;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee789856a8c9..37c79089de09 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -9,33 +9,6 @@ extern struct callchain_param callchain_param;
struct hist_entry;
struct addr_location;
struct symbol;
-struct rb_root;
-
-struct objdump_line {
- struct list_head node;
- s64 offset;
- char *line;
-};
-
-void objdump_line__free(struct objdump_line *self);
-struct objdump_line *objdump__get_next_ip_line(struct list_head *head,
- struct objdump_line *pos);
-
-struct sym_hist {
- u64 sum;
- u64 ip[0];
-};
-
-struct sym_ext {
- struct rb_node node;
- double percent;
- char *path;
-};
-
-struct sym_priv {
- struct sym_hist *hist;
- struct sym_ext *ext;
-};
/*
* The kernel collects the number of events it couldn't send in a stretch and
@@ -77,6 +50,8 @@ struct hists {
u64 event_stream;
u32 type;
u16 col_len[HISTC_NR_COLS];
+ /* Best would be to reuse the session callchain cursor */
+ struct callchain_cursor callchain_cursor;
};
struct hist_entry *__hists__add_entry(struct hists *self,
@@ -102,9 +77,8 @@ size_t hists__fprintf_nr_events(struct hists *self, FILE *fp);
size_t hists__fprintf(struct hists *self, struct hists *pair,
bool show_displacement, FILE *fp);
-int hist_entry__inc_addr_samples(struct hist_entry *self, u64 ip);
-int hist_entry__annotate(struct hist_entry *self, struct list_head *head,
- size_t privsize);
+int hist_entry__inc_addr_samples(struct hist_entry *self, int evidx, u64 addr);
+int hist_entry__annotate(struct hist_entry *self, size_t privsize);
void hists__filter_by_dso(struct hists *self, const struct dso *dso);
void hists__filter_by_thread(struct hists *self, const struct thread *thread);
@@ -116,18 +90,20 @@ bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len);
#ifdef NO_NEWT_SUPPORT
static inline int hists__browse(struct hists *self __used,
const char *helpline __used,
- const char *ev_name __used)
+ const char *ev_name __used, int evidx __used)
{
return 0;
}
static inline int hists__tui_browse_tree(struct rb_root *self __used,
- const char *help __used)
+ const char *help __used,
+ int evidx __used)
{
return 0;
}
-static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
+static inline int hist_entry__tui_annotate(struct hist_entry *self __used,
+ int evidx __used)
{
return 0;
}
@@ -136,13 +112,13 @@ static inline int hist_entry__tui_annotate(struct hist_entry *self __used)
#else
#include <newt.h>
int hists__browse(struct hists *self, const char *helpline,
- const char *ev_name);
-int hist_entry__tui_annotate(struct hist_entry *self);
+ const char *ev_name, int evidx);
+int hist_entry__tui_annotate(struct hist_entry *self, int evidx);
#define KEY_LEFT NEWT_KEY_LEFT
#define KEY_RIGHT NEWT_KEY_RIGHT
-int hists__tui_browse_tree(struct rb_root *self, const char *help);
+int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx);
#endif
unsigned int hists__sort_list_width(struct hists *self);
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index f5ca26e53fbb..356c7e467b83 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include "../../../../include/linux/list.h"
#ifndef PERF_LIST_H
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 135f69baf966..cf082daa43e3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
#include "../../../include/linux/hw_breakpoint.h"
#include "util.h"
#include "../perf.h"
+#include "evlist.h"
#include "evsel.h"
#include "parse-options.h"
#include "parse-events.h"
@@ -11,10 +12,6 @@
#include "header.h"
#include "debugfs.h"
-int nr_counters;
-
-LIST_HEAD(evsel_list);
-
struct event_symbol {
u8 type;
u64 config;
@@ -449,8 +446,8 @@ parse_single_tracepoint_event(char *sys_name,
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
- char *flags)
+parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
+ const char *evt_exp, char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -483,15 +480,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
if (len < 0)
return EVT_FAILED;
- if (parse_events(NULL, event_opt, 0))
+ if (parse_events(opt, event_opt, 0))
return EVT_FAILED;
}
return EVT_HANDLED_ALL;
}
-static enum event_result parse_tracepoint_event(const char **strp,
- struct perf_event_attr *attr)
+static enum event_result
+parse_tracepoint_event(const struct option *opt, const char **strp,
+ struct perf_event_attr *attr)
{
const char *evt_name;
char *flags = NULL, *comma_loc;
@@ -530,7 +528,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
return EVT_FAILED;
if (strpbrk(evt_name, "*?")) {
*strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
- return parse_multiple_tracepoint_event(sys_name, evt_name,
+ return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
flags);
} else {
return parse_single_tracepoint_event(sys_name, evt_name,
@@ -740,11 +738,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
* Symbolic names are (almost) exactly matched.
*/
static enum event_result
-parse_event_symbols(const char **str, struct perf_event_attr *attr)
+parse_event_symbols(const struct option *opt, const char **str,
+ struct perf_event_attr *attr)
{
enum event_result ret;
- ret = parse_tracepoint_event(str, attr);
+ ret = parse_tracepoint_event(opt, str, attr);
if (ret != EVT_FAILED)
goto modifier;
@@ -778,14 +777,15 @@ modifier:
return ret;
}
-int parse_events(const struct option *opt __used, const char *str, int unset __used)
+int parse_events(const struct option *opt, const char *str, int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_event_attr attr;
enum event_result ret;
for (;;) {
memset(&attr, 0, sizeof(attr));
- ret = parse_event_symbols(&str, &attr);
+ ret = parse_event_symbols(opt, &str, &attr);
if (ret == EVT_FAILED)
return -1;
@@ -794,12 +794,10 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
if (ret != EVT_HANDLED_ALL) {
struct perf_evsel *evsel;
- evsel = perf_evsel__new(&attr,
- nr_counters);
+ evsel = perf_evsel__new(&attr, evlist->nr_entries);
if (evsel == NULL)
return -1;
- list_add_tail(&evsel->node, &evsel_list);
- ++nr_counters;
+ perf_evlist__add(evlist, evsel);
}
if (*str == 0)
@@ -813,13 +811,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
return 0;
}
-int parse_filter(const struct option *opt __used, const char *str,
+int parse_filter(const struct option *opt, const char *str,
int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *last = NULL;
- if (!list_empty(&evsel_list))
- last = list_entry(evsel_list.prev, struct perf_evsel, node);
+ if (evlist->nr_entries > 0)
+ last = list_entry(evlist->entries.prev, struct perf_evsel, node);
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
fprintf(stderr,
@@ -981,33 +980,3 @@ void print_events(void)
exit(129);
}
-
-int perf_evsel_list__create_default(void)
-{
- struct perf_evsel *evsel;
- struct perf_event_attr attr;
-
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_HARDWARE;
- attr.config = PERF_COUNT_HW_CPU_CYCLES;
-
- evsel = perf_evsel__new(&attr, 0);
-
- if (evsel == NULL)
- return -ENOMEM;
-
- list_add(&evsel->node, &evsel_list);
- ++nr_counters;
- return 0;
-}
-
-void perf_evsel_list__delete(void)
-{
- struct perf_evsel *pos, *n;
-
- list_for_each_entry_safe(pos, n, &evsel_list, node) {
- list_del_init(&pos->node);
- perf_evsel__delete(pos);
- }
- nr_counters = 0;
-}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 458e3ecf17af..cf7e94abb676 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,11 +9,6 @@
struct list_head;
struct perf_evsel;
-extern struct list_head evsel_list;
-
-int perf_evsel_list__create_default(void);
-void perf_evsel_list__delete(void);
-
struct option;
struct tracepoint_path {
@@ -25,8 +20,6 @@ struct tracepoint_path {
extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
extern bool have_tracepoints(struct list_head *evlist);
-extern int nr_counters;
-
const char *event_name(struct perf_evsel *event);
extern const char *__event_name(int type, u64 config);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e29d9c9dccc..9d237e3cff5d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -31,6 +31,7 @@
#include <string.h>
#include <stdarg.h>
#include <limits.h>
+#include <elf.h>
#undef _GNU_SOURCE
#include "util.h"
@@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
NULL);
}
-const char *kernel_get_module_path(const char *module)
+static struct map *kernel_get_module_map(const char *module)
+{
+ struct rb_node *nd;
+ struct map_groups *grp = &machine.kmaps;
+
+ if (!module)
+ module = "kernel";
+
+ for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+ if (strncmp(pos->dso->short_name + 1, module,
+ pos->dso->short_name_len - 2) == 0) {
+ return pos;
+ }
+ }
+ return NULL;
+}
+
+static struct dso *kernel_get_module_dso(const char *module)
{
struct dso *dso;
struct map *map;
@@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module)
}
}
found:
- return dso->long_name;
+ return dso;
+}
+
+const char *kernel_get_module_path(const char *module)
+{
+ struct dso *dso = kernel_get_module_dso(module);
+ return (dso) ? dso->long_name : NULL;
}
#ifdef DWARF_SUPPORT
@@ -426,12 +451,14 @@ end:
}
static int show_available_vars_at(int fd, struct perf_probe_event *pev,
- int max_vls, bool externs)
+ int max_vls, struct strfilter *_filter,
+ bool externs)
{
char *buf;
- int ret, i;
+ int ret, i, nvars;
struct str_node *node;
struct variable_list *vls = NULL, *vl;
+ const char *var;
buf = synthesize_perf_probe_point(&pev->point);
if (!buf)
@@ -439,36 +466,45 @@ static int show_available_vars_at(int fd, struct perf_probe_event *pev,
pr_debug("Searching variables at %s\n", buf);
ret = find_available_vars_at(fd, pev, &vls, max_vls, externs);
- if (ret > 0) {
- /* Some variables were found */
- fprintf(stdout, "Available variables at %s\n", buf);
- for (i = 0; i < ret; i++) {
- vl = &vls[i];
- /*
- * A probe point might be converted to
- * several trace points.
- */
- fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
- vl->point.offset);
- free(vl->point.symbol);
- if (vl->vars) {
- strlist__for_each(node, vl->vars)
+ if (ret <= 0) {
+ pr_err("Failed to find variables at %s (%d)\n", buf, ret);
+ goto end;
+ }
+ /* Some variables are found */
+ fprintf(stdout, "Available variables at %s\n", buf);
+ for (i = 0; i < ret; i++) {
+ vl = &vls[i];
+ /*
+ * A probe point might be converted to
+ * several trace points.
+ */
+ fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+ vl->point.offset);
+ free(vl->point.symbol);
+ nvars = 0;
+ if (vl->vars) {
+ strlist__for_each(node, vl->vars) {
+ var = strchr(node->s, '\t') + 1;
+ if (strfilter__compare(_filter, var)) {
fprintf(stdout, "\t\t%s\n", node->s);
- strlist__delete(vl->vars);
- } else
- fprintf(stdout, "(No variables)\n");
+ nvars++;
+ }
+ }
+ strlist__delete(vl->vars);
}
- free(vls);
- } else
- pr_err("Failed to find variables at %s (%d)\n", buf, ret);
-
+ if (nvars == 0)
+ fprintf(stdout, "\t\t(No matched variables)\n");
+ }
+ free(vls);
+end:
free(buf);
return ret;
}
/* Show available variables on given probe point */
int show_available_vars(struct perf_probe_event *pevs, int npevs,
- int max_vls, const char *module, bool externs)
+ int max_vls, const char *module,
+ struct strfilter *_filter, bool externs)
{
int i, fd, ret = 0;
@@ -485,7 +521,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
setup_pager();
for (i = 0; i < npevs && ret >= 0; i++)
- ret = show_available_vars_at(fd, &pevs[i], max_vls, externs);
+ ret = show_available_vars_at(fd, &pevs[i], max_vls, _filter,
+ externs);
close(fd);
return ret;
@@ -531,7 +568,9 @@ int show_line_range(struct line_range *lr __unused, const char *module __unused)
int show_available_vars(struct perf_probe_event *pevs __unused,
int npevs __unused, int max_vls __unused,
- const char *module __unused, bool externs __unused)
+ const char *module __unused,
+ struct strfilter *filter __unused,
+ bool externs __unused)
{
pr_warning("Debuginfo-analysis is not supported.\n");
return -ENOSYS;
@@ -1912,4 +1951,46 @@ int del_perf_probe_events(struct strlist *dellist)
return ret;
}
+/* TODO: don't use a global variable for filter ... */
+static struct strfilter *available_func_filter;
+/*
+ * If a symbol corresponds to a function with global binding and
+ * matches filter return 0. For all others return 1.
+ */
+static int filter_available_functions(struct map *map __unused,
+ struct symbol *sym)
+{
+ if (sym->binding == STB_GLOBAL &&
+ strfilter__compare(available_func_filter, sym->name))
+ return 0;
+ return 1;
+}
+
+int show_available_funcs(const char *module, struct strfilter *_filter)
+{
+ struct map *map;
+ int ret;
+
+ setup_pager();
+
+ ret = init_vmlinux();
+ if (ret < 0)
+ return ret;
+
+ map = kernel_get_module_map(module);
+ if (!map) {
+ pr_err("Failed to find %s map.\n", (module) ? : "kernel");
+ return -EINVAL;
+ }
+ available_func_filter = _filter;
+ if (map__load(map, filter_available_functions)) {
+ pr_err("Failed to load map.\n");
+ return -EINVAL;
+ }
+ if (!dso__sorted_by_name(map->dso, map->type))
+ dso__sort_by_name(map->dso, map->type);
+
+ dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5accbedfea37..3434fc9d79d5 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -3,6 +3,7 @@
#include <stdbool.h>
#include "strlist.h"
+#include "strfilter.h"
extern bool probe_event_dry_run;
@@ -126,7 +127,8 @@ extern int show_perf_probe_events(void);
extern int show_line_range(struct line_range *lr, const char *module);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_probe_points, const char *module,
- bool externs);
+ struct strfilter *filter, bool externs);
+extern int show_available_funcs(const char *module, struct strfilter *filter);
/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ab83b6ac5d65..fe461f6559f1 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -33,6 +33,7 @@
#include <ctype.h>
#include <dwarf-regs.h>
+#include <linux/bitops.h>
#include "event.h"
#include "debug.h"
#include "util.h"
@@ -280,6 +281,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
return name ? (strcmp(tname, name) == 0) : false;
}
+/* Get callsite line number of inline-function instance */
+static int die_get_call_lineno(Dwarf_Die *in_die)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Word ret;
+
+ if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+ return -ENOENT;
+
+ dwarf_formudata(&attr, &ret);
+ return (int)ret;
+}
+
/* Get type die */
static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
@@ -320,13 +334,23 @@ static Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
return vr_die;
}
-static bool die_is_signed_type(Dwarf_Die *tp_die)
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ Dwarf_Word *result)
{
Dwarf_Attribute attr;
+
+ if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+static bool die_is_signed_type(Dwarf_Die *tp_die)
+{
Dwarf_Word ret;
- if (dwarf_attr(tp_die, DW_AT_encoding, &attr) == NULL ||
- dwarf_formudata(&attr, &ret) != 0)
+ if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
return false;
return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
@@ -335,11 +359,29 @@ static bool die_is_signed_type(Dwarf_Die *tp_die)
static int die_get_byte_size(Dwarf_Die *tp_die)
{
- Dwarf_Attribute attr;
Dwarf_Word ret;
- if (dwarf_attr(tp_die, DW_AT_byte_size, &attr) == NULL ||
- dwarf_formudata(&attr, &ret) != 0)
+ if (die_get_attr_udata(tp_die, DW_AT_byte_size, &ret))
+ return 0;
+
+ return (int)ret;
+}
+
+static int die_get_bit_size(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_bit_size, &ret))
+ return 0;
+
+ return (int)ret;
+}
+
+static int die_get_bit_offset(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_bit_offset, &ret))
return 0;
return (int)ret;
@@ -458,6 +500,151 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
}
+/* Walker on lines (Note: line number will not be sorted) */
+typedef int (* line_walk_handler_t) (const char *fname, int lineno,
+ Dwarf_Addr addr, void *data);
+
+struct __line_walk_param {
+ const char *fname;
+ line_walk_handler_t handler;
+ void *data;
+ int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+ Dwarf_Addr addr;
+ int lineno;
+
+ if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+ lineno = die_get_call_lineno(in_die);
+ if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) {
+ lw->retval = lw->handler(lw->fname, lineno, addr,
+ lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_FOUND;
+ }
+ }
+ return DIE_FIND_CB_SIBLING;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die,
+ line_walk_handler_t handler, void *data)
+{
+ struct __line_walk_param lw = {
+ .handler = handler,
+ .data = data,
+ .retval = 0,
+ };
+ Dwarf_Die die_mem;
+ Dwarf_Addr addr;
+ int lineno;
+
+ /* Handle function declaration line */
+ lw.fname = dwarf_decl_file(sp_die);
+ if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+ dwarf_entrypc(sp_die, &addr) == 0) {
+ lw.retval = handler(lw.fname, lineno, addr, data);
+ if (lw.retval != 0)
+ goto done;
+ }
+ die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+ return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+
+ lw->retval = __die_walk_funclines(sp_die, lw->handler, lw->data);
+ if (lw->retval != 0)
+ return DWARF_CB_ABORT;
+
+ return DWARF_CB_OK;
+}
+
+/*
+ * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
+ * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
+ */
+static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
+ void *data)
+{
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ const char *fname;
+ int lineno, ret = 0;
+ Dwarf_Die die_mem, *cu_die;
+ size_t nlines, i;
+
+ /* Get the CU die */
+ if (dwarf_tag(pdie) == DW_TAG_subprogram)
+ cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
+ else
+ cu_die = pdie;
+ if (!cu_die) {
+ pr_debug2("Failed to get CU from subprogram\n");
+ return -EINVAL;
+ }
+
+ /* Get lines list in the CU */
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+ pr_debug2("Failed to get source lines on this CU.\n");
+ return -ENOENT;
+ }
+ pr_debug2("Get %zd lines from this CU\n", nlines);
+
+ /* Walk on the lines on lines list */
+ for (i = 0; i < nlines; i++) {
+ line = dwarf_onesrcline(lines, i);
+ if (line == NULL ||
+ dwarf_lineno(line, &lineno) != 0 ||
+ dwarf_lineaddr(line, &addr) != 0) {
+ pr_debug2("Failed to get line info. "
+ "Possible error in debuginfo.\n");
+ continue;
+ }
+ /* Filter lines based on address */
+ if (pdie != cu_die)
+ /*
+ * Address filtering
+ * The line is included in given function, and
+ * no inline block includes it.
+ */
+ if (!dwarf_haspc(pdie, addr) ||
+ die_find_inlinefunc(pdie, addr, &die_mem))
+ continue;
+ /* Get source line */
+ fname = dwarf_linesrc(line, NULL, NULL);
+
+ ret = handler(fname, lineno, addr, data);
+ if (ret != 0)
+ return ret;
+ }
+
+ /*
+ * Dwarf lines doesn't include function declarations and inlined
+ * subroutines. We have to check functions list or given function.
+ */
+ if (pdie != cu_die)
+ ret = __die_walk_funclines(pdie, handler, data);
+ else {
+ struct __line_walk_param param = {
+ .handler = handler,
+ .data = data,
+ .retval = 0,
+ };
+ dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+ ret = param.retval;
+ }
+
+ return ret;
+}
+
struct __find_variable_param {
const char *name;
Dwarf_Addr addr;
@@ -669,6 +856,8 @@ static_var:
return 0;
}
+#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
+
static int convert_variable_type(Dwarf_Die *vr_die,
struct probe_trace_arg *tvar,
const char *cast)
@@ -685,6 +874,14 @@ static int convert_variable_type(Dwarf_Die *vr_die,
return (tvar->type == NULL) ? -ENOMEM : 0;
}
+ if (die_get_bit_size(vr_die) != 0) {
+ /* This is a bitfield */
+ ret = snprintf(buf, 16, "b%d@%d/%zd", die_get_bit_size(vr_die),
+ die_get_bit_offset(vr_die),
+ BYTES_TO_BITS(die_get_byte_size(vr_die)));
+ goto formatted;
+ }
+
if (die_get_real_type(vr_die, &type) == NULL) {
pr_warning("Failed to get a type information of %s.\n",
dwarf_diename(vr_die));
@@ -729,29 +926,31 @@ static int convert_variable_type(Dwarf_Die *vr_die,
return (tvar->type == NULL) ? -ENOMEM : 0;
}
- ret = die_get_byte_size(&type) * 8;
- if (ret) {
- /* Check the bitwidth */
- if (ret > MAX_BASIC_TYPE_BITS) {
- pr_info("%s exceeds max-bitwidth."
- " Cut down to %d bits.\n",
- dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
- ret = MAX_BASIC_TYPE_BITS;
- }
+ ret = BYTES_TO_BITS(die_get_byte_size(&type));
+ if (!ret)
+ /* No size ... try to use default type */
+ return 0;
- ret = snprintf(buf, 16, "%c%d",
- die_is_signed_type(&type) ? 's' : 'u', ret);
- if (ret < 0 || ret >= 16) {
- if (ret >= 16)
- ret = -E2BIG;
- pr_warning("Failed to convert variable type: %s\n",
- strerror(-ret));
- return ret;
- }
- tvar->type = strdup(buf);
- if (tvar->type == NULL)
- return -ENOMEM;
+ /* Check the bitwidth */
+ if (ret > MAX_BASIC_TYPE_BITS) {
+ pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+ dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+ ret = MAX_BASIC_TYPE_BITS;
+ }
+ ret = snprintf(buf, 16, "%c%d",
+ die_is_signed_type(&type) ? 's' : 'u', ret);
+
+formatted:
+ if (ret < 0 || ret >= 16) {
+ if (ret >= 16)
+ ret = -E2BIG;
+ pr_warning("Failed to convert variable type: %s\n",
+ strerror(-ret));
+ return ret;
}
+ tvar->type = strdup(buf);
+ if (tvar->type == NULL)
+ return -ENOMEM;
return 0;
}
@@ -1050,157 +1249,102 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
return ret;
}
-/* Find probe point from its line number */
-static int find_probe_point_by_line(struct probe_finder *pf)
+static int probe_point_line_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno;
- int ret = 0;
-
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
+ struct probe_finder *pf = data;
+ int ret;
- for (i = 0; i < nlines && ret == 0; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- lineno != pf->lno)
- continue;
+ if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+ return 0;
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
+ pf->addr = addr;
+ ret = call_probe_finder(NULL, pf);
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_warning("Failed to get the address of the line.\n");
- return -ENOENT;
- }
- pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
- (int)i, lineno, (uintmax_t)addr);
- pf->addr = addr;
+ /* Continue if no error, because the line will be in inline function */
+ return ret < 0 ?: 0;
+}
- ret = call_probe_finder(NULL, pf);
- /* Continuing, because target line might be inlined. */
- }
- return ret;
+/* Find probe point from its line number */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+ return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
}
/* Find lines which match lazy pattern */
static int find_lazy_match_lines(struct list_head *head,
const char *fname, const char *pat)
{
- char *fbuf, *p1, *p2;
- int fd, line, nlines = -1;
- struct stat st;
-
- fd = open(fname, O_RDONLY);
- if (fd < 0) {
- pr_warning("Failed to open %s: %s\n", fname, strerror(-fd));
+ FILE *fp;
+ char *line = NULL;
+ size_t line_len;
+ ssize_t len;
+ int count = 0, linenum = 1;
+
+ fp = fopen(fname, "r");
+ if (!fp) {
+ pr_warning("Failed to open %s: %s\n", fname, strerror(errno));
return -errno;
}
- if (fstat(fd, &st) < 0) {
- pr_warning("Failed to get the size of %s: %s\n",
- fname, strerror(errno));
- nlines = -errno;
- goto out_close;
- }
-
- nlines = -ENOMEM;
- fbuf = malloc(st.st_size + 2);
- if (fbuf == NULL)
- goto out_close;
- if (read(fd, fbuf, st.st_size) < 0) {
- pr_warning("Failed to read %s: %s\n", fname, strerror(errno));
- nlines = -errno;
- goto out_free_fbuf;
- }
- fbuf[st.st_size] = '\n'; /* Dummy line */
- fbuf[st.st_size + 1] = '\0';
- p1 = fbuf;
- line = 1;
- nlines = 0;
- while ((p2 = strchr(p1, '\n')) != NULL) {
- *p2 = '\0';
- if (strlazymatch(p1, pat)) {
- line_list__add_line(head, line);
- nlines++;
+ while ((len = getline(&line, &line_len, fp)) > 0) {
+
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+
+ if (strlazymatch(line, pat)) {
+ line_list__add_line(head, linenum);
+ count++;
}
- line++;
- p1 = p2 + 1;
+ linenum++;
}
-out_free_fbuf:
- free(fbuf);
-out_close:
- close(fd);
- return nlines;
+
+ if (ferror(fp))
+ count = -errno;
+ free(line);
+ fclose(fp);
+
+ if (count == 0)
+ pr_debug("No matched lines found in %s.\n", fname);
+ return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ int ret;
+
+ if (!line_list__has_line(&pf->lcache, lineno) ||
+ strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ pr_debug("Probe line found: line:%d addr:0x%llx\n",
+ lineno, (unsigned long long)addr);
+ pf->addr = addr;
+ ret = call_probe_finder(NULL, pf);
+
+ /*
+ * Continue if no error, because the lazy pattern will match
+ * to other lines
+ */
+ return ret < 0 ?: 0;
}
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- Dwarf_Die die_mem;
- int lineno;
int ret = 0;
if (list_empty(&pf->lcache)) {
/* Matching lazy line pattern */
ret = find_lazy_match_lines(&pf->lcache, pf->fname,
pf->pev->point.lazy_line);
- if (ret == 0) {
- pr_debug("No matched lines found in %s.\n", pf->fname);
- return 0;
- } else if (ret < 0)
+ if (ret <= 0)
return ret;
}
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- for (i = 0; i < nlines && ret >= 0; i++) {
- line = dwarf_onesrcline(lines, i);
-
- if (dwarf_lineno(line, &lineno) != 0 ||
- !line_list__has_line(&pf->lcache, lineno))
- continue;
-
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
-
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_debug("Failed to get the address of line %d.\n",
- lineno);
- continue;
- }
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (!dwarf_haspc(sp_die, addr))
- continue;
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
- (int)i, lineno, (unsigned long long)addr);
- pf->addr = addr;
-
- ret = call_probe_finder(sp_die, pf);
- /* Continuing, because target line might be inlined. */
- }
- /* TODO: deallocate lines, but how? */
- return ret;
+ return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
}
/* Callback parameter with return value */
@@ -1644,91 +1788,28 @@ static int line_range_add_line(const char *src, unsigned int lineno,
return line_list__add_line(&lr->line_list, lineno);
}
-/* Search function declaration lines */
-static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data)
+static int line_range_walk_cb(const char *fname, int lineno,
+ Dwarf_Addr addr __used,
+ void *data)
{
- struct dwarf_callback_param *param = data;
- struct line_finder *lf = param->data;
- const char *src;
- int lineno;
+ struct line_finder *lf = data;
- src = dwarf_decl_file(sp_die);
- if (src && strtailcmp(src, lf->fname) != 0)
- return DWARF_CB_OK;
-
- if (dwarf_decl_line(sp_die, &lineno) != 0 ||
+ if ((strtailcmp(fname, lf->fname) != 0) ||
(lf->lno_s > lineno || lf->lno_e < lineno))
- return DWARF_CB_OK;
+ return 0;
- param->retval = line_range_add_line(src, lineno, lf->lr);
- if (param->retval < 0)
- return DWARF_CB_ABORT;
- return DWARF_CB_OK;
-}
+ if (line_range_add_line(fname, lineno, lf->lr) < 0)
+ return -EINVAL;
-static int find_line_range_func_decl_lines(struct line_finder *lf)
-{
- struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
- dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
- return param.retval;
+ return 0;
}
/* Find line range from its line number */
static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno, ret = 0;
- const char *src;
- Dwarf_Die die_mem;
-
- line_list__init(&lf->lr->line_list);
- if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- /* Search probable lines on lines list */
- for (i = 0; i < nlines; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- (lf->lno_s > lineno || lf->lno_e < lineno))
- continue;
-
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (dwarf_lineaddr(line, &addr) != 0 ||
- !dwarf_haspc(sp_die, addr))
- continue;
-
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- /* TODO: Get fileno from line, but how? */
- src = dwarf_linesrc(line, NULL, NULL);
- if (strtailcmp(src, lf->fname) != 0)
- continue;
-
- ret = line_range_add_line(src, lineno, lf->lr);
- if (ret < 0)
- return ret;
- }
+ int ret;
- /*
- * Dwarf lines doesn't include function declarations. We have to
- * check functions list or given function.
- */
- if (sp_die) {
- src = dwarf_decl_file(sp_die);
- if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
- (lf->lno_s <= lineno && lf->lno_e >= lineno))
- ret = line_range_add_line(src, lineno, lf->lr);
- } else
- ret = find_line_range_func_decl_lines(lf);
+ ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
/* Update status */
if (ret >= 0)
@@ -1758,9 +1839,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
struct line_finder *lf = param->data;
struct line_range *lr = lf->lr;
- pr_debug("find (%llx) %s\n",
- (unsigned long long)dwarf_dieoffset(sp_die),
- dwarf_diename(sp_die));
if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000000..5317ef229041
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,891 @@
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "event.h"
+#include "cpumap.h"
+#include "thread_map.h"
+
+struct throttle_event {
+ struct perf_event_header header;
+ u64 time;
+ u64 id;
+ u64 stream_id;
+};
+
+PyMODINIT_FUNC initperf(void);
+
+#define member_def(type, member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+ 0, help }
+
+#define sample_member_def(name, member, ptype, help) \
+ { #name, ptype, \
+ offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+ 0, help }
+
+struct pyrf_event {
+ PyObject_HEAD
+ struct perf_sample sample;
+ union perf_event event;
+};
+
+#define sample_members \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_pid, pid, T_INT, "event pid"), \
+ sample_member_def(sample_tid, tid, T_INT, "event tid"), \
+ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
+ sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
+ sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
+ sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+ sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
+ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+static PyMemberDef pyrf_mmap_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(mmap_event, pid, T_UINT, "event pid"),
+ member_def(mmap_event, tid, T_UINT, "event tid"),
+ member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
+ member_def(mmap_event, len, T_ULONGLONG, "map length"),
+ member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
+ member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
+ "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
+ "filename: %s }",
+ pevent->event.mmap.pid, pevent->event.mmap.tid,
+ pevent->event.mmap.start, pevent->event.mmap.len,
+ pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = PyString_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_mmap_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.mmap_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_mmap_event__doc,
+ .tp_members = pyrf_mmap_event__members,
+ .tp_repr = (reprfunc)pyrf_mmap_event__repr,
+};
+
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+static PyMemberDef pyrf_task_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(fork_event, pid, T_UINT, "event pid"),
+ member_def(fork_event, ppid, T_UINT, "event ppid"),
+ member_def(fork_event, tid, T_UINT, "event tid"),
+ member_def(fork_event, ptid, T_UINT, "event ptid"),
+ member_def(fork_event, time, T_ULONGLONG, "timestamp"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+ return PyString_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+ "ptid: %u, time: %" PRIu64 "}",
+ pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
+ pevent->event.fork.pid,
+ pevent->event.fork.ppid,
+ pevent->event.fork.tid,
+ pevent->event.fork.ptid,
+ pevent->event.fork.time);
+}
+
+static PyTypeObject pyrf_task_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.task_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_task_event__doc,
+ .tp_members = pyrf_task_event__members,
+ .tp_repr = (reprfunc)pyrf_task_event__repr,
+};
+
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+static PyMemberDef pyrf_comm_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(comm_event, pid, T_UINT, "event pid"),
+ member_def(comm_event, tid, T_UINT, "event tid"),
+ member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+ return PyString_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+ pevent->event.comm.pid,
+ pevent->event.comm.tid,
+ pevent->event.comm.comm);
+}
+
+static PyTypeObject pyrf_comm_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.comm_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_comm_event__doc,
+ .tp_members = pyrf_comm_event__members,
+ .tp_repr = (reprfunc)pyrf_comm_event__repr,
+};
+
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+static PyMemberDef pyrf_throttle_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
+ member_def(throttle_event, id, T_ULONGLONG, "event id"),
+ member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+ struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1);
+
+ return PyString_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
+ ", stream_id: %" PRIu64 " }",
+ pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+ te->time, te->id, te->stream_id);
+}
+
+static PyTypeObject pyrf_throttle_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.throttle_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_throttle_event__doc,
+ .tp_members = pyrf_throttle_event__members,
+ .tp_repr = (reprfunc)pyrf_throttle_event__repr,
+};
+
+static int pyrf_event__setup_types(void)
+{
+ int err;
+ pyrf_mmap_event__type.tp_new =
+ pyrf_task_event__type.tp_new =
+ pyrf_comm_event__type.tp_new =
+ pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+ err = PyType_Ready(&pyrf_mmap_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_task_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_comm_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_throttle_event__type);
+ if (err < 0)
+ goto out;
+out:
+ return err;
+}
+
+static PyTypeObject *pyrf_event__type[] = {
+ [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
+ [PERF_RECORD_COMM] = &pyrf_comm_event__type,
+ [PERF_RECORD_EXIT] = &pyrf_task_event__type,
+ [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_FORK] = &pyrf_task_event__type,
+ [PERF_RECORD_READ] = &pyrf_mmap_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
+};
+
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+ struct pyrf_event *pevent;
+ PyTypeObject *ptype;
+
+ if (event->header.type < PERF_RECORD_MMAP ||
+ event->header.type > PERF_RECORD_SAMPLE)
+ return NULL;
+
+ ptype = pyrf_event__type[event->header.type];
+ pevent = PyObject_New(struct pyrf_event, ptype);
+ if (pevent != NULL)
+ memcpy(&pevent->event, event, event->header.size);
+ return (PyObject *)pevent;
+}
+
+struct pyrf_cpu_map {
+ PyObject_HEAD
+
+ struct cpu_map *cpus;
+};
+
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "cpustr", NULL, NULL, };
+ char *cpustr = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+ kwlist, &cpustr))
+ return -1;
+
+ pcpus->cpus = cpu_map__new(cpustr);
+ if (pcpus->cpus == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+ cpu_map__delete(pcpus->cpus);
+ pcpus->ob_type->tp_free((PyObject*)pcpus);
+}
+
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ return pcpus->cpus->nr;
+}
+
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ if (i >= pcpus->cpus->nr)
+ return NULL;
+
+ return Py_BuildValue("i", pcpus->cpus->map[i]);
+}
+
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+ .sq_length = pyrf_cpu_map__length,
+ .sq_item = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+static PyTypeObject pyrf_cpu_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.cpu_map",
+ .tp_basicsize = sizeof(struct pyrf_cpu_map),
+ .tp_dealloc = (destructor)pyrf_cpu_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_cpu_map__doc,
+ .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
+ .tp_init = (initproc)pyrf_cpu_map__init,
+};
+
+static int pyrf_cpu_map__setup_types(void)
+{
+ pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+struct pyrf_thread_map {
+ PyObject_HEAD
+
+ struct thread_map *threads;
+};
+
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "pid", "tid", NULL, NULL, };
+ int pid = -1, tid = -1;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
+ kwlist, &pid, &tid))
+ return -1;
+
+ pthreads->threads = thread_map__new(pid, tid);
+ if (pthreads->threads == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+ thread_map__delete(pthreads->threads);
+ pthreads->ob_type->tp_free((PyObject*)pthreads);
+}
+
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ return pthreads->threads->nr;
+}
+
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ if (i >= pthreads->threads->nr)
+ return NULL;
+
+ return Py_BuildValue("i", pthreads->threads->map[i]);
+}
+
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+ .sq_length = pyrf_thread_map__length,
+ .sq_item = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+static PyTypeObject pyrf_thread_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.thread_map",
+ .tp_basicsize = sizeof(struct pyrf_thread_map),
+ .tp_dealloc = (destructor)pyrf_thread_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_thread_map__doc,
+ .tp_as_sequence = &pyrf_thread_map__sequence_methods,
+ .tp_init = (initproc)pyrf_thread_map__init,
+};
+
+static int pyrf_thread_map__setup_types(void)
+{
+ pyrf_thread_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_thread_map__type);
+}
+
+struct pyrf_evsel {
+ PyObject_HEAD
+
+ struct perf_evsel evsel;
+};
+
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+ };
+ static char *kwlist[] = {
+ "type",
+ "config",
+ "sample_freq",
+ "sample_period",
+ "sample_type",
+ "read_format",
+ "disabled",
+ "inherit",
+ "pinned",
+ "exclusive",
+ "exclude_user",
+ "exclude_kernel",
+ "exclude_hv",
+ "exclude_idle",
+ "mmap",
+ "comm",
+ "freq",
+ "inherit_stat",
+ "enable_on_exec",
+ "task",
+ "watermark",
+ "precise_ip",
+ "mmap_data",
+ "sample_id_all",
+ "wakeup_events",
+ "bp_type",
+ "bp_addr",
+ "bp_len", NULL, NULL, };
+ u64 sample_period = 0;
+ u32 disabled = 0,
+ inherit = 0,
+ pinned = 0,
+ exclusive = 0,
+ exclude_user = 0,
+ exclude_kernel = 0,
+ exclude_hv = 0,
+ exclude_idle = 0,
+ mmap = 0,
+ comm = 0,
+ freq = 1,
+ inherit_stat = 0,
+ enable_on_exec = 0,
+ task = 0,
+ watermark = 0,
+ precise_ip = 0,
+ mmap_data = 0,
+ sample_id_all = 1;
+ int idx = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+ "|iKiKKiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+ &attr.type, &attr.config, &attr.sample_freq,
+ &sample_period, &attr.sample_type,
+ &attr.read_format, &disabled, &inherit,
+ &pinned, &exclusive, &exclude_user,
+ &exclude_kernel, &exclude_hv, &exclude_idle,
+ &mmap, &comm, &freq, &inherit_stat,
+ &enable_on_exec, &task, &watermark,
+ &precise_ip, &mmap_data, &sample_id_all,
+ &attr.wakeup_events, &attr.bp_type,
+ &attr.bp_addr, &attr.bp_len, &idx))
+ return -1;
+
+ /* union... */
+ if (sample_period != 0) {
+ if (attr.sample_freq != 0)
+ return -1; /* FIXME: throw right exception */
+ attr.sample_period = sample_period;
+ }
+
+ /* Bitfields */
+ attr.disabled = disabled;
+ attr.inherit = inherit;
+ attr.pinned = pinned;
+ attr.exclusive = exclusive;
+ attr.exclude_user = exclude_user;
+ attr.exclude_kernel = exclude_kernel;
+ attr.exclude_hv = exclude_hv;
+ attr.exclude_idle = exclude_idle;
+ attr.mmap = mmap;
+ attr.comm = comm;
+ attr.freq = freq;
+ attr.inherit_stat = inherit_stat;
+ attr.enable_on_exec = enable_on_exec;
+ attr.task = task;
+ attr.watermark = watermark;
+ attr.precise_ip = precise_ip;
+ attr.mmap_data = mmap_data;
+ attr.sample_id_all = sample_id_all;
+
+ perf_evsel__init(&pevsel->evsel, &attr, idx);
+ return 0;
+}
+
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+ perf_evsel__exit(&pevsel->evsel);
+ pevsel->ob_type->tp_free((PyObject*)pevsel);
+}
+
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evsel *evsel = &pevsel->evsel;
+ struct cpu_map *cpus = NULL;
+ struct thread_map *threads = NULL;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ int group = 0, overwrite = 0;
+ static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+ &pcpus, &pthreads, &group, &overwrite))
+ return NULL;
+
+ if (pthreads != NULL)
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+
+ if (pcpus != NULL)
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+
+ if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evsel__methods[] = {
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evsel__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
+ },
+ { .ml_name = NULL, }
+};
+
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evsel__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evsel",
+ .tp_basicsize = sizeof(struct pyrf_evsel),
+ .tp_dealloc = (destructor)pyrf_evsel__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_evsel__doc,
+ .tp_methods = pyrf_evsel__methods,
+ .tp_init = (initproc)pyrf_evsel__init,
+};
+
+static int pyrf_evsel__setup_types(void)
+{
+ pyrf_evsel__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evsel__type);
+}
+
+struct pyrf_evlist {
+ PyObject_HEAD
+
+ struct perf_evlist evlist;
+};
+
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs __used)
+{
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+
+ if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
+ return -1;
+
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+ perf_evlist__init(&pevlist->evlist, cpus, threads);
+ return 0;
+}
+
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+ perf_evlist__exit(&pevlist->evlist);
+ pevlist->ob_type->tp_free((PyObject*)pevlist);
+}
+
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = {"pages", "overwrite",
+ NULL, NULL};
+ int pages = 128, overwrite = false;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+ &pages, &overwrite))
+ return NULL;
+
+ if (perf_evlist__mmap(evlist, pages, overwrite) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = {"timeout", NULL, NULL};
+ int timeout = -1, n;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+ return NULL;
+
+ n = poll(evlist->pollfd, evlist->nr_fds, timeout);
+ if (n < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ return Py_BuildValue("i", n);
+}
+
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+ PyObject *args __used, PyObject *kwargs __used)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ PyObject *list = PyList_New(0);
+ int i;
+
+ for (i = 0; i < evlist->nr_fds; ++i) {
+ PyObject *file;
+ FILE *fp = fdopen(evlist->pollfd[i].fd, "r");
+
+ if (fp == NULL)
+ goto free_list;
+
+ file = PyFile_FromFile(fp, "perf", "r", NULL);
+ if (file == NULL)
+ goto free_list;
+
+ if (PyList_Append(list, file) != 0) {
+ Py_DECREF(file);
+ goto free_list;
+ }
+
+ Py_DECREF(file);
+ }
+
+ return list;
+free_list:
+ return PyErr_NoMemory();
+}
+
+
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs __used)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ PyObject *pevsel;
+ struct perf_evsel *evsel;
+
+ if (!PyArg_ParseTuple(args, "O", &pevsel))
+ return NULL;
+
+ Py_INCREF(pevsel);
+ evsel = &((struct pyrf_evsel *)pevsel)->evsel;
+ evsel->idx = evlist->nr_entries;
+ perf_evlist__add(evlist, evsel);
+
+ return Py_BuildValue("i", evlist->nr_entries);
+}
+
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ union perf_event *event;
+ int sample_id_all = 1, cpu;
+ static char *kwlist[] = {"sample_id_all", NULL, NULL};
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+ &cpu, &sample_id_all))
+ return NULL;
+
+ event = perf_evlist__read_on_cpu(evlist, cpu);
+ if (event != NULL) {
+ struct perf_evsel *first;
+ PyObject *pyevent = pyrf_event__new(event);
+ struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+
+ if (pyevent == NULL)
+ return PyErr_NoMemory();
+
+ first = list_entry(evlist->entries.next, struct perf_evsel, node);
+ perf_event__parse_sample(event, first->attr.sample_type, sample_id_all,
+ &pevent->sample);
+ return pyevent;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evlist__methods[] = {
+ {
+ .ml_name = "mmap",
+ .ml_meth = (PyCFunction)pyrf_evlist__mmap,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("mmap the file descriptor table.")
+ },
+ {
+ .ml_name = "poll",
+ .ml_meth = (PyCFunction)pyrf_evlist__poll,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("poll the file descriptor table.")
+ },
+ {
+ .ml_name = "get_pollfd",
+ .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("get the poll file descriptor table.")
+ },
+ {
+ .ml_name = "add",
+ .ml_meth = (PyCFunction)pyrf_evlist__add,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("adds an event selector to the list.")
+ },
+ {
+ .ml_name = "read_on_cpu",
+ .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("reads an event.")
+ },
+ { .ml_name = NULL, }
+};
+
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+
+ return pevlist->evlist.nr_entries;
+}
+
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+ struct perf_evsel *pos;
+
+ if (i >= pevlist->evlist.nr_entries)
+ return NULL;
+
+ list_for_each_entry(pos, &pevlist->evlist.entries, node)
+ if (i-- == 0)
+ break;
+
+ return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+ .sq_length = pyrf_evlist__length,
+ .sq_item = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evlist__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evlist",
+ .tp_basicsize = sizeof(struct pyrf_evlist),
+ .tp_dealloc = (destructor)pyrf_evlist__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_as_sequence = &pyrf_evlist__sequence_methods,
+ .tp_doc = pyrf_evlist__doc,
+ .tp_methods = pyrf_evlist__methods,
+ .tp_init = (initproc)pyrf_evlist__init,
+};
+
+static int pyrf_evlist__setup_types(void)
+{
+ pyrf_evlist__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evlist__type);
+}
+
+static struct {
+ const char *name;
+ int value;
+} perf__constants[] = {
+ { "TYPE_HARDWARE", PERF_TYPE_HARDWARE },
+ { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE },
+ { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT },
+ { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE },
+ { "TYPE_RAW", PERF_TYPE_RAW },
+ { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT },
+
+ { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES },
+ { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS },
+ { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES },
+ { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES },
+ { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
+ { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
+ { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
+ { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
+ { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
+ { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB },
+ { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB },
+ { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU },
+ { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ },
+ { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE },
+ { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH },
+ { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
+ { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
+
+ { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
+ { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
+ { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
+ { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS },
+ { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN },
+ { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ },
+ { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS },
+ { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS },
+
+ { "SAMPLE_IP", PERF_SAMPLE_IP },
+ { "SAMPLE_TID", PERF_SAMPLE_TID },
+ { "SAMPLE_TIME", PERF_SAMPLE_TIME },
+ { "SAMPLE_ADDR", PERF_SAMPLE_ADDR },
+ { "SAMPLE_READ", PERF_SAMPLE_READ },
+ { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN },
+ { "SAMPLE_ID", PERF_SAMPLE_ID },
+ { "SAMPLE_CPU", PERF_SAMPLE_CPU },
+ { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD },
+ { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID },
+ { "SAMPLE_RAW", PERF_SAMPLE_RAW },
+
+ { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED },
+ { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING },
+ { "FORMAT_ID", PERF_FORMAT_ID },
+ { "FORMAT_GROUP", PERF_FORMAT_GROUP },
+
+ { "RECORD_MMAP", PERF_RECORD_MMAP },
+ { "RECORD_LOST", PERF_RECORD_LOST },
+ { "RECORD_COMM", PERF_RECORD_COMM },
+ { "RECORD_EXIT", PERF_RECORD_EXIT },
+ { "RECORD_THROTTLE", PERF_RECORD_THROTTLE },
+ { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE },
+ { "RECORD_FORK", PERF_RECORD_FORK },
+ { "RECORD_READ", PERF_RECORD_READ },
+ { "RECORD_SAMPLE", PERF_RECORD_SAMPLE },
+ { .name = NULL, },
+};
+
+static PyMethodDef perf__methods[] = {
+ { .ml_name = NULL, }
+};
+
+PyMODINIT_FUNC initperf(void)
+{
+ PyObject *obj;
+ int i;
+ PyObject *dict, *module = Py_InitModule("perf", perf__methods);
+
+ if (module == NULL ||
+ pyrf_event__setup_types() < 0 ||
+ pyrf_evlist__setup_types() < 0 ||
+ pyrf_evsel__setup_types() < 0 ||
+ pyrf_thread_map__setup_types() < 0 ||
+ pyrf_cpu_map__setup_types() < 0)
+ return;
+
+ Py_INCREF(&pyrf_evlist__type);
+ PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+ Py_INCREF(&pyrf_evsel__type);
+ PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+ Py_INCREF(&pyrf_thread_map__type);
+ PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+ Py_INCREF(&pyrf_cpu_map__type);
+ PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+ dict = PyModule_GetDict(module);
+ if (dict == NULL)
+ goto error;
+
+ for (i = 0; perf__constants[i].name != NULL; i++) {
+ obj = PyInt_FromLong(perf__constants[i].value);
+ if (obj == NULL)
+ goto error;
+ PyDict_SetItemString(dict, perf__constants[i].name, obj);
+ Py_DECREF(obj);
+ }
+
+error:
+ if (PyErr_Occurred())
+ PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index c6d99334bdfa..2040b8538527 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -248,8 +248,7 @@ static void python_process_event(int cpu, void *data,
context = PyCObject_FromVoidPtr(scripting_context, NULL);
PyTuple_SetItem(t, n++, PyString_FromString(handler_name));
- PyTuple_SetItem(t, n++,
- PyCObject_FromVoidPtr(scripting_context, NULL));
+ PyTuple_SetItem(t, n++, context);
if (handler) {
PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 105f00bfd555..a3a871f7bda3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -67,7 +67,7 @@ out_close:
static void perf_session__id_header_size(struct perf_session *session)
{
- struct sample_data *data;
+ struct perf_sample *data;
u64 sample_type = session->sample_type;
u16 size = 0;
@@ -165,7 +165,7 @@ struct perf_session *perf_session__new(const char *filename, int mode,
} else if (mode == O_WRONLY) {
/*
* In O_RDONLY mode this will be performed when reading the
- * kernel MMAP event, in event__process_mmap().
+ * kernel MMAP event, in perf_event__process_mmap().
*/
if (perf_session__create_kernel_maps(self) < 0)
goto out_delete;
@@ -242,17 +242,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0;
}
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent)
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent)
{
u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
- struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));
+ int err;
- if (!syms)
- return NULL;
+ callchain_cursor_reset(&self->callchain_cursor);
for (i = 0; i < chain->nr; i++) {
u64 ip = chain->ips[i];
@@ -281,30 +280,33 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
*parent = al.sym;
if (!symbol_conf.use_callchain)
break;
- syms[i].map = al.map;
- syms[i].sym = al.sym;
}
+
+ err = callchain_cursor_append(&self->callchain_cursor,
+ ip, al.map, al.sym);
+ if (err)
+ return err;
}
- return syms;
+ return 0;
}
-static int process_event_synth_stub(event_t *event __used,
+static int process_event_synth_stub(union perf_event *event __used,
struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_event_stub(event_t *event __used,
- struct sample_data *sample __used,
+static int process_event_stub(union perf_event *event __used,
+ struct perf_sample *sample __used,
struct perf_session *session __used)
{
dump_printf(": unhandled!\n");
return 0;
}
-static int process_finished_round_stub(event_t *event __used,
+static int process_finished_round_stub(union perf_event *event __used,
struct perf_session *session __used,
struct perf_event_ops *ops __used)
{
@@ -312,7 +314,7 @@ static int process_finished_round_stub(event_t *event __used,
return 0;
}
-static int process_finished_round(event_t *event,
+static int process_finished_round(union perf_event *event,
struct perf_session *session,
struct perf_event_ops *ops);
@@ -329,7 +331,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
if (handler->exit == NULL)
handler->exit = process_event_stub;
if (handler->lost == NULL)
- handler->lost = event__process_lost;
+ handler->lost = perf_event__process_lost;
if (handler->read == NULL)
handler->read = process_event_stub;
if (handler->throttle == NULL)
@@ -363,98 +365,98 @@ void mem_bswap_64(void *src, int byte_size)
}
}
-static void event__all64_swap(event_t *self)
+static void perf_event__all64_swap(union perf_event *event)
{
- struct perf_event_header *hdr = &self->header;
- mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
+ struct perf_event_header *hdr = &event->header;
+ mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
}
-static void event__comm_swap(event_t *self)
+static void perf_event__comm_swap(union perf_event *event)
{
- self->comm.pid = bswap_32(self->comm.pid);
- self->comm.tid = bswap_32(self->comm.tid);
+ event->comm.pid = bswap_32(event->comm.pid);
+ event->comm.tid = bswap_32(event->comm.tid);
}
-static void event__mmap_swap(event_t *self)
+static void perf_event__mmap_swap(union perf_event *event)
{
- self->mmap.pid = bswap_32(self->mmap.pid);
- self->mmap.tid = bswap_32(self->mmap.tid);
- self->mmap.start = bswap_64(self->mmap.start);
- self->mmap.len = bswap_64(self->mmap.len);
- self->mmap.pgoff = bswap_64(self->mmap.pgoff);
+ event->mmap.pid = bswap_32(event->mmap.pid);
+ event->mmap.tid = bswap_32(event->mmap.tid);
+ event->mmap.start = bswap_64(event->mmap.start);
+ event->mmap.len = bswap_64(event->mmap.len);
+ event->mmap.pgoff = bswap_64(event->mmap.pgoff);
}
-static void event__task_swap(event_t *self)
+static void perf_event__task_swap(union perf_event *event)
{
- self->fork.pid = bswap_32(self->fork.pid);
- self->fork.tid = bswap_32(self->fork.tid);
- self->fork.ppid = bswap_32(self->fork.ppid);
- self->fork.ptid = bswap_32(self->fork.ptid);
- self->fork.time = bswap_64(self->fork.time);
+ event->fork.pid = bswap_32(event->fork.pid);
+ event->fork.tid = bswap_32(event->fork.tid);
+ event->fork.ppid = bswap_32(event->fork.ppid);
+ event->fork.ptid = bswap_32(event->fork.ptid);
+ event->fork.time = bswap_64(event->fork.time);
}
-static void event__read_swap(event_t *self)
+static void perf_event__read_swap(union perf_event *event)
{
- self->read.pid = bswap_32(self->read.pid);
- self->read.tid = bswap_32(self->read.tid);
- self->read.value = bswap_64(self->read.value);
- self->read.time_enabled = bswap_64(self->read.time_enabled);
- self->read.time_running = bswap_64(self->read.time_running);
- self->read.id = bswap_64(self->read.id);
+ event->read.pid = bswap_32(event->read.pid);
+ event->read.tid = bswap_32(event->read.tid);
+ event->read.value = bswap_64(event->read.value);
+ event->read.time_enabled = bswap_64(event->read.time_enabled);
+ event->read.time_running = bswap_64(event->read.time_running);
+ event->read.id = bswap_64(event->read.id);
}
-static void event__attr_swap(event_t *self)
+static void perf_event__attr_swap(union perf_event *event)
{
size_t size;
- self->attr.attr.type = bswap_32(self->attr.attr.type);
- self->attr.attr.size = bswap_32(self->attr.attr.size);
- self->attr.attr.config = bswap_64(self->attr.attr.config);
- self->attr.attr.sample_period = bswap_64(self->attr.attr.sample_period);
- self->attr.attr.sample_type = bswap_64(self->attr.attr.sample_type);
- self->attr.attr.read_format = bswap_64(self->attr.attr.read_format);
- self->attr.attr.wakeup_events = bswap_32(self->attr.attr.wakeup_events);
- self->attr.attr.bp_type = bswap_32(self->attr.attr.bp_type);
- self->attr.attr.bp_addr = bswap_64(self->attr.attr.bp_addr);
- self->attr.attr.bp_len = bswap_64(self->attr.attr.bp_len);
-
- size = self->header.size;
- size -= (void *)&self->attr.id - (void *)self;
- mem_bswap_64(self->attr.id, size);
+ event->attr.attr.type = bswap_32(event->attr.attr.type);
+ event->attr.attr.size = bswap_32(event->attr.attr.size);
+ event->attr.attr.config = bswap_64(event->attr.attr.config);
+ event->attr.attr.sample_period = bswap_64(event->attr.attr.sample_period);
+ event->attr.attr.sample_type = bswap_64(event->attr.attr.sample_type);
+ event->attr.attr.read_format = bswap_64(event->attr.attr.read_format);
+ event->attr.attr.wakeup_events = bswap_32(event->attr.attr.wakeup_events);
+ event->attr.attr.bp_type = bswap_32(event->attr.attr.bp_type);
+ event->attr.attr.bp_addr = bswap_64(event->attr.attr.bp_addr);
+ event->attr.attr.bp_len = bswap_64(event->attr.attr.bp_len);
+
+ size = event->header.size;
+ size -= (void *)&event->attr.id - (void *)event;
+ mem_bswap_64(event->attr.id, size);
}
-static void event__event_type_swap(event_t *self)
+static void perf_event__event_type_swap(union perf_event *event)
{
- self->event_type.event_type.event_id =
- bswap_64(self->event_type.event_type.event_id);
+ event->event_type.event_type.event_id =
+ bswap_64(event->event_type.event_type.event_id);
}
-static void event__tracing_data_swap(event_t *self)
+static void perf_event__tracing_data_swap(union perf_event *event)
{
- self->tracing_data.size = bswap_32(self->tracing_data.size);
+ event->tracing_data.size = bswap_32(event->tracing_data.size);
}
-typedef void (*event__swap_op)(event_t *self);
-
-static event__swap_op event__swap_ops[] = {
- [PERF_RECORD_MMAP] = event__mmap_swap,
- [PERF_RECORD_COMM] = event__comm_swap,
- [PERF_RECORD_FORK] = event__task_swap,
- [PERF_RECORD_EXIT] = event__task_swap,
- [PERF_RECORD_LOST] = event__all64_swap,
- [PERF_RECORD_READ] = event__read_swap,
- [PERF_RECORD_SAMPLE] = event__all64_swap,
- [PERF_RECORD_HEADER_ATTR] = event__attr_swap,
- [PERF_RECORD_HEADER_EVENT_TYPE] = event__event_type_swap,
- [PERF_RECORD_HEADER_TRACING_DATA] = event__tracing_data_swap,
- [PERF_RECORD_HEADER_BUILD_ID] = NULL,
- [PERF_RECORD_HEADER_MAX] = NULL,
+typedef void (*perf_event__swap_op)(union perf_event *event);
+
+static perf_event__swap_op perf_event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = perf_event__mmap_swap,
+ [PERF_RECORD_COMM] = perf_event__comm_swap,
+ [PERF_RECORD_FORK] = perf_event__task_swap,
+ [PERF_RECORD_EXIT] = perf_event__task_swap,
+ [PERF_RECORD_LOST] = perf_event__all64_swap,
+ [PERF_RECORD_READ] = perf_event__read_swap,
+ [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
+ [PERF_RECORD_HEADER_ATTR] = perf_event__attr_swap,
+ [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
+ [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+ [PERF_RECORD_HEADER_BUILD_ID] = NULL,
+ [PERF_RECORD_HEADER_MAX] = NULL,
};
struct sample_queue {
u64 timestamp;
u64 file_offset;
- event_t *event;
+ union perf_event *event;
struct list_head list;
};
@@ -472,8 +474,8 @@ static void perf_session_free_sample_buffers(struct perf_session *session)
}
static int perf_session_deliver_event(struct perf_session *session,
- event_t *event,
- struct sample_data *sample,
+ union perf_event *event,
+ struct perf_sample *sample,
struct perf_event_ops *ops,
u64 file_offset);
@@ -483,7 +485,7 @@ static void flush_sample_queue(struct perf_session *s,
struct ordered_samples *os = &s->ordered_samples;
struct list_head *head = &os->samples;
struct sample_queue *tmp, *iter;
- struct sample_data sample;
+ struct perf_sample sample;
u64 limit = os->next_flush;
u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
@@ -494,7 +496,7 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- event__parse_sample(iter->event, s, &sample);
+ perf_session__parse_sample(s, iter->event, &sample);
perf_session_deliver_event(s, iter->event, &sample, ops,
iter->file_offset);
@@ -550,7 +552,7 @@ static void flush_sample_queue(struct perf_session *s,
* Flush every events below timestamp 7
* etc...
*/
-static int process_finished_round(event_t *event __used,
+static int process_finished_round(union perf_event *event __used,
struct perf_session *session,
struct perf_event_ops *ops)
{
@@ -607,12 +609,12 @@ static void __queue_event(struct sample_queue *new, struct perf_session *s)
#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
-static int perf_session_queue_event(struct perf_session *s, event_t *event,
- struct sample_data *data, u64 file_offset)
+static int perf_session_queue_event(struct perf_session *s, union perf_event *event,
+ struct perf_sample *sample, u64 file_offset)
{
struct ordered_samples *os = &s->ordered_samples;
struct list_head *sc = &os->sample_cache;
- u64 timestamp = data->time;
+ u64 timestamp = sample->time;
struct sample_queue *new;
if (!timestamp || timestamp == ~0ULL)
@@ -648,7 +650,7 @@ static int perf_session_queue_event(struct perf_session *s, event_t *event,
return 0;
}
-static void callchain__printf(struct sample_data *sample)
+static void callchain__printf(struct perf_sample *sample)
{
unsigned int i;
@@ -660,8 +662,8 @@ static void callchain__printf(struct sample_data *sample)
}
static void perf_session__print_tstamp(struct perf_session *session,
- event_t *event,
- struct sample_data *sample)
+ union perf_event *event,
+ struct perf_sample *sample)
{
if (event->header.type != PERF_RECORD_SAMPLE &&
!session->sample_id_all) {
@@ -676,8 +678,8 @@ static void perf_session__print_tstamp(struct perf_session *session,
printf("%" PRIu64 " ", sample->time);
}
-static void dump_event(struct perf_session *session, event_t *event,
- u64 file_offset, struct sample_data *sample)
+static void dump_event(struct perf_session *session, union perf_event *event,
+ u64 file_offset, struct perf_sample *sample)
{
if (!dump_trace)
return;
@@ -691,11 +693,11 @@ static void dump_event(struct perf_session *session, event_t *event,
perf_session__print_tstamp(session, event, sample);
printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
- event->header.size, event__get_event_name(event->header.type));
+ event->header.size, perf_event__name(event->header.type));
}
-static void dump_sample(struct perf_session *session, event_t *event,
- struct sample_data *sample)
+static void dump_sample(struct perf_session *session, union perf_event *event,
+ struct perf_sample *sample)
{
if (!dump_trace)
return;
@@ -709,8 +711,8 @@ static void dump_sample(struct perf_session *session, event_t *event,
}
static int perf_session_deliver_event(struct perf_session *session,
- event_t *event,
- struct sample_data *sample,
+ union perf_event *event,
+ struct perf_sample *sample,
struct perf_event_ops *ops,
u64 file_offset)
{
@@ -743,7 +745,7 @@ static int perf_session_deliver_event(struct perf_session *session,
}
static int perf_session__preprocess_sample(struct perf_session *session,
- event_t *event, struct sample_data *sample)
+ union perf_event *event, struct perf_sample *sample)
{
if (event->header.type != PERF_RECORD_SAMPLE ||
!(session->sample_type & PERF_SAMPLE_CALLCHAIN))
@@ -758,7 +760,7 @@ static int perf_session__preprocess_sample(struct perf_session *session,
return 0;
}
-static int perf_session__process_user_event(struct perf_session *session, event_t *event,
+static int perf_session__process_user_event(struct perf_session *session, union perf_event *event,
struct perf_event_ops *ops, u64 file_offset)
{
dump_event(session, event, file_offset, NULL);
@@ -783,15 +785,16 @@ static int perf_session__process_user_event(struct perf_session *session, event_
}
static int perf_session__process_event(struct perf_session *session,
- event_t *event,
+ union perf_event *event,
struct perf_event_ops *ops,
u64 file_offset)
{
- struct sample_data sample;
+ struct perf_sample sample;
int ret;
- if (session->header.needs_swap && event__swap_ops[event->header.type])
- event__swap_ops[event->header.type](event);
+ if (session->header.needs_swap &&
+ perf_event__swap_ops[event->header.type])
+ perf_event__swap_ops[event->header.type](event);
if (event->header.type >= PERF_RECORD_HEADER_MAX)
return -EINVAL;
@@ -804,7 +807,7 @@ static int perf_session__process_event(struct perf_session *session,
/*
* For all kernel events we get the sample data
*/
- event__parse_sample(event, session, &sample);
+ perf_session__parse_sample(session, event, &sample);
/* Preprocess sample records - precheck callchains */
if (perf_session__preprocess_sample(session, event, &sample))
@@ -843,7 +846,7 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se
static void perf_session__warn_about_errors(const struct perf_session *session,
const struct perf_event_ops *ops)
{
- if (ops->lost == event__process_lost &&
+ if (ops->lost == perf_event__process_lost &&
session->hists.stats.total_lost != 0) {
ui__warning("Processed %" PRIu64 " events and LOST %" PRIu64
"!\n\nCheck IO/CPU overload!\n\n",
@@ -875,7 +878,7 @@ volatile int session_done;
static int __perf_session__process_pipe_events(struct perf_session *self,
struct perf_event_ops *ops)
{
- event_t event;
+ union perf_event event;
uint32_t size;
int skip = 0;
u64 head;
@@ -956,7 +959,7 @@ int __perf_session__process_events(struct perf_session *session,
struct ui_progress *progress;
size_t page_size, mmap_size;
char *buf, *mmaps[8];
- event_t *event;
+ union perf_event *event;
uint32_t size;
perf_event_ops__fill_defaults(ops);
@@ -1001,7 +1004,7 @@ remap:
file_pos = file_offset + head;
more:
- event = (event_t *)(buf + head);
+ event = (union perf_event *)(buf + head);
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index decd83f274fd..977b3a1b14aa 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -51,15 +51,17 @@ struct perf_session {
int cwdlen;
char *cwd;
struct ordered_samples ordered_samples;
- char filename[0];
+ struct callchain_cursor callchain_cursor;
+ char filename[0];
};
struct perf_event_ops;
-typedef int (*event_op)(event_t *self, struct sample_data *sample,
+typedef int (*event_op)(union perf_event *self, struct perf_sample *sample,
struct perf_session *session);
-typedef int (*event_synth_op)(event_t *self, struct perf_session *session);
-typedef int (*event_op2)(event_t *self, struct perf_session *session,
+typedef int (*event_synth_op)(union perf_event *self,
+ struct perf_session *session);
+typedef int (*event_op2)(union perf_event *self, struct perf_session *session,
struct perf_event_ops *ops);
struct perf_event_ops {
@@ -94,10 +96,10 @@ int __perf_session__process_events(struct perf_session *self,
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent);
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
@@ -154,4 +156,13 @@ size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp)
{
return hists__fprintf_nr_events(&self->hists, fp);
}
+
+static inline int perf_session__parse_sample(struct perf_session *session,
+ const union perf_event *event,
+ struct perf_sample *sample)
+{
+ return perf_event__parse_sample(event, session->sample_type,
+ session->sample_id_all, sample);
+}
+
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000000..1947b0430c94
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python2
+
+from distutils.core import setup, Extension
+
+perf = Extension('perf',
+ sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
+ 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
+ 'util/util.c', 'util/xyarray.c'],
+ include_dirs = ['util/include'],
+ extra_compile_args = ['-fno-strict-aliasing', '-Wno-write-strings'])
+
+setup(name='perf',
+ version='0.1',
+ description='Interface with the Linux profiling infrastructure',
+ author='Arnaldo Carvalho de Melo',
+ author_email='acme@redhat.com',
+ license='GPLv2',
+ url='http://perf.wiki.kernel.org',
+ ext_modules=[perf])
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000000..4064b7d708b8
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,200 @@
+#include <ctype.h>
+#include "util.h"
+#include "string.h"
+#include "strfilter.h"
+
+/* Operators */
+static const char *OP_and = "&"; /* Logical AND */
+static const char *OP_or = "|"; /* Logical OR */
+static const char *OP_not = "!"; /* Logical NOT */
+
+#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
+
+static void strfilter_node__delete(struct strfilter_node *self)
+{
+ if (self) {
+ if (self->p && !is_operator(*self->p))
+ free((char *)self->p);
+ strfilter_node__delete(self->l);
+ strfilter_node__delete(self->r);
+ free(self);
+ }
+}
+
+void strfilter__delete(struct strfilter *self)
+{
+ if (self) {
+ strfilter_node__delete(self->root);
+ free(self);
+ }
+}
+
+static const char *get_token(const char *s, const char **e)
+{
+ const char *p;
+
+ while (isspace(*s)) /* Skip spaces */
+ s++;
+
+ if (*s == '\0') {
+ p = s;
+ goto end;
+ }
+
+ p = s + 1;
+ if (!is_separator(*s)) {
+ /* End search */
+retry:
+ while (*p && !is_separator(*p) && !isspace(*p))
+ p++;
+ /* Escape and special case: '!' is also used in glob pattern */
+ if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
+ p++;
+ goto retry;
+ }
+ }
+end:
+ *e = p;
+ return s;
+}
+
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+ struct strfilter_node *l,
+ struct strfilter_node *r)
+{
+ struct strfilter_node *ret = zalloc(sizeof(struct strfilter_node));
+
+ if (ret) {
+ ret->p = op;
+ ret->l = l;
+ ret->r = r;
+ }
+
+ return ret;
+}
+
+static struct strfilter_node *strfilter_node__new(const char *s,
+ const char **ep)
+{
+ struct strfilter_node root, *cur, *last_op;
+ const char *e;
+
+ if (!s)
+ return NULL;
+
+ memset(&root, 0, sizeof(root));
+ last_op = cur = &root;
+
+ s = get_token(s, &e);
+ while (*s != '\0' && *s != ')') {
+ switch (*s) {
+ case '&': /* Exchg last OP->r with AND */
+ if (!cur->r || !last_op->r)
+ goto error;
+ cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+ if (!cur)
+ goto nomem;
+ last_op->r = cur;
+ last_op = cur;
+ break;
+ case '|': /* Exchg the root with OR */
+ if (!cur->r || !root.r)
+ goto error;
+ cur = strfilter_node__alloc(OP_or, root.r, NULL);
+ if (!cur)
+ goto nomem;
+ root.r = cur;
+ last_op = cur;
+ break;
+ case '!': /* Add NOT as a leaf node */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur = cur->r;
+ break;
+ case '(': /* Recursively parses inside the parenthesis */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__new(s + 1, &s);
+ if (!s)
+ goto nomem;
+ if (!cur->r || *s != ')')
+ goto error;
+ e = s + 1;
+ break;
+ default:
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur->r->p = strndup(s, e - s);
+ if (!cur->r->p)
+ goto nomem;
+ }
+ s = get_token(e, &e);
+ }
+ if (!cur->r)
+ goto error;
+ *ep = s;
+ return root.r;
+nomem:
+ s = NULL;
+error:
+ *ep = s;
+ strfilter_node__delete(root.r);
+ return NULL;
+}
+
+/*
+ * Parse filter rule and return new strfilter.
+ * Return NULL if fail, and *ep == NULL if memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+ struct strfilter *ret = zalloc(sizeof(struct strfilter));
+ const char *ep = NULL;
+
+ if (ret)
+ ret->root = strfilter_node__new(rules, &ep);
+
+ if (!ret || !ret->root || *ep != '\0') {
+ if (err)
+ *err = ep;
+ strfilter__delete(ret);
+ ret = NULL;
+ }
+
+ return ret;
+}
+
+static bool strfilter_node__compare(struct strfilter_node *self,
+ const char *str)
+{
+ if (!self || !self->p)
+ return false;
+
+ switch (*self->p) {
+ case '|': /* OR */
+ return strfilter_node__compare(self->l, str) ||
+ strfilter_node__compare(self->r, str);
+ case '&': /* AND */
+ return strfilter_node__compare(self->l, str) &&
+ strfilter_node__compare(self->r, str);
+ case '!': /* NOT */
+ return !strfilter_node__compare(self->r, str);
+ default:
+ return strglobmatch(str, self->p);
+ }
+}
+
+/* Return true if STR matches the filter rules */
+bool strfilter__compare(struct strfilter *self, const char *str)
+{
+ if (!self)
+ return false;
+ return strfilter_node__compare(self->root, str);
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000000..00f58a7506de
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,48 @@
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of string filter */
+struct strfilter_node {
+ struct strfilter_node *l; /* Tree left branche (for &,|) */
+ struct strfilter_node *r; /* Tree right branche (for !,&,|) */
+ const char *p; /* Operator or rule */
+};
+
+/* String filter */
+struct strfilter {
+ struct strfilter_node *root;
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and return new strfilter. Return NULL if an error detected.
+ * In that case, *@err will indicate where it is detected, and *@err is NULL
+ * if a memory allocation is failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @self: String filter
+ * @str: target string
+ *
+ * Compare @str and @self. Return true if the str match the rule
+ */
+bool strfilter__compare(struct strfilter *self, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @self: String filter to delete
+ *
+ * Delete @self.
+ */
+void strfilter__delete(struct strfilter *self);
+
+#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 7821d0e6866f..3e193f8e3061 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1525,8 +1525,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
symbol_conf.symfs, self->long_name);
break;
case DSO__ORIG_GUEST_KMODULE:
- if (map->groups && map->groups->machine)
- root_dir = map->groups->machine->root_dir;
+ if (map->groups && machine)
+ root_dir = machine->root_dir;
else
root_dir = "";
snprintf(name, size, "%s%s%s", symbol_conf.symfs,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 00f4eade2e3e..d5d3b22250f3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,61 +7,6 @@
#include "util.h"
#include "debug.h"
-/* Skip "." and ".." directories */
-static int filter(const struct dirent *dir)
-{
- if (dir->d_name[0] == '.')
- return 0;
- else
- return 1;
-}
-
-struct thread_map *thread_map__new_by_pid(pid_t pid)
-{
- struct thread_map *threads;
- char name[256];
- int items;
- struct dirent **namelist = NULL;
- int i;
-
- sprintf(name, "/proc/%d/task", pid);
- items = scandir(name, &namelist, filter, NULL);
- if (items <= 0)
- return NULL;
-
- threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
- if (threads != NULL) {
- for (i = 0; i < items; i++)
- threads->map[i] = atoi(namelist[i]->d_name);
- threads->nr = items;
- }
-
- for (i=0; i<items; i++)
- free(namelist[i]);
- free(namelist);
-
- return threads;
-}
-
-struct thread_map *thread_map__new_by_tid(pid_t tid)
-{
- struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
-
- if (threads != NULL) {
- threads->map[0] = tid;
- threads->nr = 1;
- }
-
- return threads;
-}
-
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
-{
- if (pid != -1)
- return thread_map__new_by_pid(pid);
- return thread_map__new_by_tid(tid);
-}
-
static struct thread *thread__new(pid_t pid)
{
struct thread *self = zalloc(sizeof(*self));
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index d7574101054a..e5f2401c1b5e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,24 +18,10 @@ struct thread {
int comm_len;
};
-struct thread_map {
- int nr;
- int map[];
-};
-
struct perf_session;
void thread__delete(struct thread *self);
-struct thread_map *thread_map__new_by_pid(pid_t pid);
-struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
-
-static inline void thread_map__delete(struct thread_map *threads)
-{
- free(threads);
-}
-
int thread__set_comm(struct thread *self, const char *comm);
int thread__comm_len(struct thread *self);
struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000000..a5df131b77c3
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,64 @@
+#include <dirent.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "thread_map.h"
+
+/* Skip "." and ".." directories */
+static int filter(const struct dirent *dir)
+{
+ if (dir->d_name[0] == '.')
+ return 0;
+ else
+ return 1;
+}
+
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+ struct thread_map *threads;
+ char name[256];
+ int items;
+ struct dirent **namelist = NULL;
+ int i;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ return NULL;
+
+ threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+ if (threads != NULL) {
+ for (i = 0; i < items; i++)
+ threads->map[i] = atoi(namelist[i]->d_name);
+ threads->nr = items;
+ }
+
+ for (i=0; i<items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+ struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+
+ if (threads != NULL) {
+ threads->map[0] = tid;
+ threads->nr = 1;
+ }
+
+ return threads;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+ if (pid != -1)
+ return thread_map__new_by_pid(pid);
+ return thread_map__new_by_tid(tid);
+}
+
+void thread_map__delete(struct thread_map *threads)
+{
+ free(threads);
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000000..3cb907311409
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,15 @@
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+
+struct thread_map {
+ int nr;
+ int map[];
+};
+
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+void thread_map__delete(struct thread_map *threads);
+#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000000..70a9c13f4ad5
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that files for further copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "cpumap.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include <inttypes.h>
+
+/*
+ * Ordering weight: count-1 * count-2 * ... / count-n
+ */
+static double sym_weight(const struct sym_entry *sym, struct perf_top *top)
+{
+ double weight = sym->snap_count;
+ int counter;
+
+ if (!top->display_weighted)
+ return weight;
+
+ for (counter = 1; counter < top->evlist->nr_entries - 1; counter++)
+ weight *= sym->count[counter];
+
+ weight /= (sym->count[counter] + 1);
+
+ return weight;
+}
+
+static void perf_top__remove_active_sym(struct perf_top *top, struct sym_entry *syme)
+{
+ pthread_mutex_lock(&top->active_symbols_lock);
+ list_del_init(&syme->node);
+ pthread_mutex_unlock(&top->active_symbols_lock);
+}
+
+static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
+{
+ struct rb_node **p = &tree->rb_node;
+ struct rb_node *parent = NULL;
+ struct sym_entry *iter;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct sym_entry, rb_node);
+
+ if (se->weight > iter->weight)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&se->rb_node, parent, p);
+ rb_insert_color(&se->rb_node, tree);
+}
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+ struct perf_evsel *counter;
+ float samples_per_sec = top->samples / top->delay_secs;
+ float ksamples_per_sec = top->kernel_samples / top->delay_secs;
+ float esamples_percent = (100.0 * top->exact_samples) / top->samples;
+ size_t ret = 0;
+
+ if (!perf_guest) {
+ ret = snprintf(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ } else {
+ float us_samples_per_sec = top->us_samples / top->delay_secs;
+ float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+ float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+ ret = snprintf(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
+ " guest kernel:%4.1f%% guest us:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_kernel_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_us_samples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ }
+
+ if (top->evlist->nr_entries == 1 || !top->display_weighted) {
+ struct perf_evsel *first;
+ first = list_entry(top->evlist->entries.next, struct perf_evsel, node);
+ ret += snprintf(bf + ret, size - ret, "%" PRIu64 "%s ",
+ (uint64_t)first->attr.sample_period,
+ top->freq ? "Hz" : "");
+ }
+
+ if (!top->display_weighted) {
+ ret += snprintf(bf + ret, size - ret, "%s",
+ event_name(top->sym_evsel));
+ } else list_for_each_entry(counter, &top->evlist->entries, node) {
+ ret += snprintf(bf + ret, size - ret, "%s%s",
+ counter->idx ? "/" : "", event_name(counter));
+ }
+
+ ret += snprintf(bf + ret, size - ret, "], ");
+
+ if (top->target_pid != -1)
+ ret += snprintf(bf + ret, size - ret, " (target_pid: %d",
+ top->target_pid);
+ else if (top->target_tid != -1)
+ ret += snprintf(bf + ret, size - ret, " (target_tid: %d",
+ top->target_tid);
+ else
+ ret += snprintf(bf + ret, size - ret, " (all");
+
+ if (top->cpu_list)
+ ret += snprintf(bf + ret, size - ret, ", CPU%s: %s)",
+ top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
+ else {
+ if (top->target_tid != -1)
+ ret += snprintf(bf + ret, size - ret, ")");
+ else
+ ret += snprintf(bf + ret, size - ret, ", %d CPU%s)",
+ top->evlist->cpus->nr,
+ top->evlist->cpus->nr > 1 ? "s" : "");
+ }
+
+ return ret;
+}
+
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+ top->samples = top->us_samples = top->kernel_samples =
+ top->exact_samples = top->guest_kernel_samples =
+ top->guest_us_samples = 0;
+}
+
+float perf_top__decay_samples(struct perf_top *top, struct rb_root *root)
+{
+ struct sym_entry *syme, *n;
+ float sum_ksamples = 0.0;
+ int snap = !top->display_weighted ? top->sym_counter : 0, j;
+
+ /* Sort the active symbols */
+ pthread_mutex_lock(&top->active_symbols_lock);
+ syme = list_entry(top->active_symbols.next, struct sym_entry, node);
+ pthread_mutex_unlock(&top->active_symbols_lock);
+
+ top->rb_entries = 0;
+ list_for_each_entry_safe_from(syme, n, &top->active_symbols, node) {
+ syme->snap_count = syme->count[snap];
+ if (syme->snap_count != 0) {
+
+ if ((top->hide_user_symbols &&
+ syme->origin == PERF_RECORD_MISC_USER) ||
+ (top->hide_kernel_symbols &&
+ syme->origin == PERF_RECORD_MISC_KERNEL)) {
+ perf_top__remove_active_sym(top, syme);
+ continue;
+ }
+ syme->weight = sym_weight(syme, top);
+
+ if ((int)syme->snap_count >= top->count_filter) {
+ rb_insert_active_sym(root, syme);
+ ++top->rb_entries;
+ }
+ sum_ksamples += syme->snap_count;
+
+ for (j = 0; j < top->evlist->nr_entries; j++)
+ syme->count[j] = top->zero ? 0 : syme->count[j] * 7 / 8;
+ } else
+ perf_top__remove_active_sym(top, syme);
+ }
+
+ return sum_ksamples;
+}
+
+/*
+ * Find the longest symbol name that will be displayed
+ */
+void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
+ int *dso_width, int *dso_short_width, int *sym_width)
+{
+ struct rb_node *nd;
+ int printed = 0;
+
+ *sym_width = *dso_width = *dso_short_width = 0;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node);
+ struct symbol *sym = sym_entry__symbol(syme);
+
+ if (++printed > top->print_entries ||
+ (int)syme->snap_count < top->count_filter)
+ continue;
+
+ if (syme->map->dso->long_name_len > *dso_width)
+ *dso_width = syme->map->dso->long_name_len;
+
+ if (syme->map->dso->short_name_len > *dso_short_width)
+ *dso_short_width = syme->map->dso->short_name_len;
+
+ if (sym->namelen > *sym_width)
+ *sym_width = sym->namelen;
+ }
+}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000000..4f769f47e19a
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,64 @@
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "types.h"
+#include "../perf.h"
+#include <stddef.h>
+#include <pthread.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
+struct perf_evlist;
+struct perf_evsel;
+
+struct sym_entry {
+ struct rb_node rb_node;
+ struct list_head node;
+ unsigned long snap_count;
+ double weight;
+ int skip;
+ u8 origin;
+ struct map *map;
+ unsigned long count[0];
+};
+
+static inline struct symbol *sym_entry__symbol(struct sym_entry *self)
+{
+ return ((void *)self) + symbol_conf.priv_size;
+}
+
+struct perf_top {
+ struct perf_evlist *evlist;
+ /*
+ * Symbols will be added here in perf_event__process_sample and will
+ * get out after decayed.
+ */
+ struct list_head active_symbols;
+ pthread_mutex_t active_symbols_lock;
+ u64 samples;
+ u64 kernel_samples, us_samples;
+ u64 exact_samples;
+ u64 guest_us_samples, guest_kernel_samples;
+ int print_entries, count_filter, delay_secs;
+ int display_weighted, freq, rb_entries, sym_counter;
+ pid_t target_pid, target_tid;
+ bool hide_kernel_symbols, hide_user_symbols, zero;
+ const char *cpu_list;
+ struct perf_evsel *sym_evsel;
+};
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+float perf_top__decay_samples(struct perf_top *top, struct rb_root *root);
+void perf_top__find_widths(struct perf_top *top, struct rb_root *root,
+ int *dso_width, int *dso_short_width, int *sym_width);
+
+#ifdef NO_NEWT_SUPPORT
+static inline int perf_top__tui_browser(struct perf_top *top __used)
+{
+ return 0;
+}
+#else
+int perf_top__tui_browser(struct perf_top *top);
+#endif
+#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 73a02223c629..d8e622dd738a 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -153,7 +153,7 @@ void parse_proc_kallsyms(char *file, unsigned int size __unused)
char *next = NULL;
char *addr_str;
char ch;
- int ret;
+ int ret __used;
int i;
line = strtok_r(file, "\n", &next);
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 82b78f99251b..1aa39658539c 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -1,9 +1,11 @@
#include "../browser.h"
#include "../helpline.h"
#include "../libslang.h"
+#include "../../annotate.h"
#include "../../hist.h"
#include "../../sort.h"
#include "../../symbol.h"
+#include "../../annotate.h"
static void ui__error_window(const char *fmt, ...)
{
@@ -58,32 +60,34 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro
}
static double objdump_line__calc_percent(struct objdump_line *self,
- struct list_head *head,
- struct symbol *sym)
+ struct symbol *sym, int evidx)
{
double percent = 0.0;
if (self->offset != -1) {
int len = sym->end - sym->start;
unsigned int hits = 0;
- struct sym_priv *priv = symbol__priv(sym);
- struct sym_ext *sym_ext = priv->ext;
- struct sym_hist *h = priv->hist;
+ struct annotation *notes = symbol__annotation(sym);
+ struct source_line *src_line = notes->src->lines;
+ struct sym_hist *h = annotation__histogram(notes, evidx);
s64 offset = self->offset;
- struct objdump_line *next = objdump__get_next_ip_line(head, self);
-
+ struct objdump_line *next;
+ next = objdump__get_next_ip_line(&notes->src->source, self);
while (offset < (s64)len &&
(next == NULL || offset < next->offset)) {
- if (sym_ext) {
- percent += sym_ext[offset].percent;
+ if (src_line) {
+ percent += src_line[offset].percent;
} else
- hits += h->ip[offset];
+ hits += h->addr[offset];
++offset;
}
-
- if (sym_ext == NULL && h->sum)
+ /*
+ * If the percentage wasn't already calculated in
+ * symbol__get_source_line, do it now:
+ */
+ if (src_line == NULL && h->sum)
percent = 100.0 * hits / h->sum;
}
@@ -136,10 +140,10 @@ static void annotate_browser__set_top(struct annotate_browser *self,
static int annotate_browser__run(struct annotate_browser *self)
{
struct rb_node *nd;
- struct hist_entry *he = self->b.priv;
+ struct symbol *sym = self->b.priv;
int key;
- if (ui_browser__show(&self->b, he->ms.sym->name,
+ if (ui_browser__show(&self->b, sym->name,
"<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0)
return -1;
/*
@@ -179,42 +183,47 @@ out:
return key;
}
-int hist_entry__tui_annotate(struct hist_entry *self)
+int hist_entry__tui_annotate(struct hist_entry *he, int evidx)
+{
+ return symbol__tui_annotate(he->ms.sym, he->ms.map, evidx);
+}
+
+int symbol__tui_annotate(struct symbol *sym, struct map *map, int evidx)
{
struct objdump_line *pos, *n;
struct objdump_line_rb_node *rbpos;
- LIST_HEAD(head);
+ struct annotation *notes = symbol__annotation(sym);
struct annotate_browser browser = {
.b = {
- .entries = &head,
+ .entries = &notes->src->source,
.refresh = ui_browser__list_head_refresh,
.seek = ui_browser__list_head_seek,
.write = annotate_browser__write,
- .priv = self,
+ .priv = sym,
},
};
int ret;
- if (self->ms.sym == NULL)
+ if (sym == NULL)
return -1;
- if (self->ms.map->dso->annotate_warned)
+ if (map->dso->annotate_warned)
return -1;
- if (hist_entry__annotate(self, &head, sizeof(*rbpos)) < 0) {
+ if (symbol__annotate(sym, map, sizeof(*rbpos)) < 0) {
ui__error_window(ui_helpline__last_msg);
return -1;
}
ui_helpline__push("Press <- or ESC to exit");
- list_for_each_entry(pos, &head, node) {
+ list_for_each_entry(pos, &notes->src->source, node) {
size_t line_len = strlen(pos->line);
if (browser.b.width < line_len)
browser.b.width = line_len;
rbpos = objdump_line__rb(pos);
rbpos->idx = browser.b.nr_entries++;
- rbpos->percent = objdump_line__calc_percent(pos, &head, self->ms.sym);
+ rbpos->percent = objdump_line__calc_percent(pos, sym, evidx);
if (rbpos->percent < 0.01)
continue;
objdump__insert_line(&browser.entries, rbpos);
@@ -229,7 +238,7 @@ int hist_entry__tui_annotate(struct hist_entry *self)
browser.b.width += 18; /* Percentage */
ret = annotate_browser__run(&browser);
- list_for_each_entry_safe(pos, n, &head, node) {
+ list_for_each_entry_safe(pos, n, &notes->src->source, node) {
list_del(&pos->node);
objdump_line__free(pos);
}
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 60c463c16028..294b49538522 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -377,7 +377,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
- u64 cumul = cumul_hits(child);
+ u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;
@@ -797,7 +797,8 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
return printed;
}
-int hists__browse(struct hists *self, const char *helpline, const char *ev_name)
+int hists__browse(struct hists *self, const char *helpline,
+ const char *ev_name, int evidx)
{
struct hist_browser *browser = hist_browser__new(self);
struct pstack *fstack;
@@ -935,7 +936,7 @@ do_annotate:
if (he == NULL)
continue;
- hist_entry__tui_annotate(he);
+ hist_entry__tui_annotate(he, evidx);
} else if (choice == browse_map)
map__browse(browser->selection->map);
else if (choice == zoom_dso) {
@@ -984,7 +985,7 @@ out:
return key;
}
-int hists__tui_browse_tree(struct rb_root *self, const char *help)
+int hists__tui_browse_tree(struct rb_root *self, const char *help, int evidx)
{
struct rb_node *first = rb_first(self), *nd = first, *next;
int key = 0;
@@ -993,7 +994,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help)
struct hists *hists = rb_entry(nd, struct hists, rb_node);
const char *ev_name = __event_name(hists->type, hists->config);
- key = hists__browse(hists, help, ev_name);
+ key = hists__browse(hists, help, ev_name, evidx);
switch (key) {
case NEWT_KEY_TAB:
next = rb_next(nd);
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index e5158369106e..8462bffe20bc 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -41,7 +41,7 @@ static int ui_entry__read(const char *title, char *bf, size_t size, int width)
out_free_form:
newtPopWindow();
newtFormDestroy(form);
- return 0;
+ return err;
}
struct map_browser {
diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c
new file mode 100644
index 000000000000..ca6062483a8f
--- /dev/null
+++ b/tools/perf/util/ui/browsers/top.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "../browser.h"
+#include "../helpline.h"
+#include "../libslang.h"
+#include "../../evlist.h"
+#include "../../hist.h"
+#include "../../sort.h"
+#include "../../symbol.h"
+#include "../../top.h"
+
+struct perf_top_browser {
+ struct ui_browser b;
+ struct rb_root root;
+ float sum_ksamples;
+ int dso_width;
+ int dso_short_width;
+ int sym_width;
+};
+
+static void perf_top_browser__write(struct ui_browser *browser, void *entry, int row)
+{
+ struct perf_top_browser *top_browser = container_of(browser, struct perf_top_browser, b);
+ struct sym_entry *syme = rb_entry(entry, struct sym_entry, rb_node);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ struct symbol *symbol = sym_entry__symbol(syme);
+ struct perf_top *top = browser->priv;
+ int width = browser->width;
+ double pcnt;
+
+ pcnt = 100.0 - (100.0 * ((top_browser->sum_ksamples - syme->snap_count) /
+ top_browser->sum_ksamples));
+ ui_browser__set_percent_color(browser, pcnt, current_entry);
+
+ if (top->evlist->nr_entries == 1 || !top->display_weighted) {
+ slsmg_printf("%20.2f ", syme->weight);
+ width -= 24;
+ } else {
+ slsmg_printf("%9.1f %10ld ", syme->weight, syme->snap_count);
+ width -= 23;
+ }
+
+ slsmg_printf("%4.1f%%", pcnt);
+ width -= 7;
+
+ if (verbose) {
+ slsmg_printf(" %016" PRIx64, symbol->start);
+ width -= 17;
+ }
+
+ slsmg_printf(" %-*.*s ", top_browser->sym_width, top_browser->sym_width,
+ symbol->name);
+ width -= top_browser->sym_width;
+ slsmg_write_nstring(width >= syme->map->dso->long_name_len ?
+ syme->map->dso->long_name :
+ syme->map->dso->short_name, width);
+}
+
+static void perf_top_browser__update_rb_tree(struct perf_top_browser *browser)
+{
+ struct perf_top *top = browser->b.priv;
+
+ browser->root = RB_ROOT;
+ browser->b.top = NULL;
+ browser->sum_ksamples = perf_top__decay_samples(top, &browser->root);
+ perf_top__find_widths(top, &browser->root, &browser->dso_width,
+ &browser->dso_short_width,
+ &browser->sym_width);
+ if (browser->sym_width + browser->dso_width > browser->b.width - 29) {
+ browser->dso_width = browser->dso_short_width;
+ if (browser->sym_width + browser->dso_width > browser->b.width - 29)
+ browser->sym_width = browser->b.width - browser->dso_width - 29;
+ }
+ browser->b.nr_entries = top->rb_entries;
+}
+
+static int perf_top_browser__run(struct perf_top_browser *browser)
+{
+ int key;
+ char title[160];
+ struct perf_top *top = browser->b.priv;
+ int delay_msecs = top->delay_secs * 1000;
+
+ perf_top_browser__update_rb_tree(browser);
+ perf_top__header_snprintf(top, title, sizeof(title));
+ perf_top__reset_sample_counters(top);
+
+ if (ui_browser__show(&browser->b, title, "ESC: exit") < 0)
+ return -1;
+
+ newtFormSetTimer(browser->b.form, delay_msecs);
+
+ while (1) {
+ key = ui_browser__run(&browser->b);
+
+ switch (key) {
+ case -1:
+ /* FIXME we need to check if it was es.reason == NEWT_EXIT_TIMER */
+ perf_top_browser__update_rb_tree(browser);
+ perf_top__header_snprintf(top, title, sizeof(title));
+ perf_top__reset_sample_counters(top);
+ ui_browser__set_color(&browser->b, NEWT_COLORSET_ROOT);
+ SLsmg_gotorc(0, 0);
+ slsmg_write_nstring(title, browser->b.width);
+ break;
+ case NEWT_KEY_TAB:
+ default:
+ goto out;
+ }
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+int perf_top__tui_browser(struct perf_top *top)
+{
+ struct perf_top_browser browser = {
+ .b = {
+ .entries = &browser.root,
+ .refresh = ui_browser__rb_tree_refresh,
+ .seek = ui_browser__rb_tree_seek,
+ .write = perf_top_browser__write,
+ .priv = top,
+ },
+ };
+
+ ui_helpline__push("Press <- or ESC to exit");
+ return perf_top_browser__run(&browser);
+}
diff --git a/tools/perf/util/ui/libslang.h b/tools/perf/util/ui/libslang.h
index 5623da8e8080..2b63e1c9b181 100644
--- a/tools/perf/util/ui/libslang.h
+++ b/tools/perf/util/ui/libslang.h
@@ -13,11 +13,11 @@
#if SLANG_VERSION < 20104
#define slsmg_printf(msg, args...) \
- SLsmg_printf((char *)msg, ##args)
+ SLsmg_printf((char *)(msg), ##args)
#define slsmg_write_nstring(msg, len) \
- SLsmg_write_nstring((char *)msg, len)
+ SLsmg_write_nstring((char *)(msg), len)
#define sltt_set_color(obj, name, fg, bg) \
- SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg)
+ SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
#else
#define slsmg_printf SLsmg_printf
#define slsmg_write_nstring SLsmg_write_nstring
diff --git a/tools/perf/util/ui/setup.c b/tools/perf/util/ui/setup.c
index 662085032eb7..fbf1a145492f 100644
--- a/tools/perf/util/ui/setup.c
+++ b/tools/perf/util/ui/setup.c
@@ -14,11 +14,12 @@ static void newt_suspend(void *d __used)
newtResume();
}
-void setup_browser(void)
+void setup_browser(bool fallback_to_pager)
{
if (!isatty(1) || !use_browser || dump_trace) {
use_browser = 0;
- setup_pager();
+ if (fallback_to_pager)
+ setup_pager();
return;
}