summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2011-02-02 13:47:48 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2011-02-02 13:47:48 +1100
commit47cf5f4e60b5d732dcbf0b9cfc99e772f7c136d6 (patch)
tree574089c30edd4a2ec888dc8cd20d73f657fdca19
parenta0eeb409249d2c6e943f739c14be60d93f980ea9 (diff)
parent4e70e1a1b8bf2425aaf3d25f6e65ce795d347723 (diff)
Merge remote branch 'tip/auto-latest'
-rw-r--r--Documentation/spinlocks.txt24
-rw-r--r--arch/alpha/include/asm/rwsem.h36
-rw-r--r--arch/ia64/include/asm/rwsem.h37
-rw-r--r--arch/mips/kernel/vpe.c4
-rw-r--r--arch/powerpc/include/asm/rwsem.h51
-rw-r--r--arch/s390/include/asm/rwsem.h63
-rw-r--r--arch/sh/include/asm/rwsem.h56
-rw-r--r--arch/sparc/include/asm/rwsem.h46
-rw-r--r--arch/sparc/lib/atomic32.c2
-rw-r--r--arch/um/drivers/ubd_kern.c2
-rw-r--r--arch/x86/include/asm/amd_nb.h1
-rw-r--r--arch/x86/include/asm/processor.h4
-rw-r--r--arch/x86/include/asm/rwsem.h80
-rw-r--r--arch/x86/include/asm/smp.h7
-rw-r--r--arch/x86/kernel/amd_nb.c14
-rw-r--r--arch/x86/kernel/cpu/amd.c8
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c4
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c12
-rw-r--r--arch/x86/kernel/smpboot.c45
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c1
-rw-r--r--arch/x86/lib/memmove_64.S197
-rw-r--r--arch/x86/lib/memmove_64.c192
-rw-r--r--arch/xtensa/include/asm/rwsem.h37
-rw-r--r--include/asm-generic/cputime.h3
-rw-r--r--include/linux/interrupt.h7
-rw-r--r--include/linux/jiffies.h1
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/linux/rwlock_types.h8
-rw-r--r--include/linux/rwsem-spinlock.h31
-rw-r--r--include/linux/rwsem.h53
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/spinlock_types.h8
-rw-r--r--kernel/cred.c2
-rw-r--r--kernel/irq/manage.c2
-rw-r--r--kernel/perf_event.c3
-rw-r--r--kernel/rtmutex-tester.c39
-rw-r--r--kernel/sched.c187
-rw-r--r--kernel/sched_fair.c73
-rw-r--r--kernel/sched_idletask.c26
-rw-r--r--kernel/sched_rt.c19
-rw-r--r--kernel/sched_stoptask.c7
-rw-r--r--kernel/softirq.c3
-rw-r--r--kernel/time.c23
-rw-r--r--lib/rwsem.c10
-rwxr-xr-xscripts/checkpatch.pl5
-rw-r--r--tools/perf/Documentation/perf-probe.txt4
-rw-r--r--tools/perf/Makefile4
-rw-r--r--tools/perf/builtin-probe.c29
-rw-r--r--tools/perf/builtin-record.c328
-rw-r--r--tools/perf/builtin-report.c29
-rw-r--r--tools/perf/builtin-stat.c39
-rw-r--r--tools/perf/builtin-test.c184
-rw-r--r--tools/perf/builtin-top.c283
-rw-r--r--tools/perf/perf.h26
-rw-r--r--tools/perf/util/callchain.c224
-rw-r--r--tools/perf/util/callchain.h74
-rw-r--r--tools/perf/util/cpumap.c5
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/event.c125
-rw-r--r--tools/perf/util/event.h5
-rw-r--r--tools/perf/util/evlist.c170
-rw-r--r--tools/perf/util/evlist.h41
-rw-r--r--tools/perf/util/evsel.c318
-rw-r--r--tools/perf/util/evsel.h33
-rw-r--r--tools/perf/util/header.c17
-rw-r--r--tools/perf/util/header.h7
-rw-r--r--tools/perf/util/hist.c15
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/include/linux/list.h1
-rw-r--r--tools/perf/util/parse-events.c71
-rw-r--r--tools/perf/util/parse-events.h7
-rw-r--r--tools/perf/util/probe-event.c68
-rw-r--r--tools/perf/util/probe-event.h1
-rw-r--r--tools/perf/util/probe-finder.c361
-rw-r--r--tools/perf/util/session.c26
-rw-r--r--tools/perf/util/session.h20
-rw-r--r--tools/perf/util/thread.c55
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/thread_map.c64
-rw-r--r--tools/perf/util/thread_map.h15
-rw-r--r--tools/perf/util/ui/browsers/hists.c2
84 files changed, 2246 insertions, 1879 deletions
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
index 178c831b907d..2e3c64b1a6a5 100644
--- a/Documentation/spinlocks.txt
+++ b/Documentation/spinlocks.txt
@@ -86,7 +86,7 @@ to change the variables it has to get an exclusive write lock.
The routines look the same as above:
- rwlock_t xxx_lock = RW_LOCK_UNLOCKED;
+ rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
unsigned long flags;
@@ -196,25 +196,3 @@ appropriate:
For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
-
-SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. These interfere
-with lockdep state tracking.
-
-Most of the time, you can simply turn:
- static spinlock_t xxx_lock = SPIN_LOCK_UNLOCKED;
-into:
- static DEFINE_SPINLOCK(xxx_lock);
-
-Static structure member variables go from:
-
- struct foo bar {
- .lock = SPIN_LOCK_UNLOCKED;
- };
-
-to:
-
- struct foo bar {
- .lock = __SPIN_LOCK_UNLOCKED(bar.lock);
- };
-
-Declaration of static rw_locks undergo a similar transformation.
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 1570c0b54336..a83bbea62c67 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -13,44 +13,13 @@
#ifdef __KERNEL__
#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
static inline void __down_read(struct rw_semaphore *sem)
{
@@ -250,10 +219,5 @@ static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
#endif
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 215d5454c7d3..3027e7516d85 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -25,20 +25,8 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
#include <asm/intrinsics.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
#define RWSEM_ACTIVE_BIAS (1L)
#define RWSEM_ACTIVE_MASK (0xffffffffL)
@@ -46,26 +34,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void
-init_rwsem (struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
/*
* lock for reading
*/
@@ -174,9 +142,4 @@ __downgrade_write (struct rw_semaphore *sem)
#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 6a1fdfef8fde..ab52b7cf3b6b 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -148,9 +148,9 @@ struct {
spinlock_t tc_list_lock;
struct list_head tc_list; /* Thread contexts */
} vpecontrol = {
- .vpe_list_lock = SPIN_LOCK_UNLOCKED,
+ .vpe_list_lock = __SPIN_LOCK_UNLOCKED(vpe_list_lock),
.vpe_list = LIST_HEAD_INIT(vpecontrol.vpe_list),
- .tc_list_lock = SPIN_LOCK_UNLOCKED,
+ .tc_list_lock = __SPIN_LOCK_UNLOCKED(tc_list_lock),
.tc_list = LIST_HEAD_INIT(vpecontrol.tc_list)
};
diff --git a/arch/powerpc/include/asm/rwsem.h b/arch/powerpc/include/asm/rwsem.h
index 8447d89fbe72..bb1e2cdeb9bf 100644
--- a/arch/powerpc/include/asm/rwsem.h
+++ b/arch/powerpc/include/asm/rwsem.h
@@ -13,11 +13,6 @@
* by Paul Mackerras <paulus@samba.org>.
*/
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
/*
* the semaphore definition
*/
@@ -33,47 +28,6 @@
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-struct rw_semaphore {
- long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, \
- __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
- do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
- } while (0)
-
/*
* lock for reading
*/
@@ -174,10 +128,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return sem->count != 0;
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_RWSEM_H */
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index 423fdda2322d..d0eb4653cebd 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -43,29 +43,6 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_write(struct rw_semaphore *);
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
#ifndef __s390x__
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
@@ -81,41 +58,6 @@ struct rw_semaphore {
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
/*
- * initialisation
- */
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait.lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-
-/*
* lock for reading
*/
static inline void __down_read(struct rw_semaphore *sem)
@@ -377,10 +319,5 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return new;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _S390_RWSEM_H */
diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h
index 06e2251a5e48..edab57265293 100644
--- a/arch/sh/include/asm/rwsem.h
+++ b/arch/sh/include/asm/rwsem.h
@@ -11,64 +11,13 @@
#endif
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -179,10 +128,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_SH_RWSEM_H */
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index a2b4302869bc..069bf4d663a1 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -13,53 +13,12 @@
#ifdef __KERNEL__
-#include <linux/list.h>
-#include <linux/spinlock.h>
-
-struct rwsem_waiter;
-
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000L
#define RWSEM_ACTIVE_BIAS 0x00000001L
#define RWSEM_ACTIVE_MASK 0xffffffffL
#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
/*
* lock for reading
@@ -160,11 +119,6 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _SPARC64_RWSEM_H */
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index cbddeb38ffda..d3c7a12ad879 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -16,7 +16,7 @@
#define ATOMIC_HASH(a) (&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
- [0 ... (ATOMIC_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
+ [0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
};
#else /* SMP */
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index ba4a98ba39c0..620f5b70957d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -185,7 +185,7 @@ struct ubd {
.no_cow = 0, \
.shared = 0, \
.cow = DEFAULT_COW, \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
.request = NULL, \
.start_sg = 0, \
.end_sg = 0, \
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 64dc82ee19f0..3e7070071d73 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -26,6 +26,7 @@ extern void amd_get_nodes(struct bootnode *nodes);
struct amd_northbridge {
struct pci_dev *misc;
+ struct pci_dev *link;
};
struct amd_northbridge_info {
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 45636cefa186..4c25ab48257b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,10 +94,6 @@ struct cpuinfo_x86 {
int x86_cache_alignment; /* In bytes */
int x86_power;
unsigned long loops_per_jiffy;
-#ifdef CONFIG_SMP
- /* cpus sharing the last level cache: */
- cpumask_var_t llc_shared_map;
-#endif
/* cpuid returned max cores value: */
u16 x86_max_cores;
u16 apicid;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index d1e41b0f9b60..df4cd32b4cc6 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -37,26 +37,9 @@
#endif
#ifdef __KERNEL__
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
#include <asm/asm.h>
-struct rwsem_waiter;
-
-extern asmregparm struct rw_semaphore *
- rwsem_down_read_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_down_write_failed(struct rw_semaphore *sem);
-extern asmregparm struct rw_semaphore *
- rwsem_wake(struct rw_semaphore *);
-extern asmregparm struct rw_semaphore *
- rwsem_downgrade_wake(struct rw_semaphore *sem);
-
/*
- * the semaphore definition
- *
* The bias values and the counter type limits the number of
* potential readers/writers to 32767 for 32 bits and 2147483647
* for 64 bits.
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore *
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-typedef signed long rwsem_count_t;
-
-struct rw_semaphore {
- rwsem_count_t count;
- spinlock_t wait_lock;
- struct list_head wait_list;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
- struct lockdep_map dep_map;
-#endif
-};
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-
-#define __RWSEM_INITIALIZER(name) \
-{ \
- RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \
- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \
-}
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
-
/*
* lock for reading
*/
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem)
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t result, tmp;
+ long result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
" mov %0,%1\n\t"
"1:\n\t"
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
*/
static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning down_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* adds 0xffff0001, returns the old value */
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- rwsem_count_t ret = cmpxchg(&sem->count,
- RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
*/
static inline void __up_read(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_read\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
- rwsem_count_t tmp;
+ long tmp;
asm volatile("# beginning __up_write\n\t"
LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 0xffff0001, returns the old value */
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
/*
* implement atomic add functionality
*/
-static inline void rwsem_atomic_add(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
{
asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
: "+m" (sem->count)
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta,
/*
* implement exchange and add functionality
*/
-static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
- struct rw_semaphore *sem)
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
{
- rwsem_count_t tmp = delta;
+ long tmp = delta;
asm volatile(LOCK_PREFIX "xadd %0,%1"
: "+r" (tmp), "+m" (sem->count)
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta,
return tmp + delta;
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4c2f63c7fc1b..3597825a3112 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -23,6 +23,8 @@ extern unsigned int num_processors;
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+/* cpus sharing the last level cache: */
+DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU(u16, cpu_llc_id);
DECLARE_PER_CPU(int, cpu_number);
@@ -36,6 +38,11 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_llc_shared_mask(int cpu)
+{
+ return per_cpu(cpu_llc_shared_map, cpu);
+}
+
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 0a99f7198bc3..4ae9a961c33c 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,11 @@ struct pci_device_id amd_nb_misc_ids[] = {
};
EXPORT_SYMBOL(amd_nb_misc_ids);
+static struct pci_device_id amd_nb_link_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) },
+ {}
+};
+
const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ 0x00, 0x18, 0x20 },
{ 0xff, 0x00, 0x20 },
@@ -45,7 +50,7 @@ int amd_cache_northbridges(void)
{
int i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc;
+ struct pci_dev *misc, *link;
if (amd_nb_num())
return 0;
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void)
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- misc = NULL;
+ link = misc = NULL;
for (i = 0; i != amd_nb_num(); i++) {
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
+ node_to_amd_nb(i)->link = link =
+ next_northbridge(link, amd_nb_link_ids);
}
/* some CPU families (e.g. family 0x11) do not support GART */
@@ -85,6 +92,9 @@ int amd_cache_northbridges(void)
boot_cpu_data.x86_mask >= 0x1))
amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+ if (boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
+
return 0;
}
EXPORT_SYMBOL_GPL(amd_cache_northbridges);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c7bedb83c5a..990cc4861586 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -261,7 +261,7 @@ static int __cpuinit nearby_node(int apicid)
#ifdef CONFIG_X86_HT
static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
{
- u32 nodes;
+ u32 nodes, cores_per_cu;
u8 node_id;
int cpu = smp_processor_id();
@@ -276,6 +276,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* get compute unit information */
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->compute_unit_id = ebx & 0xff;
+ cores_per_cu = ((ebx >> 8) & 3) + 1;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -288,15 +289,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
/* fixup multi-node processor information */
if (nodes > 1) {
u32 cores_per_node;
+ u32 cus_per_node;
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cores_per_node = c->x86_max_cores / nodes;
+ cus_per_node = cores_per_node / cores_per_cu;
/* store NodeID, use llc_shared_map to store sibling info */
per_cpu(cpu_llc_id, cpu) = node_id;
/* core id to be in range from 0 to (cores_per_node - 1) */
- c->cpu_core_id = c->cpu_core_id % cores_per_node;
+ c->cpu_core_id %= cores_per_node;
+ c->compute_unit_id %= cus_per_node;
}
}
#endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index ec2c19a7b8ef..5419a263ebd1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -732,11 +732,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
struct cpuinfo_x86 *c = &cpu_data(cpu);
if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
- for_each_cpu(i, c->llc_shared_map) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, c->llc_shared_map) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5bf2fac52aca..167f97b5596e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
-#ifdef CONFIG_SMP
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
sprintf(name, "threshold_bank%i", bank);
#ifdef CONFIG_SMP
if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
- i = cpumask_first(c->llc_shared_map);
+ i = cpumask_first(cpu_llc_shared_mask(cpu));
/* first core not up yet */
if (cpu_data(i).cpu_core_id)
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (err)
goto out;
- cpumask_copy(b->cpus, c->llc_shared_map);
+ cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
per_cpu(threshold_banks, cpu)[bank] = b;
goto out;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 9d977a2ea693..4d98789b0664 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1389,7 +1389,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-int __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1608,7 +1608,7 @@ out:
return ret;
}
-int x86_pmu_event_init(struct perf_event *event)
+static int x86_pmu_event_init(struct perf_event *event)
{
struct pmu *tmp;
int err;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e56b9bfbabd1..f7a0993c1e7c 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -682,7 +682,7 @@ static int p4_validate_raw_event(struct perf_event *event)
* if an event is shared accross the logical threads
* the user needs special permissions to be able to use it
*/
- if (p4_event_bind_map[v].shared) {
+ if (p4_ht_active() && p4_event_bind_map[v].shared) {
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return -EACCES;
}
@@ -727,7 +727,8 @@ static int p4_hw_config(struct perf_event *event)
event->hw.config = p4_set_ht_bit(event->hw.config);
if (event->attr.type == PERF_TYPE_RAW) {
-
+ struct p4_event_bind *bind;
+ unsigned int esel;
/*
* Clear bits we reserve to be managed by kernel itself
* and never allowed from a user space
@@ -743,6 +744,13 @@ static int p4_hw_config(struct perf_event *event)
* bits since we keep additional info here (for cache events and etc)
*/
event->hw.config |= event->attr.config;
+ bind = p4_config_get_bind(event->attr.config);
+ if (!bind) {
+ rc = -EINVAL;
+ goto out;
+ }
+ esel = P4_OPCODE_ESEL(bind->opcode);
+ event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
}
rc = x86_setup_perfctr(event);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0cbe8c0b35ed..7a29a00c172b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -130,6 +130,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
@@ -355,23 +357,6 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- struct cpumask *llc = dst->llc_shared_map;
- *dst = *src;
- dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
- const struct cpuinfo_x86 *src)
-{
- *dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
@@ -381,7 +366,7 @@ void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
- copy_cpuinfo_x86(c, &boot_cpu_data);
+ *c = boot_cpu_data;
c->cpu_index = id;
if (id != 0)
identify_secondary_cpu(c);
@@ -389,15 +374,12 @@ void __cpuinit smp_store_cpu_info(int id)
static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
{
- struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
- struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
-
cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
- cpumask_set_cpu(cpu1, c2->llc_shared_map);
- cpumask_set_cpu(cpu2, c1->llc_shared_map);
+ cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
}
@@ -414,6 +396,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
if (c->phys_proc_id == o->phys_proc_id &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
c->compute_unit_id == o->compute_unit_id)
link_thread_siblings(cpu, i);
} else if (c->phys_proc_id == o->phys_proc_id &&
@@ -425,7 +408,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
}
- cpumask_set_cpu(cpu, c->llc_shared_map);
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
@@ -436,8 +419,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+ cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
}
if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -476,7 +459,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
!(cpu_has(c, X86_FEATURE_AMD_DCM)))
return cpu_core_mask(cpu);
else
- return c->llc_shared_map;
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
@@ -1089,13 +1072,13 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
- cpumask_copy(cpu_callin_mask, cpumask_of(0));
- mb();
+
/*
* Setup boot CPU information
*/
smp_store_cpu_info(0); /* Final full version of the data */
+ cpumask_copy(cpu_callin_mask, cpumask_of(0));
+ mb();
#ifdef CONFIG_X86_32
boot_cpu_logical_apicid = logical_smp_processor_id();
#endif
@@ -1103,7 +1086,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(i) {
zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1b950d151e58..9796c2f3d074 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(empty_zero_page);
#ifndef CONFIG_PARAVIRT
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
new file mode 100644
index 000000000000..0ecb8433e5a8
--- /dev/null
+++ b/arch/x86/lib/memmove_64.S
@@ -0,0 +1,197 @@
+/*
+ * Normally compiler builtins are used, but sometimes the compiler calls out
+ * of line code. Based on asm-i386/string.h.
+ *
+ * This assembly file was rewritten from the memmove_64.c file.
+ * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
+ */
+#define _STRING_C
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+#undef memmove
+
+/*
+ * Implement memmove(). This can handle overlap between src and dst.
+ *
+ * Input:
+ * rdi: dest
+ * rsi: src
+ * rdx: count
+ *
+ * Output:
+ * rax: dest
+ */
+ENTRY(memmove)
+ CFI_STARTPROC
+ /* Handle 32 bytes or more in a loop */
+ mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f
+
+ /* Decide forward/backward copy mode */
+ cmp %rdi, %rsi
+ jb 2f
+
+ /*
+ * The movsq instruction has a high startup latency,
+ * so we handle small sizes with general registers.
+ */
+ cmp $680, %rdx
+ jb 3f
+ /*
+ * The movsq instruction is only good for the aligned case.
+ */
+
+ cmpb %dil, %sil
+ je 4f
+3:
+ sub $0x20, %rdx
+ /*
+ * We gobble 32 bytes forward in each loop iteration.
+ */
+5:
+ sub $0x20, %rdx
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq 2*8(%rsi), %r9
+ movq 3*8(%rsi), %r8
+ leaq 4*8(%rsi), %rsi
+
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, 2*8(%rdi)
+ movq %r8, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae 5b
+ addq $0x20, %rdx
+ jmp 1f
+ /*
+ * Handle data forward by movsq.
+ */
+ .p2align 4
+4:
+ movq %rdx, %rcx
+ movq -8(%rsi, %rdx), %r11
+ lea -8(%rdi, %rdx), %r10
+ shrq $3, %rcx
+ rep movsq
+ movq %r11, (%r10)
+ jmp 13f
+ /*
+ * Handle data backward by movsq.
+ */
+ .p2align 4
+7:
+ movq %rdx, %rcx
+ movq (%rsi), %r11
+ movq %rdi, %r10
+ leaq -8(%rsi, %rdx), %rsi
+ leaq -8(%rdi, %rdx), %rdi
+ shrq $3, %rcx
+ std
+ rep movsq
+ cld
+ movq %r11, (%r10)
+ jmp 13f
+
+ /*
+ * Start to prepare for backward copy.
+ */
+ .p2align 4
+2:
+ cmp $680, %rdx
+ jb 6f
+ cmp %dil, %sil
+ je 7b
+6:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * We gobble 32 bytes backward in each loop iteration.
+ */
+8:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r11
+ movq -2*8(%rsi), %r10
+ movq -3*8(%rsi), %r9
+ movq -4*8(%rsi), %r8
+ leaq -4*8(%rsi), %rsi
+
+ movq %r11, -1*8(%rdi)
+ movq %r10, -2*8(%rdi)
+ movq %r9, -3*8(%rdi)
+ movq %r8, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae 8b
+ /*
+ * Calculate copy position to head.
+ */
+ addq $0x20, %rdx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+1:
+ cmpq $16, %rdx
+ jb 9f
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq 1*8(%rsi), %r10
+ movq -2*8(%rsi, %rdx), %r9
+ movq -1*8(%rsi, %rdx), %r8
+ movq %r11, 0*8(%rdi)
+ movq %r10, 1*8(%rdi)
+ movq %r9, -2*8(%rdi, %rdx)
+ movq %r8, -1*8(%rdi, %rdx)
+ jmp 13f
+ .p2align 4
+9:
+ cmpq $8, %rdx
+ jb 10f
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r11
+ movq -1*8(%rsi, %rdx), %r10
+ movq %r11, 0*8(%rdi)
+ movq %r10, -1*8(%rdi, %rdx)
+ jmp 13f
+10:
+ cmpq $4, %rdx
+ jb 11f
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %r11d
+ movl -4(%rsi, %rdx), %r10d
+ movl %r11d, (%rdi)
+ movl %r10d, -4(%rdi, %rdx)
+ jmp 13f
+11:
+ cmp $2, %rdx
+ jb 12f
+ /*
+ * Move data from 2 bytes to 3 bytes.
+ */
+ movw (%rsi), %r11w
+ movw -2(%rsi, %rdx), %r10w
+ movw %r11w, (%rdi)
+ movw %r10w, -2(%rdi, %rdx)
+ jmp 13f
+12:
+ cmp $1, %rdx
+ jb 13f
+ /*
+ * Move data for 1 byte.
+ */
+ movb (%rsi), %r11b
+ movb %r11b, (%rdi)
+13:
+ retq
+ CFI_ENDPROC
+ENDPROC(memmove)
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
deleted file mode 100644
index 6d0f0ec41b34..000000000000
--- a/arch/x86/lib/memmove_64.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* Normally compiler builtins are used, but sometimes the compiler calls out
- of line code. Based on asm-i386/string.h.
- */
-#define _STRING_C
-#include <linux/string.h>
-#include <linux/module.h>
-
-#undef memmove
-void *memmove(void *dest, const void *src, size_t count)
-{
- unsigned long d0,d1,d2,d3,d4,d5,d6,d7;
- char *ret;
-
- __asm__ __volatile__(
- /* Handle more 32bytes in loop */
- "mov %2, %3\n\t"
- "cmp $0x20, %0\n\t"
- "jb 1f\n\t"
-
- /* Decide forward/backward copy mode */
- "cmp %2, %1\n\t"
- "jb 2f\n\t"
-
- /*
- * movsq instruction have many startup latency
- * so we handle small size by general register.
- */
- "cmp $680, %0\n\t"
- "jb 3f\n\t"
- /*
- * movsq instruction is only good for aligned case.
- */
- "cmpb %%dil, %%sil\n\t"
- "je 4f\n\t"
- "3:\n\t"
- "sub $0x20, %0\n\t"
- /*
- * We gobble 32byts forward in each loop.
- */
- "5:\n\t"
- "sub $0x20, %0\n\t"
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq 2*8(%1), %6\n\t"
- "movq 3*8(%1), %7\n\t"
- "leaq 4*8(%1), %1\n\t"
-
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, 2*8(%2)\n\t"
- "movq %7, 3*8(%2)\n\t"
- "leaq 4*8(%2), %2\n\t"
- "jae 5b\n\t"
- "addq $0x20, %0\n\t"
- "jmp 1f\n\t"
- /*
- * Handle data forward by movsq.
- */
- ".p2align 4\n\t"
- "4:\n\t"
- "movq %0, %8\n\t"
- "movq -8(%1, %0), %4\n\t"
- "lea -8(%2, %0), %5\n\t"
- "shrq $3, %8\n\t"
- "rep movsq\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
- /*
- * Handle data backward by movsq.
- */
- ".p2align 4\n\t"
- "7:\n\t"
- "movq %0, %8\n\t"
- "movq (%1), %4\n\t"
- "movq %2, %5\n\t"
- "leaq -8(%1, %0), %1\n\t"
- "leaq -8(%2, %0), %2\n\t"
- "shrq $3, %8\n\t"
- "std\n\t"
- "rep movsq\n\t"
- "cld\n\t"
- "movq %4, (%5)\n\t"
- "jmp 13f\n\t"
-
- /*
- * Start to prepare for backward copy.
- */
- ".p2align 4\n\t"
- "2:\n\t"
- "cmp $680, %0\n\t"
- "jb 6f \n\t"
- "cmp %%dil, %%sil\n\t"
- "je 7b \n\t"
- "6:\n\t"
- /*
- * Calculate copy position to tail.
- */
- "addq %0, %1\n\t"
- "addq %0, %2\n\t"
- "subq $0x20, %0\n\t"
- /*
- * We gobble 32byts backward in each loop.
- */
- "8:\n\t"
- "subq $0x20, %0\n\t"
- "movq -1*8(%1), %4\n\t"
- "movq -2*8(%1), %5\n\t"
- "movq -3*8(%1), %6\n\t"
- "movq -4*8(%1), %7\n\t"
- "leaq -4*8(%1), %1\n\t"
-
- "movq %4, -1*8(%2)\n\t"
- "movq %5, -2*8(%2)\n\t"
- "movq %6, -3*8(%2)\n\t"
- "movq %7, -4*8(%2)\n\t"
- "leaq -4*8(%2), %2\n\t"
- "jae 8b\n\t"
- /*
- * Calculate copy position to head.
- */
- "addq $0x20, %0\n\t"
- "subq %0, %1\n\t"
- "subq %0, %2\n\t"
- "1:\n\t"
- "cmpq $16, %0\n\t"
- "jb 9f\n\t"
- /*
- * Move data from 16 bytes to 31 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq 1*8(%1), %5\n\t"
- "movq -2*8(%1, %0), %6\n\t"
- "movq -1*8(%1, %0), %7\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, 1*8(%2)\n\t"
- "movq %6, -2*8(%2, %0)\n\t"
- "movq %7, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- ".p2align 4\n\t"
- "9:\n\t"
- "cmpq $8, %0\n\t"
- "jb 10f\n\t"
- /*
- * Move data from 8 bytes to 15 bytes.
- */
- "movq 0*8(%1), %4\n\t"
- "movq -1*8(%1, %0), %5\n\t"
- "movq %4, 0*8(%2)\n\t"
- "movq %5, -1*8(%2, %0)\n\t"
- "jmp 13f\n\t"
- "10:\n\t"
- "cmpq $4, %0\n\t"
- "jb 11f\n\t"
- /*
- * Move data from 4 bytes to 7 bytes.
- */
- "movl (%1), %4d\n\t"
- "movl -4(%1, %0), %5d\n\t"
- "movl %4d, (%2)\n\t"
- "movl %5d, -4(%2, %0)\n\t"
- "jmp 13f\n\t"
- "11:\n\t"
- "cmp $2, %0\n\t"
- "jb 12f\n\t"
- /*
- * Move data from 2 bytes to 3 bytes.
- */
- "movw (%1), %4w\n\t"
- "movw -2(%1, %0), %5w\n\t"
- "movw %4w, (%2)\n\t"
- "movw %5w, -2(%2, %0)\n\t"
- "jmp 13f\n\t"
- "12:\n\t"
- "cmp $1, %0\n\t"
- "jb 13f\n\t"
- /*
- * Move data for 1 byte.
- */
- "movb (%1), %4b\n\t"
- "movb %4b, (%2)\n\t"
- "13:\n\t"
- : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) ,
- "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7)
- :"0" (count),
- "1" (src),
- "2" (dest)
- :"memory");
-
- return ret;
-
-}
-EXPORT_SYMBOL(memmove);
diff --git a/arch/xtensa/include/asm/rwsem.h b/arch/xtensa/include/asm/rwsem.h
index e39edf5c86f2..249619e7e7f2 100644
--- a/arch/xtensa/include/asm/rwsem.h
+++ b/arch/xtensa/include/asm/rwsem.h
@@ -17,44 +17,12 @@
#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
#endif
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-#include <asm/system.h>
-
-/*
- * the semaphore definition
- */
-struct rw_semaphore {
- signed long count;
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
- spinlock_t wait_lock;
- struct list_head wait_list;
-};
-
-#define __RWSEM_INITIALIZER(name) \
- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
- LIST_HEAD_INIT((name).wait_list) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-static inline void init_rwsem(struct rw_semaphore *sem)
-{
- sem->count = RWSEM_UNLOCKED_VALUE;
- spin_lock_init(&sem->wait_lock);
- INIT_LIST_HEAD(&sem->wait_list);
-}
/*
* lock for reading
@@ -160,9 +128,4 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
return atomic_add_return(delta, (atomic_t *)(&sem->count));
}
-static inline int rwsem_is_locked(struct rw_semaphore *sem)
-{
- return (sem->count != 0);
-}
-
#endif /* _XTENSA_RWSEM_H */
diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h
index 2bcc5c7c22a6..61e03dd7939e 100644
--- a/include/asm-generic/cputime.h
+++ b/include/asm-generic/cputime.h
@@ -30,6 +30,9 @@ typedef u64 cputime64_t;
#define cputime64_to_jiffies64(__ct) (__ct)
#define jiffies64_to_cputime64(__jif) (__jif)
#define cputime_to_cputime64(__ct) ((u64) __ct)
+#define cputime64_gt(__a, __b) ((__a) > (__b))
+
+#define nsecs_to_cputime64(__ct) nsecs_to_jiffies64(__ct)
/*
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 63c5ad78e37c..e89c5c735d1e 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -457,6 +457,13 @@ extern void raise_softirq(unsigned int nr);
*/
DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
+DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
+
+static inline struct task_struct *this_cpu_ksoftirqd(void)
+{
+ return this_cpu_read(ksoftirqd);
+}
+
/* Try to send a softirq to a remote cpu. If this cannot be done, the
* work will be queued to the local cpu.
*/
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 6811f4bfc6e7..922aa313c9f9 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x);
extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
+extern u64 nsecs_to_jiffies64(u64 n);
extern unsigned long nsecs_to_jiffies(u64 n);
#define TIMESTAMP_SIZE 30
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index ce0775aa64c3..7ff16f7d3ed4 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -64,7 +64,7 @@ struct kthread_work {
};
#define KTHREAD_WORKER_INIT(worker) { \
- .lock = SPIN_LOCK_UNLOCKED, \
+ .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \
.work_list = LIST_HEAD_INIT((worker).work_list), \
}
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 3adb06ebf841..580de67f318b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -518,6 +518,7 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603
+#define PCI_DEVICE_ID_AMD_15H_NB_LINK 0x1604
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index bd31808c7d8e..cc0072e93e36 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -43,14 +43,6 @@ typedef struct {
RW_DEP_MAP_INIT(lockname) }
#endif
-/*
- * RW_LOCK_UNLOCKED defeat lockdep state tracking and is hence
- * deprecated.
- *
- * Please use DEFINE_RWLOCK() or __RW_LOCK_UNLOCKED() as appropriate.
- */
-#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init)
-
#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x)
#endif /* __LINUX_RWLOCK_TYPES_H */
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
index bdfcc2527970..34701241b673 100644
--- a/include/linux/rwsem-spinlock.h
+++ b/include/linux/rwsem-spinlock.h
@@ -12,15 +12,7 @@
#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
#endif
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
#ifdef __KERNEL__
-
-#include <linux/types.h>
-
-struct rwsem_waiter;
-
/*
* the rw-semaphore definition
* - if activity is 0 then there are no active readers or writers
@@ -37,28 +29,7 @@ struct rw_semaphore {
#endif
};
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
-#else
-# define __RWSEM_DEP_MAP_INIT(lockname)
-#endif
-
-#define __RWSEM_INITIALIZER(name) \
-{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \
- __RWSEM_DEP_MAP_INIT(name) }
-
-#define DECLARE_RWSEM(name) \
- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
-
-extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
- struct lock_class_key *key);
-
-#define init_rwsem(sem) \
-do { \
- static struct lock_class_key __key; \
- \
- __init_rwsem((sem), #sem, &__key); \
-} while (0)
+#define RWSEM_UNLOCKED_VALUE 0x00000000
extern void __down_read(struct rw_semaphore *sem);
extern int __down_read_trylock(struct rw_semaphore *sem);
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index efd348fe8ca7..a8afe9cd000c 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,6 +11,9 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
#include <asm/system.h>
#include <asm/atomic.h>
@@ -19,9 +22,57 @@ struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
#else
-#include <asm/rwsem.h> /* use an arch-specific implementation */
+/* All arch specific implementations share the same struct */
+struct rw_semaphore {
+ long count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
+};
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
+/* Include the arch specific part */
+#include <asm/rwsem.h>
+
+/* In all implementations count != 0 means locked */
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return sem->count != 0;
+}
+
+#endif
+
+/* Common initializer macros and functions */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname }
+#else
+# define __RWSEM_DEP_MAP_INIT(lockname)
#endif
+#define __RWSEM_INITIALIZER(name) \
+ { RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED(name.wait_lock), \
+ LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) }
+
+#define DECLARE_RWSEM(name) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key);
+
+#define init_rwsem(sem) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ __init_rwsem((sem), #sem, &__key); \
+} while (0)
+
/*
* lock for reading
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d747f948b34e..0542774914d4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1084,12 +1084,10 @@ struct sched_class {
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_fork) (struct task_struct *p);
- void (*switched_from) (struct rq *this_rq, struct task_struct *task,
- int running);
- void (*switched_to) (struct rq *this_rq, struct task_struct *task,
- int running);
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
- int oldprio, int running);
+ int oldprio);
unsigned int (*get_rr_interval) (struct rq *rq,
struct task_struct *task);
@@ -1715,7 +1713,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
/*
* Per process flags
*/
-#define PF_KSOFTIRQD 0x00000001 /* I am ksoftirqd */
#define PF_STARTING 0x00000002 /* being created */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 851b7783720d..73548eb13a5d 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -81,14 +81,6 @@ typedef struct spinlock {
#define __SPIN_LOCK_UNLOCKED(lockname) \
(spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname)
-/*
- * SPIN_LOCK_UNLOCKED defeats lockdep state tracking and is hence
- * deprecated.
- * Please use DEFINE_SPINLOCK() or __SPIN_LOCK_UNLOCKED() as
- * appropriate.
- */
-#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init)
-
#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x)
#include <linux/rwlock_types.h>
diff --git a/kernel/cred.c b/kernel/cred.c
index 6a1aa004e376..b5496e81b0f7 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -35,7 +35,7 @@ static struct kmem_cache *cred_jar;
static struct thread_group_cred init_tgcred = {
.usage = ATOMIC_INIT(2),
.tgid = 0,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock),
};
#endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0587c5ceaed8..538fce2db51c 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -179,7 +179,7 @@ static void irq_affinity_notify(struct work_struct *work)
cpumask_copy(cpumask, desc->pending_mask);
else
#endif
- cpumask_copy(cpumask, desc->affinity);
+ cpumask_copy(cpumask, desc->irq_data.affinity);
raw_spin_unlock_irqrestore(&desc->lock, flags);
notify->notify(notify, cpumask);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 126a302c481c..852ae8c66502 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1999,8 +1999,7 @@ static int alloc_callchain_buffers(void)
* accessed from NMI. Use a temporary manual per cpu allocation
* until that gets sorted out.
*/
- size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
- num_possible_cpus();
+ size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
entries = kzalloc(size, GFP_KERNEL);
if (!entries)
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index 66cb89bc5ef1..d5b543506cbc 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -9,7 +9,6 @@
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/smp_lock.h>
#include <linux/spinlock.h>
#include <linux/sysdev.h>
#include <linux/timer.h>
@@ -27,7 +26,6 @@ struct test_thread_data {
int opcode;
int opdata;
int mutexes[MAX_RT_TEST_MUTEXES];
- int bkl;
int event;
struct sys_device sysdev;
};
@@ -46,8 +44,8 @@ enum test_opcodes {
RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */
RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */
RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */
- RTTEST_LOCKBKL, /* 9 Lock BKL */
- RTTEST_UNLOCKBKL, /* 10 Unlock BKL */
+ RTTEST_LOCKBKL, /* 9 Was: Lock BKL */
+ RTTEST_UNLOCKBKL, /* 10 Was: Unlock BKL */
RTTEST_SIGNAL, /* 11 Signal other test thread, data = thread id */
RTTEST_RESETEVENT = 98, /* 98 Reset event counter */
RTTEST_RESET = 99, /* 99 Reset all pending operations */
@@ -74,13 +72,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[i] = 0;
}
}
-
- if (!lockwakeup && td->bkl == 4) {
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- }
return 0;
case RTTEST_RESETEVENT:
@@ -131,25 +122,6 @@ static int handle_op(struct test_thread_data *td, int lockwakeup)
td->mutexes[id] = 0;
return 0;
- case RTTEST_LOCKBKL:
- if (td->bkl)
- return 0;
- td->bkl = 1;
-#ifdef CONFIG_LOCK_KERNEL
- lock_kernel();
-#endif
- td->bkl = 4;
- return 0;
-
- case RTTEST_UNLOCKBKL:
- if (td->bkl != 4)
- break;
-#ifdef CONFIG_LOCK_KERNEL
- unlock_kernel();
-#endif
- td->bkl = 0;
- return 0;
-
default:
break;
}
@@ -196,7 +168,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
break;
- case RTTEST_LOCKBKL:
default:
break;
}
@@ -229,8 +200,6 @@ void schedule_rt_mutex_test(struct rt_mutex *mutex)
td->event = atomic_add_return(1, &rttest_event);
return;
- case RTTEST_LOCKBKL:
- return;
default:
return;
}
@@ -380,11 +349,11 @@ static ssize_t sysfs_test_status(struct sys_device *dev, struct sysdev_attribute
spin_lock(&rttest_lock);
curr += sprintf(curr,
- "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, K: %d, M:",
+ "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:",
td->opcode, td->event, tsk->state,
(MAX_RT_PRIO - 1) - tsk->prio,
(MAX_RT_PRIO - 1) - tsk->normal_prio,
- tsk->pi_blocked_on, td->bkl);
+ tsk->pi_blocked_on);
for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--)
curr += sprintf(curr, "%d", td->mutexes[i]);
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..477e1bcc63f9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1880,7 +1880,7 @@ void account_system_vtime(struct task_struct *curr)
*/
if (hardirq_count())
__this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
+ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
__this_cpu_add(cpu_softirq_time, delta);
irq_time_write_end();
@@ -1920,8 +1920,40 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
sched_rt_avg_update(rq, irq_delta);
}
+static int irqtime_account_hi_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_hardirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->irq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
+static int irqtime_account_si_update(void)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ unsigned long flags;
+ u64 latest_ns;
+ int ret = 0;
+
+ local_irq_save(flags);
+ latest_ns = this_cpu_read(cpu_softirq_time);
+ if (cputime64_gt(nsecs_to_cputime64(latest_ns), cpustat->softirq))
+ ret = 1;
+ local_irq_restore(flags);
+ return ret;
+}
+
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
+#define sched_clock_irqtime (0)
+
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
rq->clock_task += delta;
@@ -2025,14 +2057,14 @@ inline int task_curr(const struct task_struct *p)
static inline void check_class_changed(struct rq *rq, struct task_struct *p,
const struct sched_class *prev_class,
- int oldprio, int running)
+ int oldprio)
{
if (prev_class != p->sched_class) {
if (prev_class->switched_from)
- prev_class->switched_from(rq, p, running);
- p->sched_class->switched_to(rq, p, running);
- } else
- p->sched_class->prio_changed(rq, p, oldprio, running);
+ prev_class->switched_from(rq, p);
+ p->sched_class->switched_to(rq, p);
+ } else if (oldprio != p->prio)
+ p->sched_class->prio_changed(rq, p, oldprio);
}
static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
@@ -2566,6 +2598,7 @@ static void __sched_fork(struct task_struct *p)
p->se.sum_exec_runtime = 0;
p->se.prev_sum_exec_runtime = 0;
p->se.nr_migrations = 0;
+ p->se.vruntime = 0;
#ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3568,6 +3601,32 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
}
/*
+ * Account system cpu time to a process and desired cpustat field
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime_scaled: cputime scaled by cpu frequency
+ * @target_cputime64: pointer to cpustat field that has to be updated
+ */
+static inline
+void __account_system_time(struct task_struct *p, cputime_t cputime,
+ cputime_t cputime_scaled, cputime64_t *target_cputime64)
+{
+ cputime64_t tmp = cputime_to_cputime64(cputime);
+
+ /* Add system time to process. */
+ p->stime = cputime_add(p->stime, cputime);
+ p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
+ account_group_system_time(p, cputime);
+
+ /* Add system time to cpustat. */
+ *target_cputime64 = cputime64_add(*target_cputime64, tmp);
+ cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+
+ /* Account for system time used */
+ acct_update_integrals(p);
+}
+
+/*
* Account system cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -3578,33 +3637,90 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- cputime64_t tmp;
+ cputime64_t *target_cputime64;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
return;
}
- /* Add system time to process. */
- p->stime = cputime_add(p->stime, cputime);
- p->stimescaled = cputime_add(p->stimescaled, cputime_scaled);
- account_group_system_time(p, cputime);
-
- /* Add system time to cpustat. */
- tmp = cputime_to_cputime64(cputime);
if (hardirq_count() - hardirq_offset)
- cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ target_cputime64 = &cpustat->irq;
else if (in_serving_softirq())
- cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ target_cputime64 = &cpustat->softirq;
else
- cpustat->system = cputime64_add(cpustat->system, tmp);
+ target_cputime64 = &cpustat->system;
- cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
+ __account_system_time(p, cputime, cputime_scaled, target_cputime64);
+}
- /* Account for system time used */
- acct_update_integrals(p);
+#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+/*
+ * Account a tick to a process and cpustat
+ * @p: the process that the cpu time gets accounted to
+ * @user_tick: is the tick from userspace
+ * @rq: the pointer to rq
+ *
+ * Tick demultiplexing follows the order
+ * - pending hardirq update
+ * - pending softirq update
+ * - user_time
+ * - idle_time
+ * - system time
+ * - check for guest_time
+ * - else account as system_time
+ *
+ * Check for hardirq is done both for system and user time as there is
+ * no timer going off while we are on hardirq and hence we may never get an
+ * opportunity to update it solely in system time.
+ * p->stime and friends are only updated on system time and not on irq or
+ * softirq time, as those do not count in task exec_runtime any more.
+ */
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq)
+{
+ cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
+ cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy);
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+
+ if (irqtime_account_hi_update()) {
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ } else if (irqtime_account_si_update()) {
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ } else if (this_cpu_ksoftirqd() == p) {
+ /*
+ * ksoftirqd time does not get accounted in cpu_softirq_time.
+ * So, we have to handle it separately here.
+ * Also, p->stime needs to be updated for ksoftirqd.
+ */
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->softirq);
+ } else if (user_tick) {
+ account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else if (p == rq->idle) {
+ account_idle_time(cputime_one_jiffy);
+ } else if (p->flags & PF_VCPU) { /* System time or guest time */
+ account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
+ } else {
+ __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
+ &cpustat->system);
+ }
}
+static void irqtime_account_idle_ticks(int ticks)
+{
+ int i;
+ struct rq *rq = this_rq();
+
+ for (i = 0; i < ticks; i++)
+ irqtime_account_process_tick(current, 0, rq);
+}
+#else
+static void irqtime_account_idle_ticks(int ticks) {}
+static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+ struct rq *rq) {}
+#endif
+
/*
* Account for involuntary wait time.
* @steal: the cpu time spent in involuntary wait
@@ -3645,6 +3761,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (sched_clock_irqtime) {
+ irqtime_account_process_tick(p, user_tick, rq);
+ return;
+ }
+
if (user_tick)
account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
@@ -3670,6 +3791,12 @@ void account_steal_ticks(unsigned long ticks)
*/
void account_idle_ticks(unsigned long ticks)
{
+
+ if (sched_clock_irqtime) {
+ irqtime_account_idle_ticks(ticks);
+ return;
+ }
+
account_idle_time(jiffies_to_cputime(ticks));
}
@@ -4570,11 +4697,10 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
task_rq_unlock(rq, &flags);
}
@@ -4902,11 +5028,10 @@ recheck:
if (running)
p->sched_class->set_curr_task(rq);
- if (on_rq) {
+ if (on_rq)
activate_task(rq, p, 0);
- check_class_changed(rq, p, prev_class, oldprio, running);
- }
+ check_class_changed(rq, p, prev_class, oldprio);
__task_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
@@ -7796,6 +7921,10 @@ static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
INIT_LIST_HEAD(&cfs_rq->tasks);
#ifdef CONFIG_FAIR_GROUP_SCHED
cfs_rq->rq = rq;
+ /* allow initial update_cfs_load() to truncate */
+#ifdef CONFIG_SMP
+ cfs_rq->load_stamp = 1;
+#endif
#endif
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
}
@@ -8109,6 +8238,8 @@ EXPORT_SYMBOL(__might_sleep);
#ifdef CONFIG_MAGIC_SYSRQ
static void normalize_task(struct rq *rq, struct task_struct *p)
{
+ const struct sched_class *prev_class = p->sched_class;
+ int old_prio = p->prio;
int on_rq;
on_rq = p->se.on_rq;
@@ -8119,6 +8250,8 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
activate_task(rq, p, 0);
resched_task(rq->curr);
}
+
+ check_class_changed(rq, p, prev_class, old_prio);
}
void normalize_rt_tasks(void)
@@ -8510,7 +8643,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
/* Propagate contribution to hierarchy */
raw_spin_lock_irqsave(&rq->lock, flags);
for_each_sched_entity(se)
- update_cfs_shares(group_cfs_rq(se), 0);
+ update_cfs_shares(group_cfs_rq(se));
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c26e2df450e..55040f3938d8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -540,7 +540,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
}
static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update);
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta);
+static void update_cfs_shares(struct cfs_rq *cfs_rq);
/*
* Update the current task's runtime statistics. Skip current tasks that
@@ -733,6 +733,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
now - cfs_rq->load_last > 4 * period) {
cfs_rq->load_period = 0;
cfs_rq->load_avg = 0;
+ delta = period - 1;
}
cfs_rq->load_stamp = now;
@@ -763,16 +764,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
list_del_leaf_cfs_rq(cfs_rq);
}
-static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
long load_weight, load, shares;
- load = cfs_rq->load.weight + weight_delta;
+ load = cfs_rq->load.weight;
load_weight = atomic_read(&tg->load_weight);
- load_weight -= cfs_rq->load_contribution;
load_weight += load;
+ load_weight -= cfs_rq->load_contribution;
shares = (tg->shares * load);
if (load_weight)
@@ -790,7 +790,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
{
if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
}
# else /* CONFIG_SMP */
@@ -798,8 +798,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
- long weight_delta)
+static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
{
return tg->shares;
}
@@ -824,7 +823,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
account_entity_enqueue(cfs_rq, se);
}
-static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static void update_cfs_shares(struct cfs_rq *cfs_rq)
{
struct task_group *tg;
struct sched_entity *se;
@@ -838,7 +837,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
if (likely(se->load.weight == tg->shares))
return;
#endif
- shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
+ shares = calc_cfs_shares(cfs_rq, tg);
reweight_entity(cfs_rq_of(se), se, shares);
}
@@ -847,7 +846,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
{
}
-static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
+static inline void update_cfs_shares(struct cfs_rq *cfs_rq)
{
}
@@ -978,8 +977,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
*/
update_curr(cfs_rq);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, se->load.weight);
account_entity_enqueue(cfs_rq, se);
+ update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0);
@@ -1041,7 +1040,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
update_cfs_load(cfs_rq, 0);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
/*
* Normalize the entity after updating the min_vruntime because the
@@ -1282,7 +1281,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
@@ -1312,7 +1311,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq = cfs_rq_of(se);
update_cfs_load(cfs_rq, 0);
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
}
hrtick_update(rq);
@@ -2123,7 +2122,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
* We need to update shares after updating tg->load_weight in
* order to adjust the weight of groups with long running tasks.
*/
- update_cfs_shares(cfs_rq, 0);
+ update_cfs_shares(cfs_rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
@@ -4079,33 +4078,62 @@ static void task_fork_fair(struct task_struct *p)
* Priority of the task has changed. Check to see if we preempt
* the current task.
*/
-static void prio_changed_fair(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
* this runqueue and our priority is higher than the current's
*/
- if (running) {
+ if (rq->curr == p) {
if (p->prio > oldprio)
resched_task(rq->curr);
} else
check_preempt_curr(rq, p, 0);
}
+static void switched_from_fair(struct rq *rq, struct task_struct *p)
+{
+ struct sched_entity *se = &p->se;
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ /*
+ * Ensure the task's vruntime is normalized, so that when it's
+ * switched back to the fair class the enqueue_entity(.flags=0) will
+ * do the right thing.
+ *
+ * If it was on_rq, then the dequeue_entity(.flags=0) will already
+ * have normalized the vruntime, if it was !on_rq, then only when
+ * the task is sleeping will it still have non-normalized vruntime.
+ */
+ if (!se->on_rq && p->state != TASK_RUNNING) {
+ /*
+ * Fix up our vruntime so that the current sleep doesn't
+ * cause 'unlimited' sleep bonus.
+ */
+ place_entity(cfs_rq, se, 0);
+ se->vruntime -= cfs_rq->min_vruntime;
+ }
+}
+
/*
* We switched to the sched_fair class.
*/
-static void switched_to_fair(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
+ if (!p->se.on_rq)
+ return;
+
/*
* We were most likely switched from sched_rt, so
* kick off the schedule if running, otherwise just see
* if we can still preempt the current task.
*/
- if (running)
+ if (rq->curr == p)
resched_task(rq->curr);
else
check_preempt_curr(rq, p, 0);
@@ -4191,6 +4219,7 @@ static const struct sched_class fair_sched_class = {
.task_fork = task_fork_fair,
.prio_changed = prio_changed_fair,
+ .switched_from = switched_from_fair,
.switched_to = switched_to_fair,
.get_rr_interval = get_rr_interval_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 9fa0f402c87c..c82f26c1b7c3 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -52,31 +52,15 @@ static void set_curr_task_idle(struct rq *rq)
{
}
-static void switched_to_idle(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_idle(struct rq *rq, struct task_struct *p)
{
- /* Can this actually happen?? */
- if (running)
- resched_task(rq->curr);
- else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
-static void prio_changed_idle(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
{
- /* This can happen for hot plug CPUS */
-
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else
- check_preempt_curr(rq, p, 0);
+ BUG();
}
static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c914ec747ca6..c381fdc18c64 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1595,8 +1595,7 @@ static void rq_offline_rt(struct rq *rq)
* When switch from the rt queue, we bring ourselves to a position
* that we might want to pull RT tasks from other runqueues.
*/
-static void switched_from_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
/*
* If there are other RT tasks then we will reschedule
@@ -1605,7 +1604,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
* we may need to handle the pulling of RT tasks
* now.
*/
- if (!rq->rt.rt_nr_running)
+ if (p->se.on_rq && !rq->rt.rt_nr_running)
pull_rt_task(rq);
}
@@ -1624,8 +1623,7 @@ static inline void init_sched_rt_class(void)
* with RT tasks. In this case we try to push them off to
* other runqueues.
*/
-static void switched_to_rt(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
int check_resched = 1;
@@ -1636,7 +1634,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* If that current running task is also an RT task
* then see if we can move to another run queue.
*/
- if (!running) {
+ if (p->se.on_rq && rq->curr != p) {
#ifdef CONFIG_SMP
if (rq->rt.overloaded && push_rt_task(rq) &&
/* Don't resched if we changed runqueues */
@@ -1652,10 +1650,13 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p,
* Priority of the task has changed. This may cause
* us to initiate a push or pull.
*/
-static void prio_changed_rt(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
- if (running) {
+ if (!p->se.on_rq)
+ return;
+
+ if (rq->curr == p) {
#ifdef CONFIG_SMP
/*
* If our priority decreases while running, we
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 2bf6b47058c1..84ec9bcf82d9 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -59,14 +59,13 @@ static void set_curr_task_stop(struct rq *rq)
{
}
-static void switched_to_stop(struct rq *rq, struct task_struct *p,
- int running)
+static void switched_to_stop(struct rq *rq, struct task_struct *p)
{
BUG(); /* its impossible to change to this class */
}
-static void prio_changed_stop(struct rq *rq, struct task_struct *p,
- int oldprio, int running)
+static void
+prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
{
BUG(); /* how!?, what priority? */
}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec388..0cee50487629 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
+DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
@@ -721,7 +721,6 @@ static int run_ksoftirqd(void * __bind_cpu)
{
set_current_state(TASK_INTERRUPTIBLE);
- current->flags |= PF_KSOFTIRQD;
while (!kthread_should_stop()) {
preempt_disable();
if (!local_softirq_pending()) {
diff --git a/kernel/time.c b/kernel/time.c
index 32174359576f..55337a816b20 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -645,7 +645,7 @@ u64 nsec_to_clock_t(u64 x)
}
/**
- * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
* @n: nsecs in u64
*
@@ -657,7 +657,7 @@ u64 nsec_to_clock_t(u64 x)
* NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
* ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
*/
-unsigned long nsecs_to_jiffies(u64 n)
+u64 nsecs_to_jiffies64(u64 n)
{
#if (NSEC_PER_SEC % HZ) == 0
/* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */
@@ -674,6 +674,25 @@ unsigned long nsecs_to_jiffies(u64 n)
#endif
}
+
+/**
+ * nsecs_to_jiffies - Convert nsecs in u64 to jiffies
+ *
+ * @n: nsecs in u64
+ *
+ * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64.
+ * And this doesn't return MAX_JIFFY_OFFSET since this function is designed
+ * for scheduler, not for use in device drivers to calculate timeout value.
+ *
+ * note:
+ * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512)
+ * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years
+ */
+unsigned long nsecs_to_jiffies(u64 n)
+{
+ return (unsigned long)nsecs_to_jiffies64(n);
+}
+
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
diff --git a/lib/rwsem.c b/lib/rwsem.c
index f236d7cd5cf3..aa7c3052261f 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -222,8 +222,7 @@ rwsem_down_failed_common(struct rw_semaphore *sem,
/*
* wait for the read lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_read_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
-RWSEM_ACTIVE_READ_BIAS);
@@ -232,8 +231,7 @@ rwsem_down_read_failed(struct rw_semaphore *sem)
/*
* wait for the write lock to be granted
*/
-asmregparm struct rw_semaphore __sched *
-rwsem_down_write_failed(struct rw_semaphore *sem)
+struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
{
return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
-RWSEM_ACTIVE_WRITE_BIAS);
@@ -243,7 +241,7 @@ rwsem_down_write_failed(struct rw_semaphore *sem)
* handle waking up a waiter on the semaphore
* - up_read/up_write has decremented the active part of count if we come here
*/
-asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
@@ -263,7 +261,7 @@ asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
* - caller incremented waiting part of count and discovered it still negative
* - just wake up any readers at the front of the queue
*/
-asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4c0383da1c9a..58848e3e392c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2654,11 +2654,6 @@ sub process {
WARN("Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
}
-# SPIN_LOCK_UNLOCKED & RW_LOCK_UNLOCKED are deprecated
- if ($line =~ /\b(SPIN_LOCK_UNLOCKED|RW_LOCK_UNLOCKED)/) {
- ERROR("Use of $1 is deprecated: see Documentation/spinlocks.txt\n" . $herecurr);
- }
-
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
CHK("if this code is redundant consider removing it\n" .
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 86b797a35aa6..fcc51fe0195c 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -73,6 +73,10 @@ OPTIONS
(Only for --vars) Show external defined variables in addition to local
variables.
+-F::
+--funcs::
+ Show available functions in given module or kernel.
+
-f::
--force::
Forcibly add events with existing name.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7141c42e1469..638e8e146bb9 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -402,6 +402,7 @@ LIB_H += util/debug.h
LIB_H += util/debugfs.h
LIB_H += util/event.h
LIB_H += util/evsel.h
+LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
@@ -425,6 +426,7 @@ LIB_H += util/values.h
LIB_H += util/sort.h
LIB_H += util/hist.h
LIB_H += util/thread.h
+LIB_H += util/thread_map.h
LIB_H += util/trace-event.h
LIB_H += util/probe-finder.h
LIB_H += util/probe-event.h
@@ -440,6 +442,7 @@ LIB_OBJS += $(OUTPUT)util/ctype.o
LIB_OBJS += $(OUTPUT)util/debugfs.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
LIB_OBJS += $(OUTPUT)util/evsel.o
LIB_OBJS += $(OUTPUT)util/exec_cmd.o
LIB_OBJS += $(OUTPUT)util/help.o
@@ -469,6 +472,7 @@ LIB_OBJS += $(OUTPUT)util/map.o
LIB_OBJS += $(OUTPUT)util/pstack.o
LIB_OBJS += $(OUTPUT)util/session.o
LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
LIB_OBJS += $(OUTPUT)util/trace-event-read.o
LIB_OBJS += $(OUTPUT)util/trace-event-info.o
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index add163c9f0e7..6cf708aba7c9 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -52,6 +52,7 @@ static struct {
bool show_lines;
bool show_vars;
bool show_ext_vars;
+ bool show_funcs;
bool mod_events;
int nevents;
struct perf_probe_event events[MAX_PROBES];
@@ -221,6 +222,8 @@ static const struct option options[] = {
OPT__DRY_RUN(&probe_event_dry_run),
OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
"Set how many probe points can be found for a probe."),
+ OPT_BOOLEAN('F', "funcs", &params.show_funcs,
+ "Show potential probe-able functions."),
OPT_END()
};
@@ -246,7 +249,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
params.max_probe_points = MAX_PROBES;
if ((!params.nevents && !params.dellist && !params.list_events &&
- !params.show_lines))
+ !params.show_lines && !params.show_funcs))
usage_with_options(probe_usage, options);
/*
@@ -267,12 +270,36 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
pr_err(" Error: Don't use --list with --vars.\n");
usage_with_options(probe_usage, options);
}
+ if (params.show_funcs) {
+ pr_err(" Error: Don't use --list with --funcs.\n");
+ usage_with_options(probe_usage, options);
+ }
ret = show_perf_probe_events();
if (ret < 0)
pr_err(" Error: Failed to show event list. (%d)\n",
ret);
return ret;
}
+ if (params.show_funcs) {
+ if (params.nevents != 0 || params.dellist) {
+ pr_err(" Error: Don't use --funcs with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_lines) {
+ pr_err(" Error: Don't use --funcs with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
+ if (params.show_vars) {
+ pr_err(" Error: Don't use --funcs with --vars.\n");
+ usage_with_options(probe_usage, options);
+ }
+ ret = show_available_funcs(params.target_module);
+ if (ret < 0)
+ pr_err(" Error: Failed to show functions."
+ " (%d)\n", ret);
+ return ret;
+ }
#ifdef DWARF_SUPPORT
if (params.show_lines) {
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b2f729fdb317..d7886307f6f4 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -18,17 +18,20 @@
#include "util/header.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/cpumap.h"
+#include "util/thread_map.h"
#include <unistd.h>
#include <sched.h>
#include <sys/mman.h>
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->id, x, y)
enum write_mode_t {
WRITE_FORCE,
@@ -46,7 +49,7 @@ static unsigned int user_freq = UINT_MAX;
static int freq = 1000;
static int output;
static int pipe_output = 0;
-static const char *output_name = "perf.data";
+static const char *output_name = NULL;
static int group = 0;
static int realtime_prio = 0;
static bool nodelay = false;
@@ -66,51 +69,17 @@ static bool sample_address = false;
static bool sample_time = false;
static bool no_buildid = false;
static bool no_buildid_cache = false;
+static struct perf_evlist *evsel_list;
static long samples = 0;
static u64 bytes_written = 0;
-static struct pollfd *event_array;
-
-static int nr_poll = 0;
-static int nr_cpu = 0;
-
static int file_new = 1;
static off_t post_processing_offset;
static struct perf_session *session;
static const char *cpu_list;
-struct mmap_data {
- void *base;
- unsigned int mask;
- unsigned int prev;
-};
-
-static struct mmap_data mmap_array[MAX_NR_CPUS];
-
-static unsigned long mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- long head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void mmap_write_tail(struct mmap_data *md, unsigned long tail)
-{
- struct perf_event_mmap_page *pc = md->base;
-
- /*
- * ensure all reads are done before we write the tail out.
- */
- /* mb(); */
- pc->data_tail = tail;
-}
-
static void advance_output(size_t size)
{
bytes_written += size;
@@ -139,9 +108,9 @@ static int process_synthesized_event(event_t *event,
return 0;
}
-static void mmap_read(struct mmap_data *md)
+static void mmap_read(struct perf_mmap *md)
{
- unsigned int head = mmap_read_head(md);
+ unsigned int head = perf_mmap__read_head(md);
unsigned int old = md->prev;
unsigned char *data = md->base + page_size;
unsigned long size;
@@ -185,7 +154,7 @@ static void mmap_read(struct mmap_data *md)
write_output(buf, size);
md->prev = old;
- mmap_write_tail(md, old);
+ perf_mmap__write_tail(md, old);
}
static volatile int done = 0;
@@ -209,8 +178,6 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
-static int group_fd;
-
static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr)
{
struct perf_header_attr *h_attr;
@@ -234,28 +201,47 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
char *filter = evsel->filter;
struct perf_event_attr *attr = &evsel->attr;
struct perf_header_attr *h_attr;
- int track = !evsel->idx; /* only the first counter needs these */
+ struct perf_sample_id *sid;
int thread_index;
int ret;
- struct {
- u64 count;
- u64 time_enabled;
- u64 time_running;
- u64 id;
- } read_data;
- /*
- * Check if parse_single_tracepoint_event has already asked for
- * PERF_SAMPLE_TIME.
- *
- * XXX this is kludgy but short term fix for problems introduced by
- * eac23d1c that broke 'perf script' by having different sample_types
- * when using multiple tracepoint events when we use a perf binary
- * that tries to use sample_id_all on an older kernel.
- *
- * We need to move counter creation to perf_session, support
- * different sample_types, etc.
- */
- bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
+
+ for (thread_index = 0; thread_index < threads->nr; thread_index++) {
+ h_attr = get_header_attr(attr, evsel->idx);
+ if (h_attr == NULL)
+ die("nomem\n");
+
+ if (!file_new) {
+ if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
+ fprintf(stderr, "incompatible append\n");
+ exit(-1);
+ }
+ }
+
+ sid = SID(evsel, cpu, thread_index);
+ if (perf_header_attr__add_id(h_attr, sid->id) < 0) {
+ pr_warning("Not enough memory to add id\n");
+ exit(-1);
+ }
+
+ if (filter != NULL) {
+ ret = ioctl(FD(evsel, cpu, thread_index),
+ PERF_EVENT_IOC_SET_FILTER, filter);
+ if (ret) {
+ error("failed to set filter with %d (%s)\n", errno,
+ strerror(errno));
+ exit(-1);
+ }
+ }
+ }
+
+ if (!sample_type)
+ sample_type = attr->sample_type;
+}
+
+static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ int track = !evsel->idx; /* only the first counter needs these */
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -263,7 +249,7 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (nr_counters > 1)
+ if (evlist->nr_entries > 1)
attr->sample_type |= PERF_SAMPLE_ID;
/*
@@ -315,19 +301,39 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->mmap = track;
attr->comm = track;
- attr->inherit = !no_inherit;
+
if (target_pid == -1 && target_tid == -1 && !system_wide) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
-retry_sample_id:
- attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+}
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
-try_again:
- FD(evsel, nr_cpu, thread_index) = sys_perf_event_open(attr, threads->map[thread_index], cpu, group_fd, 0);
+static void open_counters(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+ int cpu;
- if (FD(evsel, nr_cpu, thread_index) < 0) {
+ list_for_each_entry(pos, &evlist->entries, node) {
+ struct perf_event_attr *attr = &pos->attr;
+ /*
+ * Check if parse_single_tracepoint_event has already asked for
+ * PERF_SAMPLE_TIME.
+ *
+ * XXX this is kludgy but short term fix for problems introduced by
+ * eac23d1c that broke 'perf script' by having different sample_types
+ * when using multiple tracepoint events when we use a perf binary
+ * that tries to use sample_id_all on an older kernel.
+ *
+ * We need to move counter creation to perf_session, support
+ * different sample_types, etc.
+ */
+ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;
+
+ config_attr(pos, evlist);
+retry_sample_id:
+ attr->sample_id_all = sample_id_all_avail ? 1 : 0;
+try_again:
+ if (perf_evsel__open(pos, cpus, threads, group, !no_inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -364,7 +370,7 @@ try_again:
}
printf("\n");
error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, nr_cpu, thread_index), strerror(err));
+ err, strerror(err));
#if defined(__i386__) || defined(__x86_64__)
if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
@@ -375,90 +381,16 @@ try_again:
#endif
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
- exit(-1);
- }
-
- h_attr = get_header_attr(attr, evsel->idx);
- if (h_attr == NULL)
- die("nomem\n");
-
- if (!file_new) {
- if (memcmp(&h_attr->attr, attr, sizeof(*attr))) {
- fprintf(stderr, "incompatible append\n");
- exit(-1);
- }
- }
-
- if (read(FD(evsel, nr_cpu, thread_index), &read_data, sizeof(read_data)) == -1) {
- perror("Unable to read perf file descriptor");
- exit(-1);
- }
-
- if (perf_header_attr__add_id(h_attr, read_data.id) < 0) {
- pr_warning("Not enough memory to add id\n");
- exit(-1);
- }
-
- assert(FD(evsel, nr_cpu, thread_index) >= 0);
- fcntl(FD(evsel, nr_cpu, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, nr_cpu, thread_index);
-
- if (evsel->idx || thread_index) {
- struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_OUTPUT,
- FD(first, nr_cpu, 0));
- if (ret) {
- error("failed to set output: %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
- } else {
- mmap_array[nr_cpu].prev = 0;
- mmap_array[nr_cpu].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ | PROT_WRITE, MAP_SHARED, FD(evsel, nr_cpu, thread_index), 0);
- if (mmap_array[nr_cpu].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
-
- event_array[nr_poll].fd = FD(evsel, nr_cpu, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
- }
-
- if (filter != NULL) {
- ret = ioctl(FD(evsel, nr_cpu, thread_index),
- PERF_EVENT_IOC_SET_FILTER, filter);
- if (ret) {
- error("failed to set filter with %d (%s)\n", errno,
- strerror(errno));
- exit(-1);
- }
}
}
- if (!sample_type)
- sample_type = attr->sample_type;
-}
-
-static void open_counters(int cpu)
-{
- struct perf_evsel *pos;
-
- group_fd = -1;
-
- list_for_each_entry(pos, &evsel_list, node)
- create_counter(pos, cpu);
+ if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, false) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
- nr_cpu++;
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ list_for_each_entry(pos, &evlist->entries, node)
+ create_counter(pos, cpu);
+ }
}
static int process_buildids(void)
@@ -481,9 +413,9 @@ static void atexit_header(void)
if (!no_buildid)
process_buildids();
- perf_header__write(&session->header, output, true);
+ perf_header__write(&session->header, evsel_list, output, true);
perf_session__delete(session);
- perf_evsel_list__delete();
+ perf_evlist__delete(evsel_list);
symbol__exit();
}
}
@@ -533,9 +465,9 @@ static void mmap_read_all(void)
{
int i;
- for (i = 0; i < nr_cpu; i++) {
- if (mmap_array[i].base)
- mmap_read(&mmap_array[i]);
+ for (i = 0; i < cpus->nr; i++) {
+ if (evsel_list->mmap[i].base)
+ mmap_read(&evsel_list->mmap[i]);
}
if (perf_header__has_feat(&session->header, HEADER_TRACE_INFO))
@@ -566,18 +498,26 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
- if (!strcmp(output_name, "-"))
- pipe_output = 1;
- else if (!stat(output_name, &st) && st.st_size) {
- if (write_mode == WRITE_FORCE) {
- char oldname[PATH_MAX];
- snprintf(oldname, sizeof(oldname), "%s.old",
- output_name);
- unlink(oldname);
- rename(output_name, oldname);
+ if (!output_name) {
+ if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
+ pipe_output = 1;
+ else
+ output_name = "perf.data";
+ }
+ if (output_name) {
+ if (!strcmp(output_name, "-"))
+ pipe_output = 1;
+ else if (!stat(output_name, &st) && st.st_size) {
+ if (write_mode == WRITE_FORCE) {
+ char oldname[PATH_MAX];
+ snprintf(oldname, sizeof(oldname), "%s.old",
+ output_name);
+ unlink(oldname);
+ rename(output_name, oldname);
+ }
+ } else if (write_mode == WRITE_APPEND) {
+ write_mode = WRITE_FORCE;
}
- } else if (write_mode == WRITE_APPEND) {
- write_mode = WRITE_FORCE;
}
flags = O_CREAT|O_RDWR;
@@ -611,7 +551,7 @@ static int __cmd_record(int argc, const char **argv)
goto out_delete_session;
}
- if (have_tracepoints(&evsel_list))
+ if (have_tracepoints(&evsel_list->entries))
perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
/*
@@ -673,12 +613,7 @@ static int __cmd_record(int argc, const char **argv)
close(child_ready_pipe[0]);
}
- if (!system_wide && no_inherit && !cpu_list) {
- open_counters(-1);
- } else {
- for (i = 0; i < cpus->nr; i++)
- open_counters(cpus->map[i]);
- }
+ open_counters(evsel_list);
perf_session__set_sample_type(session, sample_type);
@@ -687,7 +622,8 @@ static int __cmd_record(int argc, const char **argv)
if (err < 0)
return err;
} else if (file_new) {
- err = perf_header__write(&session->header, output, false);
+ err = perf_header__write(&session->header, evsel_list,
+ output, false);
if (err < 0)
return err;
}
@@ -712,7 +648,7 @@ static int __cmd_record(int argc, const char **argv)
return err;
}
- if (have_tracepoints(&evsel_list)) {
+ if (have_tracepoints(&evsel_list->entries)) {
/*
* FIXME err <= 0 here actually means that
* there were no tracepoints so its not really
@@ -721,7 +657,7 @@ static int __cmd_record(int argc, const char **argv)
* return this more properly and also
* propagate errors that now are calling die()
*/
- err = event__synthesize_tracing_data(output, &evsel_list,
+ err = event__synthesize_tracing_data(output, evsel_list,
process_synthesized_event,
session);
if (err <= 0) {
@@ -789,15 +725,15 @@ static int __cmd_record(int argc, const char **argv)
if (hits == samples) {
if (done)
break;
- err = poll(event_array, nr_poll, -1);
+ err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
waking++;
}
if (done) {
- for (i = 0; i < nr_cpu; i++) {
+ for (i = 0; i < cpus->nr; i++) {
struct perf_evsel *pos;
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
for (thread = 0;
thread < threads->nr;
thread++)
@@ -838,10 +774,10 @@ static const char * const record_usage[] = {
static bool force, append_file;
const struct option record_options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
- OPT_CALLBACK(0, "filter", NULL, "filter",
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter",
"event filter", parse_filter),
OPT_INTEGER('p', "pid", &target_pid,
"record events on existing process id"),
@@ -892,6 +828,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
int err = -ENOMEM;
struct perf_evsel *pos;
+ evsel_list = perf_evlist__new();
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target_pid == -1 && target_tid == -1 &&
@@ -913,7 +853,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
if (no_buildid_cache || no_buildid)
disable_buildid_cache();
- if (list_empty(&evsel_list) && perf_evsel_list__create_default() < 0) {
+ if (evsel_list->nr_entries == 0 &&
+ perf_evlist__add_default(evsel_list) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_symbol_exit;
}
@@ -927,21 +868,22 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
usage_with_options(record_usage, record_options);
}
- cpus = cpu_map__new(cpu_list);
- if (cpus == NULL) {
- perror("failed to parse CPUs map");
- return -1;
- }
+ if (target_tid != -1)
+ cpus = cpu_map__dummy_new();
+ else
+ cpus = cpu_map__new(cpu_list);
- list_for_each_entry(pos, &evsel_list, node) {
+ if (cpus == NULL)
+ usage_with_options(record_usage, record_options);
+
+ list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
goto out_free_fd;
if (perf_header__push_event(pos->attr.config, event_name(pos)))
goto out_free_fd;
}
- event_array = malloc((sizeof(struct pollfd) * MAX_NR_CPUS *
- MAX_COUNTERS * threads->nr));
- if (!event_array)
+
+ if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0)
goto out_free_fd;
if (user_interval != ULLONG_MAX)
@@ -959,13 +901,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
} else {
fprintf(stderr, "frequency and count are zero, aborting\n");
err = -EINVAL;
- goto out_free_event_array;
+ goto out_free_fd;
}
err = __cmd_record(argc, argv);
-out_free_event_array:
- free(event_array);
out_free_fd:
thread_map__delete(threads);
threads = NULL;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index c27e31f289e6..f6a43493d1d0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -81,18 +81,17 @@ static int perf_session__add_hist_entry(struct perf_session *self,
struct addr_location *al,
struct sample_data *data)
{
- struct map_symbol *syms = NULL;
struct symbol *parent = NULL;
- int err = -ENOMEM;
+ int err = 0;
struct hist_entry *he;
struct hists *hists;
struct perf_event_attr *attr;
if ((sort__has_parent || symbol_conf.use_callchain) && data->callchain) {
- syms = perf_session__resolve_callchain(self, al->thread,
- data->callchain, &parent);
- if (syms == NULL)
- return -ENOMEM;
+ err = perf_session__resolve_callchain(self, al->thread,
+ data->callchain, &parent);
+ if (err)
+ return err;
}
attr = perf_header__find_attr(data->id, &self->header);
@@ -101,16 +100,17 @@ static int perf_session__add_hist_entry(struct perf_session *self,
else
hists = perf_session__hists_findnew(self, data->id, 0, 0);
if (hists == NULL)
- goto out_free_syms;
+ return -ENOMEM;
+
he = __hists__add_entry(hists, al, parent, data->period);
if (he == NULL)
- goto out_free_syms;
- err = 0;
+ return -ENOMEM;
+
if (symbol_conf.use_callchain) {
- err = callchain_append(he->callchain, data->callchain, syms,
+ err = callchain_append(he->callchain, &self->callchain_cursor,
data->period);
if (err)
- goto out_free_syms;
+ return err;
}
/*
* Only in the newt browser we are doing integrated annotation,
@@ -119,8 +119,7 @@ static int perf_session__add_hist_entry(struct perf_session *self,
*/
if (use_browser > 0)
err = hist_entry__inc_addr_samples(he, al->addr);
-out_free_syms:
- free(syms);
+
return err;
}
@@ -222,7 +221,7 @@ static int perf_session__setup_sample_type(struct perf_session *self)
} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
return -EINVAL;
@@ -424,7 +423,7 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
if (tok2)
callchain_param.print_limit = strtod(tok2, &endptr);
setup:
- if (register_callchain_param(&callchain_param) < 0) {
+ if (callchain_register_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain params\n");
return -1;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a482a191a0ca..8906adfdbd8e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,11 +43,13 @@
#include "util/parse-options.h"
#include "util/parse-events.h"
#include "util/event.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
+#include "util/thread_map.h"
#include <sys/prctl.h>
#include <math.h>
@@ -71,6 +73,8 @@ static struct perf_event_attr default_attrs[] = {
};
+struct perf_evlist *evsel_list;
+
static bool system_wide = false;
static struct cpu_map *cpus;
static int run_idx = 0;
@@ -166,7 +170,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
PERF_FORMAT_TOTAL_TIME_RUNNING;
if (system_wide)
- return perf_evsel__open_per_cpu(evsel, cpus);
+ return perf_evsel__open_per_cpu(evsel, cpus, false, false);
attr->inherit = !no_inherit;
if (target_pid == -1 && target_tid == -1) {
@@ -174,7 +178,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
attr->enable_on_exec = 1;
}
- return perf_evsel__open_per_thread(evsel, threads);
+ return perf_evsel__open_per_thread(evsel, threads, false, false);
}
/*
@@ -309,7 +313,7 @@ static int run_perf_stat(int argc __used, const char **argv)
close(child_ready_pipe[0]);
}
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
if (errno == -EPERM || errno == -EACCES) {
error("You may not have permission to collect %sstats.\n"
@@ -347,12 +351,12 @@ static int run_perf_stat(int argc __used, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter(counter);
perf_evsel__close_fd(counter, cpus->nr, 1);
}
} else {
- list_for_each_entry(counter, &evsel_list, node) {
+ list_for_each_entry(counter, &evsel_list->entries, node) {
read_counter_aggr(counter);
perf_evsel__close_fd(counter, cpus->nr, threads->nr);
}
@@ -555,10 +559,10 @@ static void print_stat(int argc, const char **argv)
}
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter(counter);
} else {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list->entries, node)
print_counter_aggr(counter);
}
@@ -610,7 +614,7 @@ static int stat__set_big_num(const struct option *opt __used,
}
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
@@ -648,6 +652,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
setlocale(LC_ALL, "");
+ evsel_list = perf_evlist__new();
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
@@ -679,17 +687,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
usage_with_options(stat_usage, options);
/* Set attrs and nr_counters if no event is selected and !null_run */
- if (!null_run && !nr_counters) {
+ if (!null_run && !evsel_list->nr_entries) {
size_t c;
- nr_counters = ARRAY_SIZE(default_attrs);
-
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(&default_attrs[c],
- nr_counters);
+ pos = perf_evsel__new(&default_attrs[c], c);
if (pos == NULL)
goto out;
- list_add(&pos->node, &evsel_list);
+ perf_evlist__add(evsel_list, pos);
}
}
@@ -713,7 +718,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
return -1;
}
- list_for_each_entry(pos, &evsel_list, node) {
+ list_for_each_entry(pos, &evsel_list->entries, node) {
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
@@ -741,9 +746,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (status != -1)
print_stat(argc, argv);
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
+ list_for_each_entry(pos, &evsel_list->entries, node)
perf_evsel__free_stat_priv(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete(evsel_list);
out:
thread_map__delete(threads);
threads = NULL;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 5dcdba653d70..231e3e21810c 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -7,10 +7,11 @@
#include "util/cache.h"
#include "util/debug.h"
+#include "util/evlist.h"
#include "util/parse-options.h"
-#include "util/session.h"
+#include "util/parse-events.h"
#include "util/symbol.h"
-#include "util/thread.h"
+#include "util/thread_map.h"
static long page_size;
@@ -238,14 +239,14 @@ out:
#include "util/evsel.h"
#include <sys/types.h>
-static int trace_event__id(const char *event_name)
+static int trace_event__id(const char *evname)
{
char *filename;
int err = -1, fd;
if (asprintf(&filename,
"/sys/kernel/debug/tracing/events/syscalls/%s/id",
- event_name) < 0)
+ evname) < 0)
return -1;
fd = open(filename, O_RDONLY);
@@ -289,7 +290,7 @@ static int test__open_syscall_event(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (perf_evsel__open_per_thread(evsel, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -347,9 +348,9 @@ static int test__open_syscall_event_on_all_cpus(void)
}
cpus = cpu_map__new(NULL);
- if (threads == NULL) {
- pr_debug("thread_map__new\n");
- return -1;
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_thread_map_delete;
}
@@ -364,7 +365,7 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_thread_map_delete;
}
- if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ if (perf_evsel__open(evsel, cpus, threads, false, false) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
strerror(errno));
@@ -408,6 +409,8 @@ static int test__open_syscall_event_on_all_cpus(void)
goto out_close_fd;
}
+ err = 0;
+
for (cpu = 0; cpu < cpus->nr; ++cpu) {
unsigned int expected;
@@ -416,18 +419,18 @@ static int test__open_syscall_event_on_all_cpus(void)
if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
pr_debug("perf_evsel__open_read_on_cpu\n");
- goto out_close_fd;
+ err = -1;
+ break;
}
expected = nr_open_calls + cpu;
if (evsel->counts->cpu[cpu].val != expected) {
pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
- goto out_close_fd;
+ err = -1;
}
}
- err = 0;
out_close_fd:
perf_evsel__close_fd(evsel, 1, threads->nr);
out_evsel_delete:
@@ -437,6 +440,159 @@ out_thread_map_delete:
return err;
}
+/*
+ * This test picks a random number of calls for each of a few getpid-like
+ * syscalls, establishes an mmap for a group of events that are created to
+ * monitor those syscalls and then makes the calls.
+ *
+ * It then reads the events back through the mmap, using the PERF_SAMPLE_ID
+ * generated sample.id field to map each one to its perf_evsel instance.
+ *
+ * Finally it checks that the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls made.
+ */
+static int test__basic_mmap(void)
+{
+	int err = -1;
+	event_t *event;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evlist *evlist;
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_TRACEPOINT,
+		.read_format	= PERF_FORMAT_ID,
+		.sample_type	= PERF_SAMPLE_ID,
+		.watermark	= 0,
+	};
+	cpu_set_t cpu_set;
+	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
+					"getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
+				      (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+	int ids[nsyscalls];
+	unsigned int nr_events[nsyscalls],
+		     expected_nr_events[nsyscalls], i, j;
+	struct perf_evsel *evsels[nsyscalls], *evsel;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		char name[64];
+
+		snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+		ids[i] = trace_event__id(name);
+		if (ids[i] < 0) {
+			pr_debug("Is debugfs mounted on /sys/kernel/debug?\n");
+			return -1;
+		}
+		nr_events[i] = 0;
+		expected_nr_events[i] = random() % 257;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (cpus == NULL) {
+		pr_debug("cpu_map__new\n");
+		goto out_free_threads;
+	}
+
+	/* Run only on cpus->map[0]: samples are read back from mmap index 0 */
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpus->map[0], &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+		pr_debug("sched_setaffinity() failed on CPU %d: %s\n",
+			 cpus->map[0], strerror(errno));
+		goto out_free_cpus;
+	}
+
+	evlist = perf_evlist__new();
+	if (evlist == NULL) {
+		pr_debug("perf_evlist__new\n");
+		goto out_free_cpus;
+	}
+
+	/* anonymous union fields, can't be initialized above */
+	attr.wakeup_events = 1;
+	attr.sample_period = 1;
+
+	for (i = 0; i < nsyscalls; ++i) {
+		attr.config = ids[i];
+		evsels[i] = perf_evsel__new(&attr, i);
+		if (evsels[i] == NULL) {
+			pr_debug("perf_evsel__new\n");
+			goto out_free_evlist;
+		}
+
+		perf_evlist__add(evlist, evsels[i]);
+
+		if (perf_evsel__open(evsels[i], cpus, threads, false, false) < 0) {
+			pr_debug("failed to open counter: %s, "
+				 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+				 strerror(errno));
+			goto out_close_fd;
+		}
+	}
+
+	if (perf_evlist__mmap(evlist, cpus, threads, 128, true) < 0) {
+		pr_debug("failed to mmap events: %d (%s)\n", errno,
+			 strerror(errno));
+		goto out_close_fd;
+	}
+
+	for (i = 0; i < nsyscalls; ++i)
+		for (j = 0; j < expected_nr_events[i]; ++j) {
+			int foo = syscalls[i]();
+			++foo;
+		}
+
+	while ((event = perf_evlist__read_on_cpu(evlist, 0)) != NULL) {
+		struct sample_data sample;
+
+		if (event->header.type != PERF_RECORD_SAMPLE) {
+			pr_debug("unexpected %s event\n",
+				 event__get_event_name(event->header.type));
+			goto out_munmap;
+		}
+
+		event__parse_sample(event, attr.sample_type, false, &sample);
+		evsel = perf_evlist__id2evsel(evlist, sample.id);
+		if (evsel == NULL) {
+			pr_debug("event with id %" PRIu64
+				 " doesn't map to an evsel\n", sample.id);
+			goto out_munmap;
+		}
+		nr_events[evsel->idx]++;
+	}
+
+	list_for_each_entry(evsel, &evlist->entries, node) {
+		if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+			pr_debug("expected %u %s events, got %u\n",
+				 expected_nr_events[evsel->idx],
+				 event_name(evsel), nr_events[evsel->idx]);
+			goto out_munmap;
+		}
+	}
+
+	err = 0;
+out_munmap:
+	perf_evlist__munmap(evlist, 1);
+out_close_fd:
+	for (i = 0; i < nsyscalls; ++i)
+		perf_evsel__close_fd(evsels[i], 1, threads->nr);
+out_free_evlist:
+	perf_evlist__delete(evlist);
+out_free_cpus:
+	cpu_map__delete(cpus);
+out_free_threads:
+	thread_map__delete(threads);
+	return err;
+#undef nsyscalls
+}
+
static struct test {
const char *desc;
int (*func)(void);
@@ -454,6 +610,10 @@ static struct test {
.func = test__open_syscall_event_on_all_cpus,
},
{
+ .desc = "read samples using the mmap interface",
+ .func = test__basic_mmap,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b6998e055767..ce2e50c891c7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -21,10 +21,12 @@
#include "perf.h"
#include "util/color.h"
+#include "util/evlist.h"
#include "util/evsel.h"
#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
+#include "util/thread_map.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
@@ -60,6 +62,8 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+struct perf_evlist *evsel_list;
+
static bool system_wide = false;
static int default_interval = 0;
@@ -75,7 +79,7 @@ static struct cpu_map *cpus;
static int realtime_prio = 0;
static bool group = false;
static unsigned int page_size;
-static unsigned int mmap_pages = 16;
+static unsigned int mmap_pages = 128;
static int freq = 1000; /* 1 KHz */
static int delay_secs = 2;
@@ -267,7 +271,7 @@ static void __zero_source_counters(struct sym_entry *syme)
line = syme->src->lines;
while (line) {
- for (i = 0; i < nr_counters; i++)
+ for (i = 0; i < evsel_list->nr_entries; i++)
line->count[i] = 0;
line = line->next;
}
@@ -414,7 +418,7 @@ static double sym_weight(const struct sym_entry *sym)
if (!display_weighted)
return weight;
- for (counter = 1; counter < nr_counters-1; counter++)
+ for (counter = 1; counter < evsel_list->nr_entries - 1; counter++)
weight *= sym->count[counter];
weight /= (sym->count[counter] + 1);
@@ -501,7 +505,7 @@ static void print_sym_table(void)
rb_insert_active_sym(&tmp, syme);
sum_ksamples += syme->snap_count;
- for (j = 0; j < nr_counters; j++)
+ for (j = 0; j < evsel_list->nr_entries; j++)
syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
} else
list_remove_active_sym(syme);
@@ -535,9 +539,9 @@ static void print_sym_table(void)
esamples_percent);
}
- if (nr_counters == 1 || !display_weighted) {
+ if (evsel_list->nr_entries == 1 || !display_weighted) {
struct perf_evsel *first;
- first = list_entry(evsel_list.next, struct perf_evsel, node);
+ first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
printf("%" PRIu64, (uint64_t)first->attr.sample_period);
if (freq)
printf("Hz ");
@@ -547,7 +551,7 @@ static void print_sym_table(void)
if (!display_weighted)
printf("%s", event_name(sym_evsel));
- else list_for_each_entry(counter, &evsel_list, node) {
+ else list_for_each_entry(counter, &evsel_list->entries, node) {
if (counter->idx)
printf("/");
@@ -606,7 +610,7 @@ static void print_sym_table(void)
sym_width = winsize.ws_col - dso_width - 29;
}
putchar('\n');
- if (nr_counters == 1)
+ if (evsel_list->nr_entries == 1)
printf(" samples pcnt");
else
printf(" weight samples pcnt");
@@ -615,7 +619,7 @@ static void print_sym_table(void)
printf(" RIP ");
printf(" %-*.*s DSO\n", sym_width, sym_width, "function");
printf(" %s _______ _____",
- nr_counters == 1 ? " " : "______");
+ evsel_list->nr_entries == 1 ? " " : "______");
if (verbose)
printf(" ________________");
printf(" %-*.*s", sym_width, sym_width, graph_line);
@@ -634,7 +638,7 @@ static void print_sym_table(void)
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
sum_ksamples));
- if (nr_counters == 1 || !display_weighted)
+ if (evsel_list->nr_entries == 1 || !display_weighted)
printf("%20.2f ", syme->weight);
else
printf("%9.1f %10ld ", syme->weight, syme->snap_count);
@@ -744,7 +748,7 @@ static void print_mapped_keys(void)
fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
- if (nr_counters > 1)
+ if (evsel_list->nr_entries > 1)
fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_evsel));
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
@@ -753,7 +757,7 @@ static void print_mapped_keys(void)
fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
fprintf(stdout, "\t[S] stop annotation.\n");
- if (nr_counters > 1)
+ if (evsel_list->nr_entries > 1)
fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
fprintf(stdout,
@@ -783,7 +787,7 @@ static int key_mapped(int c)
return 1;
case 'E':
case 'w':
- return nr_counters > 1 ? 1 : 0;
+ return evsel_list->nr_entries > 1 ? 1 : 0;
default:
break;
}
@@ -831,22 +835,22 @@ static void handle_keypress(struct perf_session *session, int c)
signal(SIGWINCH, SIG_DFL);
break;
case 'E':
- if (nr_counters > 1) {
+ if (evsel_list->nr_entries > 1) {
fprintf(stderr, "\nAvailable events:");
- list_for_each_entry(sym_evsel, &evsel_list, node)
+ list_for_each_entry(sym_evsel, &evsel_list->entries, node)
fprintf(stderr, "\n\t%d %s", sym_evsel->idx, event_name(sym_evsel));
prompt_integer(&sym_counter, "Enter details event counter");
- if (sym_counter >= nr_counters) {
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+ if (sym_counter >= evsel_list->nr_entries) {
+ sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);
sym_counter = 0;
fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(sym_evsel));
sleep(1);
break;
}
- list_for_each_entry(sym_evsel, &evsel_list, node)
+ list_for_each_entry(sym_evsel, &evsel_list->entries, node)
if (sym_evsel->idx == sym_counter)
break;
} else sym_counter = 0;
@@ -930,6 +934,7 @@ repeat:
/* Tag samples to be skipped. */
static const char *skip_symbols[] = {
"default_idle",
+ "native_safe_halt",
"cpu_idle",
"enter_idle",
"exit_idle",
@@ -988,8 +993,7 @@ static int symbol_filter(struct map *map, struct symbol *sym)
static void event__process_sample(const event_t *self,
struct sample_data *sample,
- struct perf_session *session,
- struct perf_evsel *evsel)
+ struct perf_session *session)
{
u64 ip = self->ip.ip;
struct sym_entry *syme;
@@ -1082,8 +1086,12 @@ static void event__process_sample(const event_t *self,
syme = symbol__priv(al.sym);
if (!syme->skip) {
- syme->count[evsel->idx]++;
+ struct perf_evsel *evsel;
+
syme->origin = origin;
+ evsel = perf_evlist__id2evsel(evsel_list, sample->id);
+ assert(evsel != NULL);
+ syme->count[evsel->idx]++;
record_precise_ip(syme, evsel->idx, ip);
pthread_mutex_lock(&active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
@@ -1092,156 +1100,52 @@ static void event__process_sample(const event_t *self,
}
}
-struct mmap_data {
- void *base;
- int mask;
- unsigned int prev;
-};
-
-static int perf_evsel__alloc_mmap_per_thread(struct perf_evsel *evsel,
- int ncpus, int nthreads)
-{
- evsel->priv = xyarray__new(ncpus, nthreads, sizeof(struct mmap_data));
- return evsel->priv != NULL ? 0 : -ENOMEM;
-}
-
-static void perf_evsel__free_mmap(struct perf_evsel *evsel)
-{
- xyarray__delete(evsel->priv);
- evsel->priv = NULL;
-}
-
-static unsigned int mmap_read_head(struct mmap_data *md)
-{
- struct perf_event_mmap_page *pc = md->base;
- int head;
-
- head = pc->data_head;
- rmb();
-
- return head;
-}
-
-static void perf_session__mmap_read_counter(struct perf_session *self,
- struct perf_evsel *evsel,
- int cpu, int thread_idx)
+static void perf_session__mmap_read_cpu(struct perf_session *self, int cpu)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *md = xyarray__entry(mmap_array, cpu, thread_idx);
- unsigned int head = mmap_read_head(md);
- unsigned int old = md->prev;
- unsigned char *data = md->base + page_size;
struct sample_data sample;
- int diff;
-
- /*
- * If we're further behind than half the buffer, there's a chance
- * the writer will bite our tail and mess up the samples under us.
- *
- * If we somehow ended up ahead of the head, we got messed up.
- *
- * In either case, truncate and restart at head.
- */
- diff = head - old;
- if (diff > md->mask / 2 || diff < 0) {
- fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
-
- /*
- * head points to a known good entry, start there.
- */
- old = head;
- }
-
- for (; old != head;) {
- event_t *event = (event_t *)&data[old & md->mask];
+ event_t *event;
- event_t event_copy;
-
- size_t size = event->header.size;
-
- /*
- * Event straddles the mmap boundary -- header should always
- * be inside due to u64 alignment of output.
- */
- if ((old & md->mask) + size != ((old + size) & md->mask)) {
- unsigned int offset = old;
- unsigned int len = min(sizeof(*event), size), cpy;
- void *dst = &event_copy;
-
- do {
- cpy = min(md->mask + 1 - (offset & md->mask), len);
- memcpy(dst, &data[offset & md->mask], cpy);
- offset += cpy;
- dst += cpy;
- len -= cpy;
- } while (len);
-
- event = &event_copy;
- }
+ while ((event = perf_evlist__read_on_cpu(evsel_list, cpu)) != NULL) {
+ perf_session__parse_sample(self, event, &sample);
- event__parse_sample(event, self, &sample);
if (event->header.type == PERF_RECORD_SAMPLE)
- event__process_sample(event, &sample, self, evsel);
+ event__process_sample(event, &sample, self);
else
event__process(event, &sample, self);
- old += size;
}
-
- md->prev = old;
}
-static struct pollfd *event_array;
-
static void perf_session__mmap_read(struct perf_session *self)
{
- struct perf_evsel *counter;
- int i, thread_index;
-
- for (i = 0; i < cpus->nr; i++) {
- list_for_each_entry(counter, &evsel_list, node) {
- for (thread_index = 0;
- thread_index < threads->nr;
- thread_index++) {
- perf_session__mmap_read_counter(self,
- counter, i, thread_index);
- }
- }
- }
-}
+ int i;
-int nr_poll;
-int group_fd;
+ for (i = 0; i < cpus->nr; i++)
+ perf_session__mmap_read_cpu(self, i);
+}
-static void start_counter(int i, struct perf_evsel *evsel)
+static void start_counters(struct perf_evlist *evlist)
{
- struct xyarray *mmap_array = evsel->priv;
- struct mmap_data *mm;
- struct perf_event_attr *attr;
- int cpu = -1;
- int thread_index;
-
- if (target_tid == -1)
- cpu = cpus->map[i];
+ struct perf_evsel *counter;
- attr = &evsel->attr;
+ list_for_each_entry(counter, &evlist->entries, node) {
+ struct perf_event_attr *attr = &counter->attr;
- attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+ attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
- if (freq) {
- attr->sample_type |= PERF_SAMPLE_PERIOD;
- attr->freq = 1;
- attr->sample_freq = freq;
- }
+ if (freq) {
+ attr->sample_type |= PERF_SAMPLE_PERIOD;
+ attr->freq = 1;
+ attr->sample_freq = freq;
+ }
- attr->inherit = (cpu < 0) && inherit;
- attr->mmap = 1;
+ if (evlist->nr_entries > 1) {
+ attr->sample_type |= PERF_SAMPLE_ID;
+ attr->read_format |= PERF_FORMAT_ID;
+ }
- for (thread_index = 0; thread_index < threads->nr; thread_index++) {
+ attr->mmap = 1;
try_again:
- FD(evsel, i, thread_index) = sys_perf_event_open(attr,
- threads->map[thread_index], cpu, group_fd, 0);
-
- if (FD(evsel, i, thread_index) < 0) {
+ if (perf_evsel__open(counter, cpus, threads, group, inherit) < 0) {
int err = errno;
if (err == EPERM || err == EACCES)
@@ -1253,8 +1157,8 @@ try_again:
* based cpu-clock-tick sw counter, which
* is always available even if no PMU support:
*/
- if (attr->type == PERF_TYPE_HARDWARE
- && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
+ if (attr->type == PERF_TYPE_HARDWARE &&
+ attr->config == PERF_COUNT_HW_CPU_CYCLES) {
if (verbose)
warning(" ... trying to fall back to cpu-clock-ticks\n");
@@ -1264,39 +1168,23 @@ try_again:
goto try_again;
}
printf("\n");
- error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
- FD(evsel, i, thread_index), strerror(err));
+ error("sys_perf_event_open() syscall returned with %d "
+ "(%s). /bin/dmesg may provide additional information.\n",
+ err, strerror(err));
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
}
- assert(FD(evsel, i, thread_index) >= 0);
- fcntl(FD(evsel, i, thread_index), F_SETFL, O_NONBLOCK);
-
- /*
- * First counter acts as the group leader:
- */
- if (group && group_fd == -1)
- group_fd = FD(evsel, i, thread_index);
-
- event_array[nr_poll].fd = FD(evsel, i, thread_index);
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mm = xyarray__entry(mmap_array, i, thread_index);
- mm->prev = 0;
- mm->mask = mmap_pages*page_size - 1;
- mm->base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ, MAP_SHARED, FD(evsel, i, thread_index), 0);
- if (mm->base == MAP_FAILED)
- die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
+
+ if (perf_evlist__mmap(evlist, cpus, threads, mmap_pages, true) < 0)
+ die("failed to mmap with %d (%s)\n", errno, strerror(errno));
}
static int __cmd_top(void)
{
pthread_t thread;
- struct perf_evsel *counter;
- int i, ret;
+ struct perf_evsel *first;
+ int ret;
/*
* FIXME: perf_session__new should allow passing a O_MMAP, so that all this
* mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
@@ -1310,14 +1198,12 @@ static int __cmd_top(void)
else
event__synthesize_threads(event__process, session);
- for (i = 0; i < cpus->nr; i++) {
- group_fd = -1;
- list_for_each_entry(counter, &evsel_list, node)
- start_counter(i, counter);
- }
+ start_counters(evsel_list);
+ first = list_entry(evsel_list->entries.next, struct perf_evsel, node);
+ perf_session__set_sample_type(session, first->attr.sample_type);
/* Wait for a minimal set of events before starting the snapshot */
- poll(&event_array[0], nr_poll, 100);
+ poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
perf_session__mmap_read(session);
@@ -1342,7 +1228,7 @@ static int __cmd_top(void)
perf_session__mmap_read(session);
if (hits == samples)
- ret = poll(event_array, nr_poll, 100);
+ ret = poll(evsel_list->pollfd, evsel_list->nr_fds, 100);
}
return 0;
@@ -1354,7 +1240,7 @@ static const char * const top_usage[] = {
};
static const struct option options[] = {
- OPT_CALLBACK('e', "event", NULL, "event",
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
"event selector. use 'perf list' to list available events",
parse_events),
OPT_INTEGER('c', "count", &default_interval,
@@ -1404,6 +1290,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
struct perf_evsel *pos;
int status = -ENOMEM;
+ evsel_list = perf_evlist__new();
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
page_size = sysconf(_SC_PAGE_SIZE);
argc = parse_options(argc, argv, options, top_usage, 0);
@@ -1419,11 +1309,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
usage_with_options(top_usage, options);
}
- event_array = malloc((sizeof(struct pollfd) *
- MAX_NR_CPUS * MAX_COUNTERS * threads->nr));
- if (!event_array)
- return -ENOMEM;
-
/* CPU and PID are mutually exclusive */
if (target_tid > 0 && cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
@@ -1431,7 +1316,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
cpu_list = NULL;
}
- if (!nr_counters && perf_evsel_list__create_default() < 0) {
+ if (!evsel_list->nr_entries &&
+ perf_evlist__add_default(evsel_list) < 0) {
pr_err("Not enough memory for event selector list\n");
return -ENOMEM;
}
@@ -1459,9 +1345,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
if (cpus == NULL)
usage_with_options(top_usage, options);
- list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_mmap_per_thread(pos, cpus->nr, threads->nr) < 0 ||
- perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
+ list_for_each_entry(pos, &evsel_list->entries, node) {
+ if (perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
goto out_free_fd;
/*
* Fill in the ones not specifically initialized via -c:
@@ -1472,10 +1357,14 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
pos->attr.sample_period = default_interval;
}
- sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+ if (perf_evlist__alloc_pollfd(evsel_list, cpus->nr, threads->nr) < 0 ||
+ perf_evlist__alloc_mmap(evsel_list, cpus->nr) < 0)
+ goto out_free_fd;
+
+ sym_evsel = list_entry(evsel_list->entries.next, struct perf_evsel, node);
symbol_conf.priv_size = (sizeof(struct sym_entry) +
- (nr_counters + 1) * sizeof(unsigned long));
+ (evsel_list->nr_entries + 1) * sizeof(unsigned long));
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
if (symbol__init() < 0)
@@ -1489,9 +1378,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
status = __cmd_top();
out_free_fd:
- list_for_each_entry(pos, &evsel_list, node)
- perf_evsel__free_mmap(pos);
- perf_evsel_list__delete();
+ perf_evlist__delete(evsel_list);
return status;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 95aaf565c704..a5fc660c1f12 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -94,6 +94,32 @@ void get_term_dimensions(struct winsize *ws);
#include "util/types.h"
#include <stdbool.h>
+struct perf_mmap {
+ void *base;
+ int mask;
+ unsigned int prev;
+};
+
+static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->base;
+ int head = pc->data_head;
+ rmb();
+ return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md,
+ unsigned long tail)
+{
+ struct perf_event_mmap_page *pc = md->base;
+
+ /*
+ * ensure all reads are done before we write the tail out.
+ */
+ /* mb(); */
+ pc->data_tail = tail;
+}
+
/*
* prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
* counters in the current task.
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index e12d539417b2..f8c66d1435e0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2009-2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
*
* Handle the callchains from the stream in an ad-hoc radix tree and then
* sort them in an rbtree.
@@ -26,10 +26,10 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event)
}
#define chain_for_each_child(child, parent) \
- list_for_each_entry(child, &parent->children, brothers)
+ list_for_each_entry(child, &parent->children, siblings)
#define chain_for_each_child_safe(child, next, parent) \
- list_for_each_entry_safe(child, next, &parent->children, brothers)
+ list_for_each_entry_safe(child, next, &parent->children, siblings)
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
@@ -38,14 +38,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct callchain_node *rnode;
- u64 chain_cumul = cumul_hits(chain);
+ u64 chain_cumul = callchain_cumul_hits(chain);
while (*p) {
u64 rnode_cumul;
parent = *p;
rnode = rb_entry(parent, struct callchain_node, rb_node);
- rnode_cumul = cumul_hits(rnode);
+ rnode_cumul = callchain_cumul_hits(rnode);
switch (mode) {
case CHAIN_FLAT:
@@ -104,7 +104,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_ABS);
}
@@ -129,7 +129,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
chain_for_each_child(child, node) {
__sort_chain_graph_rel(child, min_percent);
- if (cumul_hits(child) >= min_hit)
+ if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
CHAIN_GRAPH_REL);
}
@@ -143,7 +143,7 @@ sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
rb_root->rb_node = chain_root->node.rb_root.rb_node;
}
-int register_callchain_param(struct callchain_param *param)
+int callchain_register_param(struct callchain_param *param)
{
switch (param->mode) {
case CHAIN_GRAPH_ABS:
@@ -189,32 +189,27 @@ create_child(struct callchain_node *parent, bool inherit_children)
chain_for_each_child(next, new)
next->parent = new;
}
- list_add_tail(&new->brothers, &parent->children);
+ list_add_tail(&new->siblings, &parent->children);
return new;
}
-struct resolved_ip {
- u64 ip;
- struct map_symbol ms;
-};
-
-struct resolved_chain {
- u64 nr;
- struct resolved_ip ips[0];
-};
-
-
/*
* Fill the node with callchain values
*/
static void
-fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
{
- unsigned int i;
+ struct callchain_cursor_node *cursor_node;
+
+ node->val_nr = cursor->nr - cursor->pos;
+ if (!node->val_nr)
+ pr_warning("Warning: empty node in callchain tree\n");
- for (i = start; i < chain->nr; i++) {
+ cursor_node = callchain_cursor_current(cursor);
+
+ while (cursor_node) {
struct callchain_list *call;
call = zalloc(sizeof(*call));
@@ -222,23 +217,25 @@ fill_node(struct callchain_node *node, struct resolved_chain *chain, int start)
perror("not enough memory for the code path tree");
return;
}
- call->ip = chain->ips[i].ip;
- call->ms = chain->ips[i].ms;
+ call->ip = cursor_node->ip;
+ call->ms.sym = cursor_node->sym;
+ call->ms.map = cursor_node->map;
list_add_tail(&call->list, &node->val);
+
+ callchain_cursor_advance(cursor);
+ cursor_node = callchain_cursor_current(cursor);
}
- node->val_nr = chain->nr - start;
- if (!node->val_nr)
- pr_warning("Warning: empty node in callchain tree\n");
}
static void
-add_child(struct callchain_node *parent, struct resolved_chain *chain,
- int start, u64 period)
+add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *new;
new = create_child(parent, false);
- fill_node(new, chain, start);
+ fill_node(new, cursor);
new->children_hit = 0;
new->hit = period;
@@ -250,9 +247,10 @@ add_child(struct callchain_node *parent, struct resolved_chain *chain,
* Then create another child to host the given callchain of new branch
*/
static void
-split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
- struct callchain_list *to_split, int idx_parents, int idx_local,
- u64 period)
+split_add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ struct callchain_list *to_split,
+ u64 idx_parents, u64 idx_local, u64 period)
{
struct callchain_node *new;
struct list_head *old_tail;
@@ -272,14 +270,14 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
/* split the hits */
new->hit = parent->hit;
new->children_hit = parent->children_hit;
- parent->children_hit = cumul_hits(new);
+ parent->children_hit = callchain_cumul_hits(new);
new->val_nr = parent->val_nr - idx_local;
parent->val_nr = idx_local;
/* create a new child for the new branch if any */
- if (idx_total < chain->nr) {
+ if (idx_total < cursor->nr) {
parent->hit = 0;
- add_child(parent, chain, idx_total, period);
+ add_child(parent, cursor, period);
parent->children_hit += period;
} else {
parent->hit = period;
@@ -287,36 +285,41 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain,
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period);
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period);
static void
-append_chain_children(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain_children(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
struct callchain_node *rnode;
/* lookup in childrens */
chain_for_each_child(rnode, root) {
- unsigned int ret = append_chain(rnode, chain, start, period);
+ unsigned int ret = append_chain(rnode, cursor, period);
if (!ret)
goto inc_children_hit;
}
/* nothing in children, add to the current node */
- add_child(root, chain, start, period);
+ add_child(root, cursor, period);
inc_children_hit:
root->children_hit += period;
}
static int
-append_chain(struct callchain_node *root, struct resolved_chain *chain,
- unsigned int start, u64 period)
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
+ struct callchain_cursor_node *curr_snap = cursor->curr;
struct callchain_list *cnode;
- unsigned int i = start;
+ u64 start = cursor->pos;
bool found = false;
+ u64 matches;
/*
* Lookup in the current node
@@ -324,141 +327,134 @@ append_chain(struct callchain_node *root, struct resolved_chain *chain,
* anywhere inside a function.
*/
list_for_each_entry(cnode, &root->val, list) {
+ struct callchain_cursor_node *node;
struct symbol *sym;
- if (i == chain->nr)
+ node = callchain_cursor_current(cursor);
+ if (!node)
break;
- sym = chain->ips[i].ms.sym;
+ sym = node->sym;
if (cnode->ms.sym && sym) {
if (cnode->ms.sym->start != sym->start)
break;
- } else if (cnode->ip != chain->ips[i].ip)
+ } else if (cnode->ip != node->ip)
break;
if (!found)
found = true;
- i++;
+
+ callchain_cursor_advance(cursor);
}
/* matches not, relay on the parent */
- if (!found)
+ if (!found) {
+ cursor->curr = curr_snap;
+ cursor->pos = start;
return -1;
+ }
+
+ matches = cursor->pos - start;
/* we match only a part of the node. Split it and add the new chain */
- if (i - start < root->val_nr) {
- split_add_child(root, chain, cnode, start, i - start, period);
+ if (matches < root->val_nr) {
+ split_add_child(root, cursor, cnode, start, matches, period);
return 0;
}
/* we match 100% of the path, increment the hit */
- if (i - start == root->val_nr && i == chain->nr) {
+ if (matches == root->val_nr && cursor->pos == cursor->nr) {
root->hit += period;
return 0;
}
/* We match the node and still have a part remaining */
- append_chain_children(root, chain, i, period);
+ append_chain_children(root, cursor, period);
return 0;
}
-static void filter_context(struct ip_callchain *old, struct resolved_chain *new,
- struct map_symbol *syms)
-{
- int i, j = 0;
-
- for (i = 0; i < (int)old->nr; i++) {
- if (old->ips[i] >= PERF_CONTEXT_MAX)
- continue;
-
- new->ips[j].ip = old->ips[i];
- new->ips[j].ms = syms[i];
- j++;
- }
-
- new->nr = j;
-}
-
-
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period)
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period)
{
- struct resolved_chain *filtered;
-
- if (!chain->nr)
+ if (!cursor->nr)
return 0;
- filtered = zalloc(sizeof(*filtered) +
- chain->nr * sizeof(struct resolved_ip));
- if (!filtered)
- return -ENOMEM;
-
- filter_context(chain, filtered, syms);
-
- if (!filtered->nr)
- goto end;
+ callchain_cursor_commit(cursor);
- append_chain_children(&root->node, filtered, 0, period);
+ append_chain_children(&root->node, cursor, period);
- if (filtered->nr > root->max_depth)
- root->max_depth = filtered->nr;
-end:
- free(filtered);
+ if (cursor->nr > root->max_depth)
+ root->max_depth = cursor->nr;
return 0;
}
static int
-merge_chain_branch(struct callchain_node *dst, struct callchain_node *src,
- struct resolved_chain *chain)
+merge_chain_branch(struct callchain_cursor *cursor,
+ struct callchain_node *dst, struct callchain_node *src)
{
+ struct callchain_cursor_node **old_last = cursor->last;
struct callchain_node *child, *next_child;
struct callchain_list *list, *next_list;
- int old_pos = chain->nr;
+ int old_pos = cursor->nr;
int err = 0;
list_for_each_entry_safe(list, next_list, &src->val, list) {
- chain->ips[chain->nr].ip = list->ip;
- chain->ips[chain->nr].ms = list->ms;
- chain->nr++;
+ callchain_cursor_append(cursor, list->ip,
+ list->ms.map, list->ms.sym);
list_del(&list->list);
free(list);
}
- if (src->hit)
- append_chain_children(dst, chain, 0, src->hit);
+ if (src->hit) {
+ callchain_cursor_commit(cursor);
+ append_chain_children(dst, cursor, src->hit);
+ }
chain_for_each_child_safe(child, next_child, src) {
- err = merge_chain_branch(dst, child, chain);
+ err = merge_chain_branch(cursor, dst, child);
if (err)
break;
- list_del(&child->brothers);
+ list_del(&child->siblings);
free(child);
}
- chain->nr = old_pos;
+ cursor->nr = old_pos;
+ cursor->last = old_last;
return err;
}
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src)
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src)
+{
+ return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+ u64 ip, struct map *map, struct symbol *sym)
{
- struct resolved_chain *chain;
- int err;
+ struct callchain_cursor_node *node = *cursor->last;
- chain = malloc(sizeof(*chain) +
- src->max_depth * sizeof(struct resolved_ip));
- if (!chain)
- return -ENOMEM;
+ if (!node) {
+ node = calloc(sizeof(*node), 1);
+ if (!node)
+ return -ENOMEM;
- chain->nr = 0;
+ *cursor->last = node;
+ }
- err = merge_chain_branch(&dst->node, &src->node, chain);
+ node->ip = ip;
+ node->map = map;
+ node->sym = sym;
- free(chain);
+ cursor->nr++;
- return err;
+ cursor->last = &node->next;
+
+ return 0;
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index c15fb8c24ad2..67137256a1cd 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -16,7 +16,7 @@ enum chain_mode {
struct callchain_node {
struct callchain_node *parent;
- struct list_head brothers;
+ struct list_head siblings;
struct list_head children;
struct list_head val;
struct rb_node rb_node; /* to sort nodes in an rbtree */
@@ -49,9 +49,30 @@ struct callchain_list {
struct list_head list;
};
+/*
+ * A callchain cursor is a singly linked list that
+ * lets one feed a callchain progressively.
+ * It keeps persistently allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+ u64 ip;
+ struct map *map;
+ struct symbol *sym;
+ struct callchain_cursor_node *next;
+};
+
+struct callchain_cursor {
+ u64 nr;
+ struct callchain_cursor_node *first;
+ struct callchain_cursor_node **last;
+ u64 pos;
+ struct callchain_cursor_node *curr;
+};
+
static inline void callchain_init(struct callchain_root *root)
{
- INIT_LIST_HEAD(&root->node.brothers);
+ INIT_LIST_HEAD(&root->node.siblings);
INIT_LIST_HEAD(&root->node.children);
INIT_LIST_HEAD(&root->node.val);
@@ -61,15 +82,54 @@ static inline void callchain_init(struct callchain_root *root)
root->max_depth = 0;
}
-static inline u64 cumul_hits(struct callchain_node *node)
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
{
return node->hit + node->children_hit;
}
-int register_callchain_param(struct callchain_param *param);
-int callchain_append(struct callchain_root *root, struct ip_callchain *chain,
- struct map_symbol *syms, u64 period);
-int callchain_merge(struct callchain_root *dst, struct callchain_root *src);
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src);
bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event);
+
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+ cursor->nr = 0;
+ cursor->last = &cursor->first;
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+ struct map *map, struct symbol *sym);
+
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->first;
+ cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+ if (cursor->pos == cursor->nr)
+ return NULL;
+
+ return cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+}
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 3ccaa1043383..6893eec693ab 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -177,3 +177,8 @@ struct cpu_map *cpu_map__dummy_new(void)
return cpus;
}
+
+void cpu_map__delete(struct cpu_map *map)
+{
+ free(map);
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f7a4f42f6307..072c0a374794 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -8,6 +8,6 @@ struct cpu_map {
struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__dummy_new(void);
-void *cpu_map__delete(struct cpu_map *map);
+void cpu_map__delete(struct cpu_map *map);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 1478ab4ee222..e4db8b888546 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -826,128 +826,3 @@ out_filtered:
al->filtered = true;
return 0;
}
-
-static int event__parse_id_sample(const event_t *event,
- struct perf_session *session,
- struct sample_data *sample)
-{
- const u64 *array;
- u64 type;
-
- sample->cpu = sample->pid = sample->tid = -1;
- sample->stream_id = sample->id = sample->time = -1ULL;
-
- if (!session->sample_id_all)
- return 0;
-
- array = event->sample.array;
- array += ((event->header.size -
- sizeof(event->header)) / sizeof(u64)) - 1;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- sample->cpu = *p;
- array--;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- sample->stream_id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_ID) {
- sample->id = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- sample->time = *array;
- array--;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- sample->pid = p[0];
- sample->tid = p[1];
- }
-
- return 0;
-}
-
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *data)
-{
- const u64 *array;
- u64 type;
-
- if (event->header.type != PERF_RECORD_SAMPLE)
- return event__parse_id_sample(event, session, data);
-
- array = event->sample.array;
- type = session->sample_type;
-
- if (type & PERF_SAMPLE_IP) {
- data->ip = event->ip.ip;
- array++;
- }
-
- if (type & PERF_SAMPLE_TID) {
- u32 *p = (u32 *)array;
- data->pid = p[0];
- data->tid = p[1];
- array++;
- }
-
- if (type & PERF_SAMPLE_TIME) {
- data->time = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_ADDR) {
- data->addr = *array;
- array++;
- }
-
- data->id = -1ULL;
- if (type & PERF_SAMPLE_ID) {
- data->id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_STREAM_ID) {
- data->stream_id = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_CPU) {
- u32 *p = (u32 *)array;
- data->cpu = *p;
- array++;
- } else
- data->cpu = -1;
-
- if (type & PERF_SAMPLE_PERIOD) {
- data->period = *array;
- array++;
- }
-
- if (type & PERF_SAMPLE_READ) {
- pr_debug("PERF_SAMPLE_READ is unsuported for now\n");
- return -1;
- }
-
- if (type & PERF_SAMPLE_CALLCHAIN) {
- data->callchain = (struct ip_callchain *)array;
- array += 1 + data->callchain->nr;
- }
-
- if (type & PERF_SAMPLE_RAW) {
- u32 *p = (u32 *)array;
- data->raw_size = *p;
- p++;
- data->raw_data = p;
- }
-
- return 0;
-}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 2b7e91902f10..d79e4edd82f9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -169,9 +169,10 @@ struct addr_location;
int event__preprocess_sample(const event_t *self, struct perf_session *session,
struct addr_location *al, struct sample_data *data,
symbol_filter_t filter);
-int event__parse_sample(const event_t *event, struct perf_session *session,
- struct sample_data *sample);
const char *event__get_event_name(unsigned int id);
+int event__parse_sample(const event_t *event, u64 type, bool sample_id_all,
+ struct sample_data *sample);
+
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000000..df0610e9c61b
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,170 @@
+#include <poll.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+
+void perf_evlist__init(struct perf_evlist *evlist)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+ INIT_LIST_HEAD(&evlist->entries);
+}
+
+struct perf_evlist *perf_evlist__new(void)
+{
+ struct perf_evlist *evlist = zalloc(sizeof(*evlist));
+
+ if (evlist != NULL)
+ perf_evlist__init(evlist);
+
+ return evlist;
+}
+
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &evlist->entries, node) {
+ list_del_init(&pos->node);
+ perf_evsel__delete(pos);
+ }
+
+ evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+ free(evlist->mmap);
+ free(evlist->pollfd);
+ evlist->mmap = NULL;
+ evlist->pollfd = NULL;
+}
+
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+ perf_evlist__purge(evlist);
+ perf_evlist__exit(evlist);
+ free(evlist);
+}
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
+{
+ list_add_tail(&entry->node, &evlist->entries);
+ ++evlist->nr_entries;
+}
+
+int perf_evlist__add_default(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads)
+{
+ int nfds = ncpus * nthreads * evlist->nr_entries;
+ evlist->pollfd = malloc(sizeof(struct pollfd) * nfds);
+ return evlist->pollfd != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ evlist->pollfd[evlist->nr_fds].fd = fd;
+ evlist->pollfd[evlist->nr_fds].events = POLLIN;
+ evlist->nr_fds++;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+ struct hlist_head *head;
+ struct hlist_node *pos;
+ struct perf_sample_id *sid;
+ int hash;
+
+ if (evlist->nr_entries == 1)
+ return list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->heads[hash];
+
+ hlist_for_each_entry(sid, pos, head, node)
+ if (sid->id == id)
+ return sid->evsel;
+ return NULL;
+}
+
+event_t *perf_evlist__read_on_cpu(struct perf_evlist *evlist, int cpu)
+{
+ /* XXX Move this to perf.c, making it generally available */
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ struct perf_mmap *md = &evlist->mmap[cpu];
+ unsigned int head = perf_mmap__read_head(md);
+ unsigned int old = md->prev;
+ unsigned char *data = md->base + page_size;
+ event_t *event = NULL;
+ int diff;
+
+ /*
+ * If we're further behind than half the buffer, there's a chance
+ * the writer will bite our tail and mess up the samples under us.
+ *
+ * If we somehow ended up ahead of the head, we got messed up.
+ *
+ * In either case, truncate and restart at head.
+ */
+ diff = head - old;
+ if (diff > md->mask / 2 || diff < 0) {
+ fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
+
+ /*
+ * head points to a known good entry, start there.
+ */
+ old = head;
+ }
+
+ if (old != head) {
+ size_t size;
+
+ event = (event_t *)&data[old & md->mask];
+ size = event->header.size;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((old & md->mask) + size != ((old + size) & md->mask)) {
+ unsigned int offset = old;
+ unsigned int len = min(sizeof(*event), size), cpy;
+ void *dst = &evlist->event_copy;
+
+ do {
+ cpy = min(md->mask + 1 - (offset & md->mask), len);
+ memcpy(dst, &data[offset & md->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = &evlist->event_copy;
+ }
+
+ old += size;
+ }
+
+ md->prev = old;
+ return event;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000000..acbe48eac608
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,41 @@
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/list.h>
+#include "../perf.h"
+#include "event.h"
+
+struct pollfd;
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+struct perf_evlist {
+ struct list_head entries;
+ struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+ int nr_entries;
+ int nr_fds;
+ int mmap_len;
+ event_t event_copy;
+ struct perf_mmap *mmap;
+ struct pollfd *pollfd;
+};
+
+struct perf_evsel;
+
+struct perf_evlist *perf_evlist__new(void);
+void perf_evlist__init(struct perf_evlist *evlist);
+void perf_evlist__exit(struct perf_evlist *evlist);
+void perf_evlist__delete(struct perf_evlist *evlist);
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+int perf_evlist__add_default(struct perf_evlist *evlist);
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist, int ncpus, int nthreads);
+void perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+
+event_t *perf_evlist__read_on_cpu(struct perf_evlist *self, int cpu);
+
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f5cfed60af98..76ab553637d6 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,20 +1,33 @@
#include "evsel.h"
+#include "evlist.h"
#include "../perf.h"
#include "util.h"
#include "cpumap.h"
-#include "thread.h"
+#include "thread_map.h"
+
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->id, x, y)
+
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx)
+{
+ evsel->idx = idx;
+ evsel->attr = *attr;
+ INIT_LIST_HEAD(&evsel->node);
+}
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
- if (evsel != NULL) {
- evsel->idx = idx;
- evsel->attr = *attr;
- INIT_LIST_HEAD(&evsel->node);
- }
+ if (evsel != NULL)
+ perf_evsel__init(evsel, attr, idx);
return evsel;
}
@@ -25,6 +38,12 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+ return evsel->id != NULL ? 0 : -ENOMEM;
+}
+
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
evsel->counts = zalloc((sizeof(*evsel->counts) +
@@ -38,6 +57,12 @@ void perf_evsel__free_fd(struct perf_evsel *evsel)
evsel->fd = NULL;
}
+void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+ xyarray__delete(evsel->id);
+ evsel->id = NULL;
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
int cpu, thread;
@@ -49,10 +74,34 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
}
}
-void perf_evsel__delete(struct perf_evsel *evsel)
+void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+}
+
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus)
+{
+ evlist->mmap = zalloc(ncpus * sizeof(struct perf_mmap));
+ return evlist->mmap != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__exit(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
xyarray__delete(evsel->fd);
+ xyarray__delete(evsel->id);
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+ perf_evsel__exit(evsel);
free(evsel);
}
@@ -128,7 +177,7 @@ int __perf_evsel__read(struct perf_evsel *evsel,
}
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
- struct thread_map *threads)
+ struct thread_map *threads, bool group, bool inherit)
{
int cpu, thread;
@@ -137,12 +186,20 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
return -1;
for (cpu = 0; cpu < cpus->nr; cpu++) {
+ int group_fd = -1;
+
+ evsel->attr.inherit = (cpus->map[cpu] < 0) && inherit;
+
for (thread = 0; thread < threads->nr; thread++) {
FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
threads->map[thread],
- cpus->map[cpu], -1, 0);
+ cpus->map[cpu],
+ group_fd, 0);
if (FD(evsel, cpu, thread) < 0)
goto out_close;
+
+ if (group && group_fd == -1)
+ group_fd = FD(evsel, cpu, thread);
}
}
@@ -175,10 +232,9 @@ static struct {
.threads = { -1, },
};
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit)
{
-
if (cpus == NULL) {
/* Work around old compiler warnings about strict aliasing */
cpus = &empty_cpu_map.map;
@@ -187,15 +243,243 @@ int perf_evsel__open(struct perf_evsel *evsel,
if (threads == NULL)
threads = &empty_thread_map.map;
- return __perf_evsel__open(evsel, cpus, threads);
+ return __perf_evsel__open(evsel, cpus, threads, group, inherit);
+}
+
+/*
+ * Open @evsel on every cpu in @cpus, passing the file-local
+ * empty_thread_map as the thread dimension.  @group and @inherit are
+ * forwarded unchanged to __perf_evsel__open(), whose result is returned.
+ */
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+			     struct cpu_map *cpus, bool group, bool inherit)
+{
+	struct thread_map *no_threads = &empty_thread_map.map;
+
+	return __perf_evsel__open(evsel, cpus, no_threads, group, inherit);
+}
+
+/*
+ * Open @evsel for every thread in @threads, passing the file-local
+ * empty_cpu_map as the cpu dimension.  @group and @inherit are forwarded
+ * unchanged to __perf_evsel__open(), whose result is returned.
+ */
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+				struct thread_map *threads, bool group, bool inherit)
+{
+	struct cpu_map *no_cpus = &empty_cpu_map.map;
+
+	return __perf_evsel__open(evsel, no_cpus, threads, group, inherit);
+}
+
+/*
+ * Map the buffer of @fd into evlist->mmap[@cpu] (MAP_SHARED, length
+ * evlist->mmap_len, protection @prot) and register @fd for polling.
+ *
+ * Returns 0 on success and -1 if mmap() failed.  On failure the slot's
+ * base is reset to NULL rather than left as MAP_FAILED: the unmap loops in
+ * this file only skip NULL bases, so a stale MAP_FAILED would be handed to
+ * munmap().
+ */
+static int __perf_evlist__mmap(struct perf_evlist *evlist, int cpu, int prot,
+			       int mask, int fd)
+{
+	struct perf_mmap *map = &evlist->mmap[cpu];
+
+	map->prev = 0;
+	map->mask = mask;
+	map->base = mmap(NULL, evlist->mmap_len, prot, MAP_SHARED, fd, 0);
+	if (map->base == MAP_FAILED) {
+		map->base = NULL;
+		return -1;
+	}
+
+	perf_evlist__add_pollfd(evlist, fd);
+	return 0;
+}
+
+/*
+ * Read the id of the counter behind @fd and hash the (cpu, thread) slot of
+ * @evsel's id table under it in evlist->heads, recording @evsel in the slot
+ * so the id can be mapped back to its event.
+ *
+ * Returns 0 on success, -1 if the event's read format lacks PERF_FORMAT_ID
+ * or if read() reported an error.
+ */
+static int perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel,
+				int cpu, int thread, int fd)
+{
+	u64 values[4] = { 0, };
+	struct perf_sample_id *entry;
+	int pos = 1;	/* values[0] holds the counter value */
+	int bucket;
+
+	if (!(evsel->attr.read_format & PERF_FORMAT_ID))
+		return -1;
+
+	if (read(fd, &values, sizeof(values)) == -1)
+		return -1;
+
+	/* Each time field that is present pushes the id one slot further. */
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+		pos++;
+	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+		pos++;
+
+	entry = SID(evsel, cpu, thread);
+	entry->id = values[pos];
+	entry->evsel = evsel;
+
+	bucket = hash_64(entry->id, PERF_EVLIST__HLIST_BITS);
+	hlist_add_head(&entry->node, &evlist->heads[bucket]);
+	return 0;
+}
+
+/** perf_evlist__mmap - Create per cpu maps to receive events
+ *
+ * @evlist - list of events
+ * @cpus - cpu map being monitored
+ * @threads - threads map being monitored
+ * @pages - map length in pages
+ * @overwrite - overwrite older events?
+ *
+ * For each cpu only the fd of the event with idx 0, thread 0, is mmapped;
+ * every other fd on that cpu is redirected into that fd's buffer with
+ * PERF_EVENT_IOC_SET_OUTPUT.  The mapping length is (@pages + 1) pages.
+ * Events whose read format has PERF_FORMAT_ID also get their ids hashed
+ * via perf_evlist__id_hash().
+ *
+ * If overwrite is false the user needs to signal event consumption using:
+ *
+ * struct perf_mmap *m = &evlist->mmap[cpu];
+ * unsigned int head = perf_mmap__read_head(m);
+ *
+ * perf_mmap__write_tail(m, head)
+ *
+ * Returns 0 on success, -ENOMEM if one of the allocations fails, and -1
+ * (after unmapping whatever was mapped) if an ioctl, mmap or id lookup
+ * fails.
+ */
+int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads, int pages, bool overwrite)
+{
+ unsigned int page_size = sysconf(_SC_PAGE_SIZE);
+ int mask = pages * page_size - 1, cpu; /* mask is stored in each perf_mmap by __perf_evlist__mmap() */
+ struct perf_evsel *first_evsel, *evsel;
+ int thread, prot = PROT_READ | (overwrite ? 0 : PROT_WRITE); /* writable only when not overwriting */
+
+ if (evlist->mmap == NULL &&
+ perf_evlist__alloc_mmap(evlist, cpus->nr) < 0)
+ return -ENOMEM;
+
+ if (evlist->pollfd == NULL &&
+ perf_evlist__alloc_pollfd(evlist, cpus->nr, threads->nr) < 0)
+ return -ENOMEM;
+
+ evlist->mmap_len = (pages + 1) * page_size; /* one page more than @pages */
+ first_evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->id == NULL &&
+ perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
+ return -ENOMEM; /* NOTE(review): unlike the failures below, this returns without the out_unmap cleanup */
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (thread = 0; thread < threads->nr; thread++) {
+ int fd = FD(evsel, cpu, thread);
+
+ if (evsel->idx || thread) { /* any fd other than (idx 0, thread 0): share the first event's buffer */
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT,
+ FD(first_evsel, cpu, 0)) != 0)
+ goto out_unmap;
+ } else if (__perf_evlist__mmap(evlist, cpu, prot, mask, fd) < 0)
+ goto out_unmap;
+
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ perf_evlist__id_hash(evlist, evsel, cpu, thread, fd) < 0)
+ goto out_unmap;
+ }
+ }
+ }
+
+ return 0;
+
+out_unmap: /* NOTE(review): this loop repeats the body of perf_evlist__munmap(evlist, cpus->nr) */
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ if (evlist->mmap[cpu].base != NULL) {
+ munmap(evlist->mmap[cpu].base, evlist->mmap_len);
+ evlist->mmap[cpu].base = NULL;
+ }
+ }
+ return -1;
 }
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int event__parse_id_sample(const event_t *event, u64 type,
+ struct sample_data *sample) /* fills in cpu, stream_id, id, time, pid and tid as selected by @type; always returns 0 */
 {
- return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+ const u64 *array = event->sample.array;
+
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1; /* now points at the last u64 of the event */
+
+ if (type & PERF_SAMPLE_CPU) { /* the fields are read back to front, stepping down one u64 each */
+ u32 *p = (u32 *)array;
+ sample->cpu = *p; /* first u32 of the slot */
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TID) { /* last field read, so the pointer is not moved again */
+ u32 *p = (u32 *)array;
+ sample->pid = p[0];
+ sample->tid = p[1];
+ }
+
+ return 0;
 }
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+int event__parse_sample(const event_t *event, u64 type, bool sample_id_all,
+ struct sample_data *data) /* decodes into @data the fields selected by @type; returns 0, or -1 for PERF_SAMPLE_READ */
 {
- return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
+ const u64 *array;
+
+ data->cpu = data->pid = data->tid = -1; /* -1 marks "not present" until a field is decoded */
+ data->stream_id = data->id = data->time = -1ULL;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) { /* non-sample events carry at most the trailing id fields */
+ if (!sample_id_all)
+ return 0;
+ return event__parse_id_sample(event, type, data);
+ }
+
+ array = event->sample.array; /* fields are consumed front to back, one u64 slot each unless noted */
+
+ if (type & PERF_SAMPLE_IP) {
+ data->ip = event->ip.ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u32 *p = (u32 *)array; /* pid and tid share one u64 slot */
+ data->pid = p[0];
+ data->tid = p[1];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ data->time = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ data->addr = *array;
+ array++;
+ }
+
+ data->id = -1ULL; /* already set to -1ULL above; repeated here */
+ if (type & PERF_SAMPLE_ID) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ data->stream_id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u32 *p = (u32 *)array; /* only the first u32 of the slot is used */
+ data->cpu = *p;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ data->period = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ data->callchain = (struct ip_callchain *)array; /* points into the event, not a copy */
+ array += 1 + data->callchain->nr; /* skip the count plus nr entries; NOTE(review): nr is not checked against header.size here */
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ u32 *p = (u32 *)array;
+ data->raw_size = *p; /* u32 size, with the payload right after it */
+ p++;
+ data->raw_data = p;
+ }
+
+ return 0;
 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b2d755fe88a5..7962e7587dea 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -24,11 +24,24 @@ struct perf_counts {
struct perf_counts_values cpu[];
};
+struct perf_evsel;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there is
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+ struct hlist_node node; /* link in one evlist->heads[] hash bucket */
+ u64 id; /* id read back from the event fd; the hash key */
+ struct perf_evsel *evsel; /* event this id belongs to */
+};
+
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
+ struct xyarray *id;
struct perf_counts *counts;
int idx;
void *priv;
@@ -36,19 +49,31 @@ struct perf_evsel {
struct cpu_map;
struct thread_map;
+struct perf_evlist;
struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+int perf_evlist__alloc_mmap(struct perf_evlist *evlist, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__free_id(struct perf_evsel *evsel);
void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus);
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads);
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads);
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus, bool group, bool inherit);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads, bool group, bool inherit);
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads, bool group, bool inherit);
+int perf_evlist__mmap(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads, int pages, bool overwrite);
+void perf_evlist__munmap(struct perf_evlist *evlist, int ncpus);
#define perf_evsel__match(evsel, t, c) \
(evsel->attr.type == PERF_TYPE_##t && \
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f6a929e74981..f0138d472339 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -8,6 +8,7 @@
#include <linux/list.h>
#include <linux/kernel.h>
+#include "evlist.h"
#include "util.h"
#include "header.h"
#include "../perf.h"
@@ -428,7 +429,8 @@ static bool perf_session__read_build_ids(struct perf_session *self, bool with_hi
return ret;
}
-static int perf_header__adds_write(struct perf_header *self, int fd)
+static int perf_header__adds_write(struct perf_header *self,
+ struct perf_evlist *evlist, int fd)
{
int nr_sections;
struct perf_session *session;
@@ -463,7 +465,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
/* Write trace info */
trace_sec->offset = lseek(fd, 0, SEEK_CUR);
- read_tracing_data(fd, &evsel_list);
+ read_tracing_data(fd, &evlist->entries);
trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
}
@@ -513,7 +515,8 @@ int perf_header__write_pipe(int fd)
return 0;
}
-int perf_header__write(struct perf_header *self, int fd, bool at_exit)
+int perf_header__write(struct perf_header *self, struct perf_evlist *evlist,
+ int fd, bool at_exit)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
@@ -566,7 +569,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
self->data_offset = lseek(fd, 0, SEEK_CUR);
if (at_exit) {
- err = perf_header__adds_write(self, fd);
+ err = perf_header__adds_write(self, evlist, fd);
if (err < 0)
return err;
}
@@ -1133,7 +1136,7 @@ int event__process_event_type(event_t *self,
return 0;
}
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
+int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
event__handler_t process,
struct perf_session *session __unused)
{
@@ -1144,7 +1147,7 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
memset(&ev, 0, sizeof(ev));
ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
- size = read_tracing_data_size(fd, pattrs);
+ size = read_tracing_data_size(fd, &evlist->entries);
if (size <= 0)
return size;
aligned_size = ALIGN(size, sizeof(u64));
@@ -1154,7 +1157,7 @@ int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
process(&ev, NULL, session);
- err = read_tracing_data(fd, pattrs);
+ err = read_tracing_data(fd, &evlist->entries);
write_padded(fd, NULL, 0, padding);
return aligned_size;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 33f16be7b72f..65afd7f74e0d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -65,8 +65,11 @@ struct perf_header {
int perf_header__init(struct perf_header *self);
void perf_header__exit(struct perf_header *self);
+struct perf_evlist;
+
int perf_header__read(struct perf_session *session, int fd);
-int perf_header__write(struct perf_header *self, int fd, bool at_exit);
+int perf_header__write(struct perf_header *self, struct perf_evlist *evlist,
+ int fd, bool at_exit);
int perf_header__write_pipe(int fd);
int perf_header__add_attr(struct perf_header *self,
@@ -113,7 +116,7 @@ int event__synthesize_event_types(event__handler_t process,
int event__process_event_type(event_t *self,
struct perf_session *session);
-int event__synthesize_tracing_data(int fd, struct list_head *pattrs,
+int event__synthesize_tracing_data(int fd, struct perf_evlist *evlist,
event__handler_t process,
struct perf_session *session);
int event__process_tracing_data(event_t *self,
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 32f4f1f2f6e4..02ed318d7312 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,7 +211,9 @@ void hist_entry__free(struct hist_entry *he)
* collapse the histogram
*/
-static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
+static bool hists__collapse_insert_entry(struct hists *self,
+ struct rb_root *root,
+ struct hist_entry *he)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -226,8 +228,11 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he)
if (!cmp) {
iter->period += he->period;
- if (symbol_conf.use_callchain)
- callchain_merge(iter->callchain, he->callchain);
+ if (symbol_conf.use_callchain) {
+ callchain_cursor_reset(&self->callchain_cursor);
+ callchain_merge(&self->callchain_cursor, iter->callchain,
+ he->callchain);
+ }
hist_entry__free(he);
return false;
}
@@ -262,7 +267,7 @@ void hists__collapse_resort(struct hists *self)
next = rb_next(&n->rb_node);
rb_erase(&n->rb_node, &self->entries);
- if (collapse__insert_entry(&tmp, n))
+ if (hists__collapse_insert_entry(self, &tmp, n))
hists__inc_nr_entries(self, n);
}
@@ -425,7 +430,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
u64 cumul;
child = rb_entry(node, struct callchain_node, rb_node);
- cumul = cumul_hits(child);
+ cumul = callchain_cumul_hits(child);
remaining -= cumul;
/*
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee789856a8c9..889559b86492 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -77,6 +77,8 @@ struct hists {
u64 event_stream;
u32 type;
u16 col_len[HISTC_NR_COLS];
+ /* Best would be to reuse the session callchain cursor */
+ struct callchain_cursor callchain_cursor;
};
struct hist_entry *__hists__add_entry(struct hists *self,
diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h
index f5ca26e53fbb..356c7e467b83 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/perf/util/include/linux/list.h
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include "../../../../include/linux/list.h"
#ifndef PERF_LIST_H
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 135f69baf966..cf082daa43e3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,6 +1,7 @@
#include "../../../include/linux/hw_breakpoint.h"
#include "util.h"
#include "../perf.h"
+#include "evlist.h"
#include "evsel.h"
#include "parse-options.h"
#include "parse-events.h"
@@ -11,10 +12,6 @@
#include "header.h"
#include "debugfs.h"
-int nr_counters;
-
-LIST_HEAD(evsel_list);
-
struct event_symbol {
u8 type;
u64 config;
@@ -449,8 +446,8 @@ parse_single_tracepoint_event(char *sys_name,
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
- char *flags)
+parse_multiple_tracepoint_event(const struct option *opt, char *sys_name,
+ const char *evt_exp, char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -483,15 +480,16 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
if (len < 0)
return EVT_FAILED;
- if (parse_events(NULL, event_opt, 0))
+ if (parse_events(opt, event_opt, 0))
return EVT_FAILED;
}
return EVT_HANDLED_ALL;
}
-static enum event_result parse_tracepoint_event(const char **strp,
- struct perf_event_attr *attr)
+static enum event_result
+parse_tracepoint_event(const struct option *opt, const char **strp,
+ struct perf_event_attr *attr)
{
const char *evt_name;
char *flags = NULL, *comma_loc;
@@ -530,7 +528,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
return EVT_FAILED;
if (strpbrk(evt_name, "*?")) {
*strp += strlen(sys_name) + evt_length + 1; /* 1 == the ':' */
- return parse_multiple_tracepoint_event(sys_name, evt_name,
+ return parse_multiple_tracepoint_event(opt, sys_name, evt_name,
flags);
} else {
return parse_single_tracepoint_event(sys_name, evt_name,
@@ -740,11 +738,12 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
* Symbolic names are (almost) exactly matched.
*/
static enum event_result
-parse_event_symbols(const char **str, struct perf_event_attr *attr)
+parse_event_symbols(const struct option *opt, const char **str,
+ struct perf_event_attr *attr)
{
enum event_result ret;
- ret = parse_tracepoint_event(str, attr);
+ ret = parse_tracepoint_event(opt, str, attr);
if (ret != EVT_FAILED)
goto modifier;
@@ -778,14 +777,15 @@ modifier:
return ret;
}
-int parse_events(const struct option *opt __used, const char *str, int unset __used)
+int parse_events(const struct option *opt, const char *str, int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_event_attr attr;
enum event_result ret;
for (;;) {
memset(&attr, 0, sizeof(attr));
- ret = parse_event_symbols(&str, &attr);
+ ret = parse_event_symbols(opt, &str, &attr);
if (ret == EVT_FAILED)
return -1;
@@ -794,12 +794,10 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
if (ret != EVT_HANDLED_ALL) {
struct perf_evsel *evsel;
- evsel = perf_evsel__new(&attr,
- nr_counters);
+ evsel = perf_evsel__new(&attr, evlist->nr_entries);
if (evsel == NULL)
return -1;
- list_add_tail(&evsel->node, &evsel_list);
- ++nr_counters;
+ perf_evlist__add(evlist, evsel);
}
if (*str == 0)
@@ -813,13 +811,14 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
return 0;
}
-int parse_filter(const struct option *opt __used, const char *str,
+int parse_filter(const struct option *opt, const char *str,
int unset __used)
{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
struct perf_evsel *last = NULL;
- if (!list_empty(&evsel_list))
- last = list_entry(evsel_list.prev, struct perf_evsel, node);
+ if (evlist->nr_entries > 0)
+ last = list_entry(evlist->entries.prev, struct perf_evsel, node);
if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) {
fprintf(stderr,
@@ -981,33 +980,3 @@ void print_events(void)
exit(129);
}
-
-int perf_evsel_list__create_default(void)
-{
- struct perf_evsel *evsel;
- struct perf_event_attr attr;
-
- memset(&attr, 0, sizeof(attr));
- attr.type = PERF_TYPE_HARDWARE;
- attr.config = PERF_COUNT_HW_CPU_CYCLES;
-
- evsel = perf_evsel__new(&attr, 0);
-
- if (evsel == NULL)
- return -ENOMEM;
-
- list_add(&evsel->node, &evsel_list);
- ++nr_counters;
- return 0;
-}
-
-void perf_evsel_list__delete(void)
-{
- struct perf_evsel *pos, *n;
-
- list_for_each_entry_safe(pos, n, &evsel_list, node) {
- list_del_init(&pos->node);
- perf_evsel__delete(pos);
- }
- nr_counters = 0;
-}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 458e3ecf17af..cf7e94abb676 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -9,11 +9,6 @@
struct list_head;
struct perf_evsel;
-extern struct list_head evsel_list;
-
-int perf_evsel_list__create_default(void);
-void perf_evsel_list__delete(void);
-
struct option;
struct tracepoint_path {
@@ -25,8 +20,6 @@ struct tracepoint_path {
extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
extern bool have_tracepoints(struct list_head *evlist);
-extern int nr_counters;
-
const char *event_name(struct perf_evsel *event);
extern const char *__event_name(int type, u64 config);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 6e29d9c9dccc..859d377a3df3 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -31,6 +31,7 @@
#include <string.h>
#include <stdarg.h>
#include <limits.h>
+#include <elf.h>
#undef _GNU_SOURCE
#include "util.h"
@@ -111,7 +112,25 @@ static struct symbol *__find_kernel_function_by_name(const char *name,
NULL);
}
-const char *kernel_get_module_path(const char *module)
+/*
+ * Look up the function map whose dso short name, without its first and last
+ * character, is exactly @module.  A NULL @module selects "kernel".
+ *
+ * Returns the matching map, or NULL if there is none.
+ */
+static struct map *kernel_get_module_map(const char *module)
+{
+	struct rb_node *nd;
+	struct map_groups *grp = &machine.kmaps;
+
+	if (!module)
+		module = "kernel";
+
+	for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
+		struct map *pos = rb_entry(nd, struct map, rb_node);
+
+		/*
+		 * strncmp() alone is only a prefix test: it would also accept
+		 * a longer @module that merely starts with the dso name.
+		 * Require @module to end right where the compared part ends.
+		 * (short_name presumably looks like "[module]" -- confirm.)
+		 */
+		if (strncmp(pos->dso->short_name + 1, module,
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0')
+			return pos;
+	}
+	return NULL;
+}
+
+static struct dso *kernel_get_module_dso(const char *module)
{
struct dso *dso;
struct map *map;
@@ -141,7 +160,13 @@ const char *kernel_get_module_path(const char *module)
}
}
found:
- return dso->long_name;
+ return dso;
+}
+
+/*
+ * Return the long name of the dso found by kernel_get_module_dso() for
+ * @module, or NULL when no dso was found.
+ */
+const char *kernel_get_module_path(const char *module)
+{
+	struct dso *dso = kernel_get_module_dso(module);
+
+	if (dso == NULL)
+		return NULL;
+
+	return dso->long_name;
 }
#ifdef DWARF_SUPPORT
@@ -1913,3 +1938,42 @@ int del_perf_probe_events(struct strlist *dellist)
return ret;
}
+/*
+ * Symbol filter used when loading a map: yields 0 for a symbol whose
+ * binding is STB_GLOBAL and 1 for every other binding.  @map is unused.
+ */
+static int filter_non_global_functions(struct map *map __unused,
+					struct symbol *sym)
+{
+	return sym->binding != STB_GLOBAL;
+}
+
+/*
+ * Print, sorted by name, the symbols of the kernel map -- or of the map of
+ * @module when it is not NULL -- to stdout.  The map is loaded with
+ * filter_non_global_functions() as its symbol filter.
+ *
+ * Returns 0 on success, the negative result of init_vmlinux() if that
+ * fails, or -EINVAL when the map cannot be found or loaded.
+ */
+int show_available_funcs(const char *module)
+{
+	struct map *map;
+	int err;
+
+	setup_pager();
+
+	err = init_vmlinux();
+	if (err < 0)
+		return err;
+
+	map = kernel_get_module_map(module);
+	if (map == NULL) {
+		pr_err("Failed to find %s map.\n", module ? module : "kernel");
+		return -EINVAL;
+	}
+
+	if (map__load(map, filter_non_global_functions) != 0) {
+		pr_err("Failed to load map.\n");
+		return -EINVAL;
+	}
+
+	/* Sort by name first, unless that has been done already. */
+	if (!dso__sorted_by_name(map->dso, map->type))
+		dso__sort_by_name(map->dso, map->type);
+
+	dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
+	return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 5accbedfea37..1fb4f18337d3 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -127,6 +127,7 @@ extern int show_line_range(struct line_range *lr, const char *module);
extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
int max_probe_points, const char *module,
bool externs);
+extern int show_available_funcs(const char *module);
/* Maximum index number of event-name postfix */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index ab83b6ac5d65..69215bff17e9 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -280,6 +280,19 @@ static bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
return name ? (strcmp(tname, name) == 0) : false;
}
+/*
+ * Get callsite line number of inline-function instance.
+ *
+ * Returns the DW_AT_call_line value of @in_die, or -ENOENT when the
+ * attribute is missing or cannot be decoded as an unsigned constant.
+ */
+static int die_get_call_lineno(Dwarf_Die *in_die)
+{
+	Dwarf_Attribute attr;
+	Dwarf_Word lineno;
+
+	if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+		return -ENOENT;
+
+	/* Without this check a failed decode would return an uninitialized value. */
+	if (dwarf_formudata(&attr, &lineno) != 0)
+		return -ENOENT;
+
+	return (int)lineno;
+}
+
/* Get type die */
static Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
{
@@ -458,6 +471,151 @@ static Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
}
+/*
+ * Walker on lines (Note: line number will not be sorted)
+ *
+ * A handler is called with the source file name, the line number, the
+ * address of the line and the caller's @data.  A nonzero return value stops
+ * the walk and is handed back to the caller of die_walk_lines().
+ */
+typedef int (* line_walk_handler_t) (const char *fname, int lineno,
+ Dwarf_Addr addr, void *data);
+
+struct __line_walk_param {
+ const char *fname; /* declaration file of the function being walked */
+ line_walk_handler_t handler; /* callback to run for each line found */
+ void *data; /* opaque pointer passed through to handler */
+ int retval; /* last handler result; nonzero ends the walk */
+};
+
+/*
+ * die_find_child() callback: for an inlined-subroutine DIE that has a
+ * positive call line and an entry pc, run the handler on that call site.
+ * The handler result is kept in the __line_walk_param passed as @data; a
+ * nonzero result ends the search, anything else moves on to the sibling.
+ */
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+	struct __line_walk_param *param = data;
+	Dwarf_Addr entry;
+	int call_line;
+
+	if (dwarf_tag(in_die) != DW_TAG_inlined_subroutine)
+		return DIE_FIND_CB_SIBLING;
+
+	call_line = die_get_call_lineno(in_die);
+	if (call_line <= 0 || dwarf_entrypc(in_die, &entry) != 0)
+		return DIE_FIND_CB_SIBLING;
+
+	param->retval = param->handler(param->fname, call_line, entry,
+				       param->data);
+
+	return param->retval != 0 ? DIE_FIND_CB_FOUND : DIE_FIND_CB_SIBLING;
+}
+
+/*
+ * Walk on lines of blocks included in given DIE: first the declaration line
+ * of @sp_die itself, then whatever __die_walk_funclines_cb() reports for the
+ * DIEs that die_find_child() passes to it.
+ *
+ * Returns 0 if every handler call returned 0, otherwise the nonzero handler
+ * result that stopped the walk.
+ */
+static int __die_walk_funclines(Dwarf_Die *sp_die,
+				line_walk_handler_t handler, void *data)
+{
+	struct __line_walk_param lw = {
+		.handler = handler,
+		.data = data,
+		.retval = 0,
+	};
+	Dwarf_Die scratch;
+	Dwarf_Addr entry;
+	int decl_line;
+
+	/* Handle function declaration line */
+	lw.fname = dwarf_decl_file(sp_die);
+	if (lw.fname != NULL &&
+	    dwarf_decl_line(sp_die, &decl_line) == 0 &&
+	    dwarf_entrypc(sp_die, &entry) == 0) {
+		lw.retval = handler(lw.fname, decl_line, entry, data);
+		if (lw.retval != 0)
+			return lw.retval;
+	}
+
+	/* The callback records the handler result in lw.retval. */
+	die_find_child(sp_die, __die_walk_funclines_cb, &lw, &scratch);
+	return lw.retval;
+}
+
+/*
+ * dwarf_getfuncs() callback used by die_walk_lines(): run
+ * __die_walk_funclines() on one function and abort the iteration as soon as
+ * the result is nonzero.  @data is a struct __line_walk_param.
+ */
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+	struct __line_walk_param *param = data;
+
+	param->retval = __die_walk_funclines(sp_die, param->handler, param->data);
+
+	return param->retval != 0 ? DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+/*
+ * Walk on lines inside given PDIE. If the PDIE is subprogram, walk only on
+ * the lines inside the subprogram, otherwise PDIE must be a CU DIE.
+ *
+ * @handler is called for each line of the CU's line list that passes the
+ * filter, then for function declaration lines and inlined call sites.
+ *
+ * Returns 0 when the walk completes, the first nonzero value returned by
+ * @handler, -EINVAL if the CU of a subprogram cannot be found, or -ENOENT
+ * if the CU has no source line list.
+ */
+static int die_walk_lines(Dwarf_Die *pdie, line_walk_handler_t handler,
+ void *data)
+{
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ const char *fname;
+ int lineno, ret = 0;
+ Dwarf_Die die_mem, *cu_die; /* die_mem is storage for dwarf_diecu() and, later, for die_find_inlinefunc() */
+ size_t nlines, i;
+
+ /* Get the CU die */
+ if (dwarf_tag(pdie) == DW_TAG_subprogram)
+ cu_die = dwarf_diecu(pdie, &die_mem, NULL, NULL);
+ else
+ cu_die = pdie;
+ if (!cu_die) {
+ pr_debug2("Failed to get CU from subprogram\n");
+ return -EINVAL;
+ }
+
+ /* Get lines list in the CU */
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+ pr_debug2("Failed to get source lines on this CU.\n");
+ return -ENOENT;
+ }
+ pr_debug2("Get %zd lines from this CU\n", nlines);
+
+ /* Walk on the lines on lines list */
+ for (i = 0; i < nlines; i++) {
+ line = dwarf_onesrcline(lines, i);
+ if (line == NULL ||
+ dwarf_lineno(line, &lineno) != 0 ||
+ dwarf_lineaddr(line, &addr) != 0) {
+ pr_debug2("Failed to get line info. "
+ "Possible error in debuginfo.\n");
+ continue; /* a bad entry is skipped, it does not end the walk */
+ }
+ /* Filter lines based on address */
+ if (pdie != cu_die) /* true only when pdie is a subprogram */
+ /*
+ * Address filtering
+ * The line is included in given function, and
+ * no inline block includes it.
+ *
+ * NOTE(review): die_find_inlinefunc() reuses die_mem,
+ * which presumably also backs cu_die here; cu_die is
+ * only compared by address afterwards -- confirm.
+ */
+ if (!dwarf_haspc(pdie, addr) ||
+ die_find_inlinefunc(pdie, addr, &die_mem))
+ continue;
+ /* Get source line */
+ fname = dwarf_linesrc(line, NULL, NULL);
+
+ ret = handler(fname, lineno, addr, data);
+ if (ret != 0)
+ return ret; /* any nonzero handler result stops the walk */
+ }
+
+ /*
+ * DWARF line lists do not include function declarations and inlined
+ * subroutines. We have to check the functions list or the given function.
+ */
+ if (pdie != cu_die)
+ ret = __die_walk_funclines(pdie, handler, data);
+ else {
+ struct __line_walk_param param = {
+ .handler = handler,
+ .data = data,
+ .retval = 0,
+ };
+ dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0); /* the result comes back through param.retval */
+ ret = param.retval;
+ }
+
+ return ret;
+}
+
struct __find_variable_param {
const char *name;
Dwarf_Addr addr;
@@ -1050,43 +1208,26 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf)
return ret;
}
-/* Find probe point from its line number */
-static int find_probe_point_by_line(struct probe_finder *pf)
+/*
+ * die_walk_lines() handler: probe the address of each line whose number is
+ * pf->lno and whose file matches pf->fname (per strtailcmp()).
+ *
+ * Returns 0 to keep walking, or the negative error from
+ * call_probe_finder() to stop the walk.
+ */
+static int probe_point_line_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
 {
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno;
- int ret = 0;
-
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
+	struct probe_finder *pf = data;
+	int ret;
- for (i = 0; i < nlines && ret == 0; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- lineno != pf->lno)
- continue;
+	if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+		return 0;
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
+	pf->addr = addr;
+	ret = call_probe_finder(NULL, pf);
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_warning("Failed to get the address of the line.\n");
- return -ENOENT;
- }
- pr_debug("Probe line found: line[%d]:%d addr:0x%jx\n",
- (int)i, lineno, (uintmax_t)addr);
- pf->addr = addr;
+	/*
+	 * Continue if no error, because the line will be in inline function.
+	 * "ret < 0 ?: 0" would yield the value of the comparison (1), not
+	 * ret, so the error branch is spelled out.
+	 */
+	return ret < 0 ? ret : 0;
+}
- ret = call_probe_finder(NULL, pf);
- /* Continuing, because target line might be inlined. */
- }
- return ret;
+/*
+ * Find probe point from its line number: walk the lines of pf->cu_die and
+ * let probe_point_line_walker() act on the ones matching pf->lno and
+ * pf->fname.  Returns whatever die_walk_lines() returns.
+ */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+ return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
 }
/* Find lines which match lazy pattern */
@@ -1140,15 +1281,31 @@ out_close:
return nlines;
}
+/*
+ * die_walk_lines() handler for lazy patterns: probe the address of every
+ * line that is in pf->lcache and whose file matches pf->fname (per
+ * strtailcmp()).
+ *
+ * Returns 0 to keep walking, or the negative error from
+ * call_probe_finder() to stop the walk.
+ */
+static int probe_point_lazy_walker(const char *fname, int lineno,
+				   Dwarf_Addr addr, void *data)
+{
+	struct probe_finder *pf = data;
+	int ret;
+
+	if (!line_list__has_line(&pf->lcache, lineno) ||
+	    strtailcmp(fname, pf->fname) != 0)
+		return 0;
+
+	pr_debug("Probe line found: line:%d addr:0x%llx\n",
+		 lineno, (unsigned long long)addr);
+	pf->addr = addr;
+	ret = call_probe_finder(NULL, pf);
+
+	/*
+	 * Continue if no error, because the lazy pattern will match
+	 * to other lines.  "ret < 0 ?: 0" would yield the value of the
+	 * comparison (1), not ret, so the error branch is spelled out.
+	 */
+	return ret < 0 ? ret : 0;
+}
+
/* Find probe points from lazy pattern */
static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- Dwarf_Die die_mem;
- int lineno;
int ret = 0;
if (list_empty(&pf->lcache)) {
@@ -1162,45 +1319,7 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
return ret;
}
- if (dwarf_getsrclines(&pf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- for (i = 0; i < nlines && ret >= 0; i++) {
- line = dwarf_onesrcline(lines, i);
-
- if (dwarf_lineno(line, &lineno) != 0 ||
- !line_list__has_line(&pf->lcache, lineno))
- continue;
-
- /* TODO: Get fileno from line, but how? */
- if (strtailcmp(dwarf_linesrc(line, NULL, NULL), pf->fname) != 0)
- continue;
-
- if (dwarf_lineaddr(line, &addr) != 0) {
- pr_debug("Failed to get the address of line %d.\n",
- lineno);
- continue;
- }
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (!dwarf_haspc(sp_die, addr))
- continue;
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- pr_debug("Probe line found: line[%d]:%d addr:0x%llx\n",
- (int)i, lineno, (unsigned long long)addr);
- pf->addr = addr;
-
- ret = call_probe_finder(sp_die, pf);
- /* Continuing, because target line might be inlined. */
- }
- /* TODO: deallocate lines, but how? */
- return ret;
+ return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
}
/* Callback parameter with return value */
@@ -1644,91 +1763,28 @@ static int line_range_add_line(const char *src, unsigned int lineno,
return line_list__add_line(&lr->line_list, lineno);
}
-/* Search function declaration lines */
-static int line_range_funcdecl_cb(Dwarf_Die *sp_die, void *data)
+static int line_range_walk_cb(const char *fname, int lineno,
+ Dwarf_Addr addr __used,
+ void *data) /* die_walk_lines() handler; @data is the struct line_finder */
 {
- struct dwarf_callback_param *param = data;
- struct line_finder *lf = param->data;
- const char *src;
- int lineno;
+ struct line_finder *lf = data;
- src = dwarf_decl_file(sp_die);
- if (src && strtailcmp(src, lf->fname) != 0)
- return DWARF_CB_OK;
-
- if (dwarf_decl_line(sp_die, &lineno) != 0 ||
+ if ((strtailcmp(fname, lf->fname) != 0) ||
 (lf->lno_s > lineno || lf->lno_e < lineno))
- return DWARF_CB_OK;
+ return 0; /* other file, or outside [lno_s, lno_e]: ignore the line and keep walking */
- param->retval = line_range_add_line(src, lineno, lf->lr);
- if (param->retval < 0)
- return DWARF_CB_ABORT;
- return DWARF_CB_OK;
-}
+ if (line_range_add_line(fname, lineno, lf->lr) < 0)
+ return -EINVAL; /* stops the walk; the callee's own error code is replaced by -EINVAL */
-static int find_line_range_func_decl_lines(struct line_finder *lf)
-{
- struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
- dwarf_getfuncs(&lf->cu_die, line_range_funcdecl_cb, &param, 0);
- return param.retval;
+ return 0;
 }
/* Find line range from its line number */
static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
{
- Dwarf_Lines *lines;
- Dwarf_Line *line;
- size_t nlines, i;
- Dwarf_Addr addr;
- int lineno, ret = 0;
- const char *src;
- Dwarf_Die die_mem;
-
- line_list__init(&lf->lr->line_list);
- if (dwarf_getsrclines(&lf->cu_die, &lines, &nlines) != 0) {
- pr_warning("No source lines found.\n");
- return -ENOENT;
- }
-
- /* Search probable lines on lines list */
- for (i = 0; i < nlines; i++) {
- line = dwarf_onesrcline(lines, i);
- if (dwarf_lineno(line, &lineno) != 0 ||
- (lf->lno_s > lineno || lf->lno_e < lineno))
- continue;
-
- if (sp_die) {
- /* Address filtering 1: does sp_die include addr? */
- if (dwarf_lineaddr(line, &addr) != 0 ||
- !dwarf_haspc(sp_die, addr))
- continue;
-
- /* Address filtering 2: No child include addr? */
- if (die_find_inlinefunc(sp_die, addr, &die_mem))
- continue;
- }
-
- /* TODO: Get fileno from line, but how? */
- src = dwarf_linesrc(line, NULL, NULL);
- if (strtailcmp(src, lf->fname) != 0)
- continue;
-
- ret = line_range_add_line(src, lineno, lf->lr);
- if (ret < 0)
- return ret;
- }
+ int ret;
- /*
- * Dwarf lines doesn't include function declarations. We have to
- * check functions list or given function.
- */
- if (sp_die) {
- src = dwarf_decl_file(sp_die);
- if (src && dwarf_decl_line(sp_die, &lineno) == 0 &&
- (lf->lno_s <= lineno && lf->lno_e >= lineno))
- ret = line_range_add_line(src, lineno, lf->lr);
- } else
- ret = find_line_range_func_decl_lines(lf);
+ ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
/* Update status */
if (ret >= 0)
@@ -1758,9 +1814,6 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
struct line_finder *lf = param->data;
struct line_range *lr = lf->lr;
- pr_debug("find (%llx) %s\n",
- (unsigned long long)dwarf_dieoffset(sp_die),
- dwarf_diename(sp_die));
if (dwarf_tag(sp_die) == DW_TAG_subprogram &&
die_compare_name(sp_die, lr->function)) {
lf->fname = dwarf_decl_file(sp_die);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 105f00bfd555..e6a07408669e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -242,17 +242,16 @@ static bool symbol__match_parent_regex(struct symbol *sym)
return 0;
}
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent)
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent)
{
u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
- struct map_symbol *syms = calloc(chain->nr, sizeof(*syms));
+ int err;
- if (!syms)
- return NULL;
+ callchain_cursor_reset(&self->callchain_cursor);
for (i = 0; i < chain->nr; i++) {
u64 ip = chain->ips[i];
@@ -281,12 +280,15 @@ struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
*parent = al.sym;
if (!symbol_conf.use_callchain)
break;
- syms[i].map = al.map;
- syms[i].sym = al.sym;
}
+
+ err = callchain_cursor_append(&self->callchain_cursor,
+ ip, al.map, al.sym);
+ if (err)
+ return err;
}
- return syms;
+ return 0;
}
static int process_event_synth_stub(event_t *event __used,
@@ -494,7 +496,7 @@ static void flush_sample_queue(struct perf_session *s,
if (iter->timestamp > limit)
break;
- event__parse_sample(iter->event, s, &sample);
+ perf_session__parse_sample(s, iter->event, &sample);
perf_session_deliver_event(s, iter->event, &sample, ops,
iter->file_offset);
@@ -804,7 +806,7 @@ static int perf_session__process_event(struct perf_session *session,
/*
* For all kernel events we get the sample data
*/
- event__parse_sample(event, session, &sample);
+ perf_session__parse_sample(session, event, &sample);
/* Preprocess sample records - precheck callchains */
if (perf_session__preprocess_sample(session, event, &sample))
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index decd83f274fd..78239767011e 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -51,7 +51,8 @@ struct perf_session {
int cwdlen;
char *cwd;
struct ordered_samples ordered_samples;
- char filename[0];
+ struct callchain_cursor callchain_cursor;
+ char filename[0];
};
struct perf_event_ops;
@@ -94,10 +95,10 @@ int __perf_session__process_events(struct perf_session *self,
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
-struct map_symbol *perf_session__resolve_callchain(struct perf_session *self,
- struct thread *thread,
- struct ip_callchain *chain,
- struct symbol **parent);
+int perf_session__resolve_callchain(struct perf_session *self,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
bool perf_session__has_traces(struct perf_session *self, const char *msg);
@@ -154,4 +155,13 @@ size_t perf_session__fprintf_nr_events(struct perf_session *self, FILE *fp)
{
return hists__fprintf_nr_events(&self->hists, fp);
}
+
+/* Parse @event, passing this session's sample_type and sample_id_all. */
+static inline int perf_session__parse_sample(struct perf_session *session,
+		const event_t *event, struct sample_data *sample)
+{
+	return event__parse_sample(event, session->sample_type,
+				   session->sample_id_all, sample);
+}
+
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 00f4eade2e3e..d5d3b22250f3 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -7,61 +7,6 @@
#include "util.h"
#include "debug.h"
-/* Skip "." and ".." directories */
-static int filter(const struct dirent *dir)
-{
- if (dir->d_name[0] == '.')
- return 0;
- else
- return 1;
-}
-
-struct thread_map *thread_map__new_by_pid(pid_t pid)
-{
- struct thread_map *threads;
- char name[256];
- int items;
- struct dirent **namelist = NULL;
- int i;
-
- sprintf(name, "/proc/%d/task", pid);
- items = scandir(name, &namelist, filter, NULL);
- if (items <= 0)
- return NULL;
-
- threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
- if (threads != NULL) {
- for (i = 0; i < items; i++)
- threads->map[i] = atoi(namelist[i]->d_name);
- threads->nr = items;
- }
-
- for (i=0; i<items; i++)
- free(namelist[i]);
- free(namelist);
-
- return threads;
-}
-
-struct thread_map *thread_map__new_by_tid(pid_t tid)
-{
- struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
-
- if (threads != NULL) {
- threads->map[0] = tid;
- threads->nr = 1;
- }
-
- return threads;
-}
-
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
-{
- if (pid != -1)
- return thread_map__new_by_pid(pid);
- return thread_map__new_by_tid(tid);
-}
-
static struct thread *thread__new(pid_t pid)
{
struct thread *self = zalloc(sizeof(*self));
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index d7574101054a..e5f2401c1b5e 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -18,24 +18,10 @@ struct thread {
int comm_len;
};
-struct thread_map {
- int nr;
- int map[];
-};
-
struct perf_session;
void thread__delete(struct thread *self);
-struct thread_map *thread_map__new_by_pid(pid_t pid);
-struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
-
-static inline void thread_map__delete(struct thread_map *threads)
-{
- free(threads);
-}
-
int thread__set_comm(struct thread *self, const char *comm);
int thread__comm_len(struct thread *self);
struct thread *perf_session__findnew(struct perf_session *self, pid_t pid);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000000..a5df131b77c3
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,64 @@
+#include <dirent.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "thread_map.h"
+
+/* scandir() filter: keep an entry (1) unless its name starts with '.' */
+static int filter(const struct dirent *dir)
+{
+	/*
+	 * This drops "." and "..", and any other dot-prefixed name as well.
+	 */
+	return dir->d_name[0] != '.';
+}
+
+/* One map entry per /proc/<pid>/task name kept by filter(); NULL on error. */
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+	struct dirent **entries = NULL;
+	struct thread_map *tmap;
+	char path[256];
+	int nr, n;
+
+	sprintf(path, "/proc/%d/task", pid);
+	nr = scandir(path, &entries, filter, NULL);
+	if (nr <= 0)
+		return NULL;
+
+	/* If malloc() fails, the scandir() results are still freed below. */
+	tmap = malloc(sizeof(*tmap) + sizeof(pid_t) * nr);
+	if (tmap != NULL)
+		tmap->nr = nr;
+
+	for (n = 0; n < nr; n++) {
+		if (tmap != NULL)
+			tmap->map[n] = atoi(entries[n]->d_name);
+		free(entries[n]);
+	}
+	free(entries);
+	return tmap;
+}
+
+/* Allocate a one-entry map holding only @tid; NULL if malloc() fails. */
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+	struct thread_map *single = malloc(sizeof(*single) + sizeof(pid_t));
+
+	if (single == NULL)
+		return NULL;
+	single->nr = 1;
+	single->map[0] = tid;
+	return single;
+}
+
+/* pid == -1: build the map from @tid alone; otherwise build it from @pid. */
+struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+{
+	return pid == -1 ? thread_map__new_by_tid(tid)
+			 : thread_map__new_by_pid(pid);
+}
+
+void thread_map__delete(struct thread_map *threads)
+{
+ free(threads); /* constructors here malloc() header + map[] as one block */
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000000..3cb907311409
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,15 @@
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+
+struct thread_map {
+ int nr; /* number of entries stored in map[] */
+ int map[]; /* ids as filled in by thread_map__new_by_{pid,tid}() */
+};
+
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+void thread_map__delete(struct thread_map *threads);
+#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index 60c463c16028..86428239fa65 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -377,7 +377,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self,
while (node) {
struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
struct rb_node *next = rb_next(node);
- u64 cumul = cumul_hits(child);
+ u64 cumul = callchain_cumul_hits(child);
struct callchain_list *chain;
char folded_sign = ' ';
int first = true;