summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2010-01-25 15:33:58 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2010-01-25 15:33:58 +1100
commit2f51276d9133b0069307661d627dba179694e1c3 (patch)
tree9091756d87c882da0a8c86617dc553800b1fc1ee
parent3a76a6e6ed4f441caa475a297c7f30b90884a357 (diff)
parent1d677ff56f7a9e86ab86bc859a6e667c16c9181c (diff)
Merge remote branch 'tip/auto-latest'
-rw-r--r--Documentation/RCU/00-INDEX8
-rw-r--r--Documentation/RCU/RTFP.txt58
-rw-r--r--Documentation/RCU/checklist.txt200
-rw-r--r--Documentation/RCU/rcu.txt48
-rw-r--r--Documentation/RCU/stallwarn.txt58
-rw-r--r--Documentation/RCU/torture.txt12
-rw-r--r--Documentation/RCU/whatisRCU.txt3
-rw-r--r--Documentation/filesystems/dentry-locking.txt3
-rw-r--r--Documentation/trace/kprobetrace.txt48
-rw-r--r--MAINTAINERS13
-rw-r--r--arch/x86/include/asm/alternative.h7
-rw-r--r--arch/x86/include/asm/atomic.h299
-rw-r--r--arch/x86/include/asm/atomic64_32.h160
-rw-r--r--arch/x86/include/asm/atomic64_64.h224
-rw-r--r--arch/x86/include/asm/atomic_32.h415
-rw-r--r--arch/x86/include/asm/atomic_64.h485
-rw-r--r--arch/x86/include/asm/fixmap.h16
-rw-r--r--arch/x86/include/asm/irq_vectors.h47
-rw-r--r--arch/x86/include/asm/nmi.h1
-rw-r--r--arch/x86/include/asm/ptrace.h4
-rw-r--r--arch/x86/include/asm/rwsem.h30
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/uaccess_64.h21
-rw-r--r--arch/x86/include/asm/uv/bios.h7
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h3
-rw-r--r--arch/x86/kernel/alternative.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c5
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c15
-rw-r--r--arch/x86/kernel/bios_uv.c20
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c84
-rw-r--r--arch/x86/kernel/cpu/perf_event.c8
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c11
-rw-r--r--arch/x86/kernel/dumpstack_32.c5
-rw-r--r--arch/x86/kernel/dumpstack_64.c5
-rw-r--r--arch/x86/kernel/process.c7
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/ptrace.c24
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
-rw-r--r--arch/x86/lib/copy_user_64.S6
-rw-r--r--arch/x86/lib/memcpy_64.S23
-rw-r--r--arch/x86/lib/memset_64.S18
-rw-r--r--arch/x86/mm/ioremap.c4
-rw-r--r--drivers/char/nvram.c3
-rw-r--r--include/linux/ftrace_event.h5
-rw-r--r--include/linux/irq.h2
-rw-r--r--include/linux/kernel.h5
-rw-r--r--include/linux/list.h14
-rw-r--r--include/linux/perf_event.h22
-rw-r--r--include/linux/plist.h4
-rw-r--r--include/linux/rcutiny.h12
-rw-r--r--include/linux/rcutree.h3
-rw-r--r--include/linux/syscalls.h10
-rw-r--r--include/trace/ftrace.h173
-rw-r--r--include/trace/syscall.h8
-rw-r--r--init/Kconfig13
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/perf_event.c293
-rw-r--r--kernel/rcutorture.c80
-rw-r--r--kernel/rcutree.c108
-rw-r--r--kernel/rcutree.h20
-rw-r--r--kernel/rcutree_plugin.h22
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/sched_idletask.c2
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/srcu.c2
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/trace.c144
-rw-r--r--kernel/trace/trace_events.c77
-rw-r--r--kernel/trace/trace_events_filter.c4
-rw-r--r--kernel/trace/trace_export.c87
-rw-r--r--kernel/trace/trace_functions_graph.c19
-rw-r--r--kernel/trace/trace_kprobe.c191
-rw-r--r--kernel/trace/trace_syscalls.c111
-rw-r--r--lib/Kconfig.debug4
-rw-r--r--lib/hweight.c7
-rwxr-xr-xscripts/recordmcount.pl39
-rw-r--r--tools/perf/Documentation/perf-probe.txt20
-rw-r--r--tools/perf/Documentation/perf.txt2
-rw-r--r--tools/perf/Makefile13
-rw-r--r--tools/perf/builtin-annotate.c12
-rw-r--r--tools/perf/builtin-buildid-list.c40
-rw-r--r--tools/perf/builtin-diff.c74
-rw-r--r--tools/perf/builtin-help.c5
-rw-r--r--tools/perf/builtin-kmem.c51
-rw-r--r--tools/perf/builtin-probe.c79
-rw-r--r--tools/perf/builtin-record.c42
-rw-r--r--tools/perf/builtin-report.c58
-rw-r--r--tools/perf/builtin-sched.c32
-rw-r--r--tools/perf/builtin-stat.c106
-rw-r--r--tools/perf/builtin-timechart.c25
-rw-r--r--tools/perf/builtin-top.c7
-rw-r--r--tools/perf/builtin-trace.c27
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/design.txt8
-rw-r--r--tools/perf/perf-archive.sh32
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/util/data_map.c252
-rw-r--r--tools/perf/util/debug.c1
-rw-r--r--tools/perf/util/debugfs.c17
-rw-r--r--tools/perf/util/debugfs.h2
-rw-r--r--tools/perf/util/event.c213
-rw-r--r--tools/perf/util/event.h79
-rw-r--r--tools/perf/util/header.c224
-rw-r--r--tools/perf/util/header.h4
-rw-r--r--tools/perf/util/map.h73
-rw-r--r--tools/perf/util/parse-events.c48
-rw-r--r--tools/perf/util/probe-event.c105
-rw-r--r--tools/perf/util/probe-event.h2
-rw-r--r--tools/perf/util/probe-finder.c203
-rw-r--r--tools/perf/util/probe-finder.h33
-rw-r--r--tools/perf/util/session.c393
-rw-r--r--tools/perf/util/session.h42
-rw-r--r--tools/perf/util/string.c65
-rw-r--r--tools/perf/util/symbol.c299
-rw-r--r--tools/perf/util/symbol.h27
-rw-r--r--tools/perf/util/thread.h9
-rw-r--r--tools/perf/util/trace-event-info.c64
-rw-r--r--tools/perf/util/util.c94
-rw-r--r--tools/perf/util/util.h3
-rw-r--r--tools/perf/util/values.c1
122 files changed, 3999 insertions, 2828 deletions
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 9bb62f7b89c3..0a27ea9621fa 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -8,14 +8,18 @@ listRCU.txt
- Using RCU to Protect Read-Mostly Linked Lists
NMI-RCU.txt
- Using RCU to Protect Dynamic NMI Handlers
+rcubarrier.txt
+ - RCU and Unloadable Modules
+rculist_nulls.txt
+ - RCU list primitives for use with SLAB_DESTROY_BY_RCU
rcuref.txt
- Reference-count design for elements of lists/arrays protected by RCU
rcu.txt
- RCU Concepts
-rcubarrier.txt
- - Unloading modules that use RCU callbacks
RTFP.txt
- List of RCU papers (bibliography) going back to 1980.
+stallwarn.txt
+ - RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR)
torture.txt
- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
trace.txt
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index d2b85237c76e..5051209e6835 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -25,10 +25,10 @@ to be referencing the data structure. However, this mechanism was not
optimized for modern computer systems, which is not surprising given
that these overheads were not so expensive in the mid-80s. Nonetheless,
passive serialization appears to be the first deferred-destruction
-mechanism to be used in production. Furthermore, the relevant patent has
-lapsed, so this approach may be used in non-GPL software, if desired.
-(In contrast, use of RCU is permitted only in software licensed under
-GPL. Sorry!!!)
+mechanism to be used in production. Furthermore, the relevant patent
+has lapsed, so this approach may be used in non-GPL software, if desired.
+(In contrast, implementation of RCU is permitted only in software licensed
+under either GPL or LGPL. Sorry!!!)
In 1990, Pugh [Pugh90] noted that explicitly tracking which threads
were reading a given data structure permitted deferred free to operate
@@ -150,6 +150,18 @@ preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part
LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally,
PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI].
+2008 saw a journal paper on real-time RCU [DinakarGuniguntala2008IBMSysJ],
+a history of how Linux changed RCU more than RCU changed Linux
+[PaulEMcKenney2008RCUOSR], and a design overview of hierarchical RCU
+[PaulEMcKenney2008HierarchicalRCU].
+
+2009 introduced user-level RCU algorithms [PaulEMcKenney2009MaliciousURCU],
+which Mathieu Desnoyers is now maintaining [MathieuDesnoyers2009URCU]
+[MathieuDesnoyersPhD]. TINY_RCU [PaulEMcKenney2009BloatWatchRCU] made
+its appearance, as did expedited RCU [PaulEMcKenney2009expeditedRCU].
+The problem of resizeable RCU-protected hash tables may now be on a path
+to a solution [JoshTriplett2009RPHash].
+
Bibtex Entries
@article{Kung80
@@ -730,6 +742,11 @@ Revised:
"
}
+#
+# "What is RCU?" LWN series.
+#
+########################################################################
+
@article{DinakarGuniguntala2008IBMSysJ
,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole"
,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}"
@@ -820,3 +837,36 @@ Revised:
Uniprocessor assumptions allow simplified RCU implementation.
"
}
+
+@unpublished{PaulEMcKenney2009expeditedRCU
+,Author="Paul E. McKenney"
+,Title="[{PATCH} -tip 0/3] expedited 'big hammer' {RCU} grace periods"
+,month="June"
+,day="25"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/6/25/306}
+[Viewed August 16, 2009]"
+,annotation="
+ First posting of expedited RCU to be accepted into -tip.
+"
+}
+
+@unpublished{JoshTriplett2009RPHash
+,Author="Josh Triplett"
+,Title="Scalable concurrent hash tables via relativistic programming"
+,month="September"
+,year="2009"
+,note="Linux Plumbers Conference presentation"
+,annotation="
+ RP fun with hash tables.
+"
+}
+
+@phdthesis{MathieuDesnoyersPhD
+, title = "Low-impact Operating System Tracing"
+, author = "Mathieu Desnoyers"
+, school = "Ecole Polytechnique de Montr\'{e}al"
+, month = "December"
+, year = 2009
+}
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 51525a30e8b4..767cf06a4276 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -8,13 +8,12 @@ would cause. This list is based on experiences reviewing such patches
over a rather long period of time, but improvements are always welcome!
0. Is RCU being applied to a read-mostly situation? If the data
- structure is updated more than about 10% of the time, then
- you should strongly consider some other approach, unless
- detailed performance measurements show that RCU is nonetheless
- the right tool for the job. Yes, you might think of RCU
- as simply cutting overhead off of the readers and imposing it
- on the writers. That is exactly why normal uses of RCU will
- do much more reading than updating.
+ structure is updated more than about 10% of the time, then you
+ should strongly consider some other approach, unless detailed
+ performance measurements show that RCU is nonetheless the right
+ tool for the job. Yes, RCU does reduce read-side overhead by
+ increasing write-side overhead, which is exactly why normal uses
+ of RCU will do much more reading than updating.
Another exception is where performance is not an issue, and RCU
provides a simpler implementation. An example of this situation
@@ -35,13 +34,13 @@ over a rather long period of time, but improvements are always welcome!
If you choose #b, be prepared to describe how you have handled
memory barriers on weakly ordered machines (pretty much all of
- them -- even x86 allows reads to be reordered), and be prepared
- to explain why this added complexity is worthwhile. If you
- choose #c, be prepared to explain how this single task does not
- become a major bottleneck on big multiprocessor machines (for
- example, if the task is updating information relating to itself
- that other tasks can read, there by definition can be no
- bottleneck).
+ them -- even x86 allows later loads to be reordered to precede
+ earlier stores), and be prepared to explain why this added
+ complexity is worthwhile. If you choose #c, be prepared to
+ explain how this single task does not become a major bottleneck on
+ big multiprocessor machines (for example, if the task is updating
+ information relating to itself that other tasks can read, there
+ by definition can be no bottleneck).
2. Do the RCU read-side critical sections make proper use of
rcu_read_lock() and friends? These primitives are needed
@@ -51,8 +50,10 @@ over a rather long period of time, but improvements are always welcome!
actuarial risk of your kernel.
As a rough rule of thumb, any dereference of an RCU-protected
- pointer must be covered by rcu_read_lock() or rcu_read_lock_bh()
- or by the appropriate update-side lock.
+ pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
+ rcu_read_lock_sched(), or by the appropriate update-side lock.
+ Disabling of preemption can serve as rcu_read_lock_sched(), but
+ is less readable.
3. Does the update code tolerate concurrent accesses?
@@ -62,25 +63,27 @@ over a rather long period of time, but improvements are always welcome!
of ways to handle this concurrency, depending on the situation:
a. Use the RCU variants of the list and hlist update
- primitives to add, remove, and replace elements on an
- RCU-protected list. Alternatively, use the RCU-protected
- trees that have been added to the Linux kernel.
+ primitives to add, remove, and replace elements on
+ an RCU-protected list. Alternatively, use the other
+ RCU-protected data structures that have been added to
+ the Linux kernel.
This is almost always the best approach.
b. Proceed as in (a) above, but also maintain per-element
locks (that are acquired by both readers and writers)
that guard per-element state. Of course, fields that
- the readers refrain from accessing can be guarded by the
- update-side lock.
+ the readers refrain from accessing can be guarded by
+ some other lock acquired only by updaters, if desired.
This works quite well, also.
c. Make updates appear atomic to readers. For example,
- pointer updates to properly aligned fields will appear
- atomic, as will individual atomic primitives. Operations
- performed under a lock and sequences of multiple atomic
- primitives will -not- appear to be atomic.
+ pointer updates to properly aligned fields will
+ appear atomic, as will individual atomic primitives.
+ Sequences of perations performed under a lock will -not-
+ appear to be atomic to RCU readers, nor will sequences
+ of multiple atomic primitives.
This can work, but is starting to get a bit tricky.
@@ -98,9 +101,9 @@ over a rather long period of time, but improvements are always welcome!
a new structure containing updated values.
4. Weakly ordered CPUs pose special challenges. Almost all CPUs
- are weakly ordered -- even i386 CPUs allow reads to be reordered.
- RCU code must take all of the following measures to prevent
- memory-corruption problems:
+ are weakly ordered -- even x86 CPUs allow later loads to be
+ reordered to precede earlier stores. RCU code must take all of
+ the following measures to prevent memory-corruption problems:
a. Readers must maintain proper ordering of their memory
accesses. The rcu_dereference() primitive ensures that
@@ -113,14 +116,21 @@ over a rather long period of time, but improvements are always welcome!
The rcu_dereference() primitive is also an excellent
documentation aid, letting the person reading the code
know exactly which pointers are protected by RCU.
-
- The rcu_dereference() primitive is used by the various
- "_rcu()" list-traversal primitives, such as the
- list_for_each_entry_rcu(). Note that it is perfectly
- legal (if redundant) for update-side code to use
- rcu_dereference() and the "_rcu()" list-traversal
- primitives. This is particularly useful in code
- that is common to readers and updaters.
+ Please note that compilers can also reorder code, and
+ they are becoming increasingly aggressive about doing
+ just that. The rcu_dereference() primitive therefore
+ also prevents destructive compiler optimizations.
+
+ The rcu_dereference() primitive is used by the
+ various "_rcu()" list-traversal primitives, such
+ as the list_for_each_entry_rcu(). Note that it is
+ perfectly legal (if redundant) for update-side code to
+ use rcu_dereference() and the "_rcu()" list-traversal
+ primitives. This is particularly useful in code that
+ is common to readers and updaters. However, neither
+ rcu_dereference() nor the "_rcu()" list-traversal
+ primitives can substitute for a good concurrency design
+ coordinating among multiple updaters.
b. If the list macros are being used, the list_add_tail_rcu()
and list_add_rcu() primitives must be used in order
@@ -135,11 +145,14 @@ over a rather long period of time, but improvements are always welcome!
readers. Similarly, if the hlist macros are being used,
the hlist_del_rcu() primitive is required.
- The list_replace_rcu() primitive may be used to
- replace an old structure with a new one in an
- RCU-protected list.
+ The list_replace_rcu() and hlist_replace_rcu() primitives
+ may be used to replace an old structure with a new one
+ in their respective types of RCU-protected lists.
+
+ d. Rules similar to (4b) and (4c) apply to the "hlist_nulls"
+ type of RCU-protected linked lists.
- d. Updates must ensure that initialization of a given
+ e. Updates must ensure that initialization of a given
structure happens before pointers to that structure are
publicized. Use the rcu_assign_pointer() primitive
when publicizing a pointer to a structure that can
@@ -151,16 +164,31 @@ over a rather long period of time, but improvements are always welcome!
it cannot block.
6. Since synchronize_rcu() can block, it cannot be called from
- any sort of irq context. Ditto for synchronize_sched() and
- synchronize_srcu().
-
-7. If the updater uses call_rcu(), then the corresponding readers
- must use rcu_read_lock() and rcu_read_unlock(). If the updater
- uses call_rcu_bh(), then the corresponding readers must use
- rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater
- uses call_rcu_sched(), then the corresponding readers must
- disable preemption. Mixing things up will result in confusion
- and broken kernels.
+ any sort of irq context. The same rule applies for
+ synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(),
+ synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(),
+ synchronize_sched_expedite(), and synchronize_srcu_expedited().
+
+ The expedited forms of these primitives have the same semantics
+ as the non-expedited forms, but expediting is both expensive
+ and unfriendly to real-time workloads. Use of the expedited
+ primitives should be restricted to rare configuration-change
+ operations that would not normally be undertaken while a real-time
+ workload is running.
+
+7. If the updater uses call_rcu() or synchronize_rcu(), then the
+ corresponding readers must use rcu_read_lock() and
+ rcu_read_unlock(). If the updater uses call_rcu_bh() or
+ synchronize_rcu_bh(), then the corresponding readers must
+ use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the
+ updater uses call_rcu_sched() or synchronize_sched(), then
+ the corresponding readers must disable preemption, possibly
+ by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
+ If the updater uses synchronize_srcu(), the the corresponding
+ readers must use srcu_read_lock() and srcu_read_unlock(),
+ and with the same srcu_struct. The rules for the expedited
+ primitives are the same as for their non-expedited counterparts.
+ Mixing things up will result in confusion and broken kernels.
One exception to this rule: rcu_read_lock() and rcu_read_unlock()
may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -212,6 +240,8 @@ over a rather long period of time, but improvements are always welcome!
e. Periodically invoke synchronize_rcu(), permitting a limited
number of updates per grace period.
+ The same cautions apply to call_rcu_bh() and call_rcu_sched().
+
9. All RCU list-traversal primitives, which include
rcu_dereference(), list_for_each_entry_rcu(),
list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
@@ -229,7 +259,8 @@ over a rather long period of time, but improvements are always welcome!
10. Conversely, if you are in an RCU read-side critical section,
and you don't hold the appropriate update-side lock, you -must-
use the "_rcu()" variants of the list macros. Failing to do so
- will break Alpha and confuse people reading your code.
+ will break Alpha, cause aggressive compilers to generate bad code,
+ and confuse people trying to read your code.
11. Note that synchronize_rcu() -only- guarantees to wait until
all currently executing rcu_read_lock()-protected RCU read-side
@@ -239,15 +270,21 @@ over a rather long period of time, but improvements are always welcome!
rcu_read_lock()-protected read-side critical sections, do -not-
use synchronize_rcu().
- If you want to wait for some of these other things, you might
- instead need to use synchronize_irq() or synchronize_sched().
+ Similarly, disabling preemption is not an acceptable substitute
+ for rcu_read_lock(). Code that attempts to use preemption
+ disabling where it should be using rcu_read_lock() will break
+ in real-time kernel builds.
+
+ If you want to wait for interrupt handlers, NMI handlers, and
+ code under the influence of preempt_disable(), you instead
+ need to use synchronize_irq() or synchronize_sched().
12. Any lock acquired by an RCU callback must be acquired elsewhere
with softirq disabled, e.g., via spin_lock_irqsave(),
spin_lock_bh(), etc. Failing to disable irq on a given
- acquisition of that lock will result in deadlock as soon as the
- RCU callback happens to interrupt that acquisition's critical
- section.
+ acquisition of that lock will result in deadlock as soon as
+ the RCU softirq handler happens to run your RCU callback while
+ interrupting that acquisition's critical section.
13. RCU callbacks can be and are executed in parallel. In many cases,
the callback code simply wrappers around kfree(), so that this
@@ -265,29 +302,30 @@ over a rather long period of time, but improvements are always welcome!
not the case, a self-spawning RCU callback would prevent the
victim CPU from ever going offline.)
-14. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu())
- may only be invoked from process context. Unlike other forms of
- RCU, it -is- permissible to block in an SRCU read-side critical
- section (demarked by srcu_read_lock() and srcu_read_unlock()),
- hence the "SRCU": "sleepable RCU". Please note that if you
- don't need to sleep in read-side critical sections, you should
- be using RCU rather than SRCU, because RCU is almost always
- faster and easier to use than is SRCU.
+14. SRCU (srcu_read_lock(), srcu_read_unlock(), synchronize_srcu(),
+ and synchronize_srcu_expedited()) may only be invoked from
+ process context. Unlike other forms of RCU, it -is- permissible
+ to block in an SRCU read-side critical section (demarked by
+ srcu_read_lock() and srcu_read_unlock()), hence the "SRCU":
+ "sleepable RCU". Please note that if you don't need to sleep
+ in read-side critical sections, you should be using RCU rather
+ than SRCU, because RCU is almost always faster and easier to
+ use than is SRCU.
Also unlike other forms of RCU, explicit initialization
and cleanup is required via init_srcu_struct() and
cleanup_srcu_struct(). These are passed a "struct srcu_struct"
that defines the scope of a given SRCU domain. Once initialized,
the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
- and synchronize_srcu(). A given synchronize_srcu() waits only
- for SRCU read-side critical sections governed by srcu_read_lock()
- and srcu_read_unlock() calls that have been passd the same
- srcu_struct. This property is what makes sleeping read-side
- critical sections tolerable -- a given subsystem delays only
- its own updates, not those of other subsystems using SRCU.
- Therefore, SRCU is less prone to OOM the system than RCU would
- be if RCU's read-side critical sections were permitted to
- sleep.
+ synchronize_srcu(), and synchronize_srcu_expedited(). A given
+ synchronize_srcu() waits only for SRCU read-side critical
+ sections governed by srcu_read_lock() and srcu_read_unlock()
+ calls that have been passed the same srcu_struct. This property
+ is what makes sleeping read-side critical sections tolerable --
+ a given subsystem delays only its own updates, not those of other
+ subsystems using SRCU. Therefore, SRCU is less prone to OOM the
+ system than RCU would be if RCU's read-side critical sections
+ were permitted to sleep.
The ability to sleep in read-side critical sections does not
come for free. First, corresponding srcu_read_lock() and
@@ -311,12 +349,12 @@ over a rather long period of time, but improvements are always welcome!
destructive operation, and -only- -then- invoke call_rcu(),
synchronize_rcu(), or friends.
- Because these primitives only wait for pre-existing readers,
- it is the caller's responsibility to guarantee safety to
- any subsequent readers.
+ Because these primitives only wait for pre-existing readers, it
+ is the caller's responsibility to guarantee that any subsequent
+ readers will execute safely.
-16. The various RCU read-side primitives do -not- contain memory
- barriers. The CPU (and in some cases, the compiler) is free
- to reorder code into and out of RCU read-side critical sections.
- It is the responsibility of the RCU update-side primitives to
- deal with this.
+16. The various RCU read-side primitives do -not- necessarily contain
+ memory barriers. You should therefore plan for the CPU
+ and the compiler to freely reorder code into and out of RCU
+ read-side critical sections. It is the responsibility of the
+ RCU update-side primitives to deal with this.
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
index 2a23523ce471..31852705b586 100644
--- a/Documentation/RCU/rcu.txt
+++ b/Documentation/RCU/rcu.txt
@@ -75,6 +75,8 @@ o I hear that RCU is patented? What is with that?
search for the string "Patent" in RTFP.txt to find them.
Of these, one was allowed to lapse by the assignee, and the
others have been contributed to the Linux kernel under GPL.
+ There are now also LGPL implementations of user-level RCU
+ available (http://lttng.org/?q=node/18).
o I hear that RCU needs work in order to support realtime kernels?
@@ -91,48 +93,4 @@ o Where can I find more information on RCU?
o What are all these files in this directory?
-
- NMI-RCU.txt
-
- Describes how to use RCU to implement dynamic
- NMI handlers, which can be revectored on the fly,
- without rebooting.
-
- RTFP.txt
-
- List of RCU-related publications and web sites.
-
- UP.txt
-
- Discussion of RCU usage in UP kernels.
-
- arrayRCU.txt
-
- Describes how to use RCU to protect arrays, with
- resizeable arrays whose elements reference other
- data structures being of the most interest.
-
- checklist.txt
-
- Lists things to check for when inspecting code that
- uses RCU.
-
- listRCU.txt
-
- Describes how to use RCU to protect linked lists.
- This is the simplest and most common use of RCU
- in the Linux kernel.
-
- rcu.txt
-
- You are reading it!
-
- rcuref.txt
-
- Describes how to combine use of reference counts
- with RCU.
-
- whatisRCU.txt
-
- Overview of how the RCU implementation works. Along
- the way, presents a conceptual view of RCU.
+ See 00-INDEX for the list.
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
new file mode 100644
index 000000000000..1423d2570d78
--- /dev/null
+++ b/Documentation/RCU/stallwarn.txt
@@ -0,0 +1,58 @@
+Using RCU's CPU Stall Detector
+
+The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables
+RCU's CPU stall detector, which detects conditions that unduly delay
+RCU grace periods. The stall detector's idea of what constitutes
+"unduly delayed" is controlled by a pair of C preprocessor macros:
+
+RCU_SECONDS_TILL_STALL_CHECK
+
+ This macro defines the period of time that RCU will wait from
+ the beginning of a grace period until it issues an RCU CPU
+ stall warning. It is normally ten seconds.
+
+RCU_SECONDS_TILL_STALL_RECHECK
+
+ This macro defines the period of time that RCU will wait after
+ issuing a stall warning until it issues another stall warning.
+ It is normally set to thirty seconds.
+
+RCU_STALL_RAT_DELAY
+
+ The CPU stall detector tries to make the offending CPU rat on itself,
+ as this often gives better-quality stack traces. However, if
+ the offending CPU does not detect its own stall in the number
+ of jiffies specified by RCU_STALL_RAT_DELAY, then other CPUs will
+ complain. This is normally set to two jiffies.
+
+The following problems can result in an RCU CPU stall warning:
+
+o A CPU looping in an RCU read-side critical section.
+
+o A CPU looping with interrupts disabled.
+
+o A CPU looping with preemption disabled.
+
+o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
+ without invoking schedule().
+
+o A bug in the RCU implementation.
+
+o A hardware failure. This is quite unlikely, but has occurred
+ at least once in a former life. A CPU failed in a running system,
+ becoming unresponsive, but not causing an immediate crash.
+ This resulted in a series of RCU CPU stall warnings, eventually
+ leading the realization that the CPU had failed.
+
+The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning.
+SRCU does not do so directly, but its calls to synchronize_sched() will
+result in RCU-sched detecting any CPU stalls that might be occurring.
+
+To diagnose the cause of the stall, inspect the stack traces. The offending
+function will usually be near the top of the stack. If you have a series
+of stall warnings from a single extended stall, comparing the stack traces
+can often help determine where the stall is occurring, which will usually
+be in the function nearest the top of the stack that stays the same from
+trace to trace.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 9dba3bb90e60..0e50bc2aa1e2 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -30,6 +30,18 @@ MODULE PARAMETERS
This module has the following parameters:
+fqs_duration Duration (in microseconds) of artificially induced bursts
+ of force_quiescent_state() invocations. In RCU
+ implementations having force_quiescent_state(), these
+ bursts help force races between forcing a given grace
+ period and that grace period ending on its own.
+
+fqs_holdoff Holdoff time (in microseconds) between consecutive calls
+ to force_quiescent_state() within a burst.
+
+fqs_stutter Wait time (in seconds) between consecutive bursts
+ of calls to force_quiescent_state().
+
irqreaders Says to invoke RCU readers from irq level. This is currently
done via timers. Defaults to "1" for variants of RCU that
permit this. (Or, more accurately, variants of RCU that do
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index d542ca243b80..469a58b2e67e 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -327,7 +327,8 @@ a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock()
b. call_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh()
-c. synchronize_sched() preempt_disable() / preempt_enable()
+c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched()
+ preempt_disable() / preempt_enable()
local_irq_save() / local_irq_restore()
hardirq enter / hardirq exit
NMI enter / NMI exit
diff --git a/Documentation/filesystems/dentry-locking.txt b/Documentation/filesystems/dentry-locking.txt
index 4c0c575a4012..79334ed5daa7 100644
--- a/Documentation/filesystems/dentry-locking.txt
+++ b/Documentation/filesystems/dentry-locking.txt
@@ -62,7 +62,8 @@ changes are :
2. Insertion of a dentry into the hash table is done using
hlist_add_head_rcu() which take care of ordering the writes - the
writes to the dentry must be visible before the dentry is
- inserted. This works in conjunction with hlist_for_each_rcu() while
+ inserted. This works in conjunction with hlist_for_each_rcu(),
+ which has since been replaced by hlist_for_each_entry_rcu(), while
walking the hash chain. The only requirement is that all
initialization to the dentry must be done before
hlist_add_head_rcu() since we don't have dcache_lock protection
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index 47aabeebbdf6..f30978e001f8 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -37,15 +37,12 @@ Synopsis of kprobe_events
@SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
- $argN : Fetch function argument. (N >= 0)(*)
- $retval : Fetch return value.(**)
- +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***)
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG: Set NAME as the argument name of FETCHARG.
- (*) aN may not correct on asmlinkaged functions and at the middle of
- function body.
- (**) only for return probe.
- (***) this is useful for fetching a field of data structures.
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
Per-Probe Event Filtering
@@ -82,11 +79,14 @@ Usage examples
To add a probe as a new event, write a new definition to kprobe_events
as below.
- echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events
+ echo p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack) > /sys/kernel/debug/tracing/kprobe_events
This sets a kprobe on the top of do_sys_open() function with recording
-1st to 4th arguments as "myprobe" event. As this example shows, users can
-choose more familiar names for each arguments.
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events
@@ -97,23 +97,24 @@ recording return value as "myretprobe" event.
cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
name: myprobe
-ID: 75
+ID: 780
format:
- field:unsigned short common_type; offset:0; size:2;
- field:unsigned char common_flags; offset:2; size:1;
- field:unsigned char common_preempt_count; offset:3; size:1;
- field:int common_pid; offset:4; size:4;
- field:int common_tgid; offset:8; size:4;
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1;signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:int common_lock_depth; offset:8; size:4; signed:1;
- field: unsigned long ip; offset:16;tsize:8;
- field: int nargs; offset:24;tsize:4;
- field: unsigned long dfd; offset:32;tsize:8;
- field: unsigned long filename; offset:40;tsize:8;
- field: unsigned long flags; offset:48;tsize:8;
- field: unsigned long mode; offset:56;tsize:8;
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:int __probe_nargs; offset:16; size:4; signed:1;
+ field:unsigned long dfd; offset:20; size:4; signed:0;
+ field:unsigned long filename; offset:24; size:4; signed:0;
+ field:unsigned long flags; offset:28; size:4; signed:0;
+ field:unsigned long mode; offset:32; size:4; signed:0;
-print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
+REC->dfd, REC->filename, REC->flags, REC->mode
You can see that the event has 4 arguments as in the expressions you specified.
@@ -146,4 +147,3 @@ events, you need to enable it.
returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
returns from do_sys_open to sys_open+0x1b).
-
diff --git a/MAINTAINERS b/MAINTAINERS
index e0aa38e6a46c..4cc56825db95 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4562,7 +4562,7 @@ F: drivers/net/wireless/ray*
RCUTORTURE MODULE
M: Josh Triplett <josh@freedesktop.org>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
-S: Maintained
+S: Supported
F: Documentation/RCU/torture.txt
F: kernel/rcutorture.c
@@ -4587,11 +4587,12 @@ M: Dipankar Sarma <dipankar@in.ibm.com>
M: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
W: http://www.rdrop.com/users/paulmck/rclock/
S: Supported
-F: Documentation/RCU/rcu.txt
-F: Documentation/RCU/rcuref.txt
-F: include/linux/rcupdate.h
-F: include/linux/srcu.h
-F: kernel/rcupdate.c
+F: Documentation/RCU/
+F: include/linux/rcu*
+F: include/linux/srcu*
+F: kernel/rcu*
+F: kernel/srcu*
+X: kernel/rcutorture.c
REAL TIME CLOCK DRIVER
M: Paul Gortmaker <p_gortmaker@yahoo.com>
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 69b74a7b877f..3b5b828767b6 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -125,11 +125,16 @@ static inline void alternatives_smp_switch(int smp) {}
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \
: output : "i" (0), ## input)
+/* Like alternative_io, but for replacing a direct call with another one. */
+#define alternative_call(oldfunc, newfunc, feature, output, input...) \
+ asm volatile (ALTERNATIVE("call %P[old]", "call %P[new]", feature) \
+ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+
/*
* use this macro(s) if you need more than one output parameter
* in alternative_io
*/
-#define ASM_OUTPUT2(a, b) a, b
+#define ASM_OUTPUT2(a...) a
struct paravirt_patch_site;
#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 4e1b8873c474..8baaa719fa7f 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -1,5 +1,300 @@
+#ifndef _ASM_X86_ATOMIC_H
+#define _ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+#include <asm/cmpxchg.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ return v->counter;
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic_add(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "addl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline void atomic_sub(int i, atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "subl %1,%0"
+ : "+m" (v->counter)
+ : "ir" (i));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic_sub_and_test(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
+ : "+m" (v->counter), "=qm" (c)
+ : "ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "incl %0"
+ : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic_dec(atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "decl %0"
+ : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "decl %0; sete %1"
+ : "+m" (v->counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic_inc_and_test(atomic_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "incl %0; sete %1"
+ : "+m" (v->counter), "=qm" (c)
+ : : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int atomic_add_negative(int i, atomic_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
+ : "+m" (v->counter), "=qm" (c)
+ : "ir" (i) : "memory");
+ return c;
+}
+
+/**
+ * atomic_add_return - add integer and return
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+ int __i;
+#ifdef CONFIG_M386
+ unsigned long flags;
+ if (unlikely(boot_cpu_data.x86 <= 3))
+ goto no_xadd;
+#endif
+ /* Modern 486+ processor */
+ __i = i;
+ asm volatile(LOCK_PREFIX "xaddl %0, %1"
+ : "+r" (i), "+m" (v->counter)
+ : : "memory");
+ return i + __i;
+
+#ifdef CONFIG_M386
+no_xadd: /* Legacy 386 processor */
+ local_irq_save(flags);
+ __i = atomic_read(v);
+ atomic_set(v, i + __i);
+ local_irq_restore(flags);
+ return i + __i;
+#endif
+}
+
+/**
+ * atomic_sub_return - subtract integer and return
+ * @v: pointer of type atomic_t
+ * @i: integer value to subtract
+ *
+ * Atomically subtracts @i from @v and returns @v - @i
+ */
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+ return atomic_add_return(-i, v);
+}
+
+#define atomic_inc_return(v) (atomic_add_return(1, v))
+#define atomic_dec_return(v) (atomic_sub_return(1, v))
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ return cmpxchg(&v->counter, old, new);
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+ return xchg(&v->counter, new);
+}
+
+/**
+ * atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic_add_unless(atomic_t *v, int a, int u)
+{
+ int c, old;
+ c = atomic_read(v);
+ for (;;) {
+ if (unlikely(c == (u)))
+ break;
+ old = atomic_cmpxchg((v), c, c + (a));
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c != (u);
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+/**
+ * atomic_inc_short - increment of a short integer
+ * @v: pointer to type int
+ *
+ * Atomically adds 1 to @v
+ * Returns the new value of @u
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+ asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
+ return *v;
+}
+
+#ifdef CONFIG_X86_64
+/**
+ * atomic_or_long - OR of two long integers
+ * @v1: pointer to type unsigned long
+ * @v2: pointer to type unsigned long
+ *
+ * Atomically ORs @v1 and @v2
+ * Returns the result of the OR
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+ asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
+}
+#endif
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+ asm volatile(LOCK_PREFIX "andl %0,%1" \
+ : : "r" (~(mask)), "m" (*(addr)) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+ asm volatile(LOCK_PREFIX "orl %0,%1" \
+ : : "r" ((unsigned)(mask)), "m" (*(addr)) \
+ : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec() barrier()
+#define smp_mb__after_atomic_dec() barrier()
+#define smp_mb__before_atomic_inc() barrier()
+#define smp_mb__after_atomic_inc() barrier()
+
#ifdef CONFIG_X86_32
-# include "atomic_32.h"
+# include "atomic64_32.h"
#else
-# include "atomic_64.h"
+# include "atomic64_64.h"
#endif
+
+#include <asm-generic/atomic-long.h>
+#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
new file mode 100644
index 000000000000..03027bf28de5
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -0,0 +1,160 @@
+#ifndef _ASM_X86_ATOMIC64_32_H
+#define _ASM_X86_ATOMIC64_32_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/processor.h>
+//#include <asm/cmpxchg.h>
+
+/* An 64bit atomic type */
+
+typedef struct {
+ u64 __aligned(8) counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(val) { (val) }
+
+extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
+
+/**
+ * atomic64_xchg - xchg atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ * @new_val: value to assign
+ *
+ * Atomically xchgs the value of @ptr to @new_val and returns
+ * the old value.
+ */
+extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ * @new_val: value to assign
+ *
+ * Atomically sets the value of @ptr to @new_val.
+ */
+extern void atomic64_set(atomic64_t *ptr, u64 new_val);
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically reads the value of @ptr and returns it.
+ */
+static inline u64 atomic64_read(atomic64_t *ptr)
+{
+ u64 res;
+
+ /*
+ * Note, we inline this atomic64_t primitive because
+ * it only clobbers EAX/EDX and leaves the others
+ * untouched. We also (somewhat subtly) rely on the
+ * fact that cmpxchg8b returns the current 64-bit value
+ * of the memory location we are touching:
+ */
+ asm volatile(
+ "mov %%ebx, %%eax\n\t"
+ "mov %%ecx, %%edx\n\t"
+ LOCK_PREFIX "cmpxchg8b %1\n"
+ : "=&A" (res)
+ : "m" (*ptr)
+ );
+
+ return res;
+}
+
+extern u64 atomic64_read(atomic64_t *ptr);
+
+/**
+ * atomic64_add_return - add and return
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns @delta + *@ptr
+ */
+extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
+
+/*
+ * Other variants with different arithmetic operators:
+ */
+extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
+extern u64 atomic64_inc_return(atomic64_t *ptr);
+extern u64 atomic64_dec_return(atomic64_t *ptr);
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr.
+ */
+extern void atomic64_add(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr.
+ */
+extern void atomic64_sub(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @delta: integer value to subtract
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically subtracts @delta from @ptr and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1.
+ */
+extern void atomic64_inc(atomic64_t *ptr);
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1.
+ */
+extern void atomic64_dec(atomic64_t *ptr);
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically decrements @ptr by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+extern int atomic64_dec_and_test(atomic64_t *ptr);
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically increments @ptr by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+extern int atomic64_inc_and_test(atomic64_t *ptr);
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @delta: integer value to add
+ * @ptr: pointer to type atomic64_t
+ *
+ * Atomically adds @delta to @ptr and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
+
+#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
new file mode 100644
index 000000000000..51c5b4056929
--- /dev/null
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -0,0 +1,224 @@
+#ifndef _ASM_X86_ATOMIC64_64_H
+#define _ASM_X86_ATOMIC64_64_H
+
+#include <linux/types.h>
+#include <asm/alternative.h>
+#include <asm/cmpxchg.h>
+
+/* The 64-bit atomic type */
+
+#define ATOMIC64_INIT(i) { (i) }
+
+/**
+ * atomic64_read - read atomic64 variable
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically reads the value of @v.
+ * Doesn't imply a read memory barrier.
+ */
+static inline long atomic64_read(const atomic64_t *v)
+{
+ return v->counter;
+}
+
+/**
+ * atomic64_set - set atomic64 variable
+ * @v: pointer to type atomic64_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic64_set(atomic64_t *v, long i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic64_add - add integer to atomic64 variable
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v.
+ */
+static inline void atomic64_add(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "addq %1,%0"
+ : "=m" (v->counter)
+ : "er" (i), "m" (v->counter));
+}
+
+/**
+ * atomic64_sub - subtract the atomic64 variable
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically subtracts @i from @v.
+ */
+static inline void atomic64_sub(long i, atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "subq %1,%0"
+ : "=m" (v->counter)
+ : "er" (i), "m" (v->counter));
+}
+
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_sub_and_test(long i, atomic64_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
+ : "=m" (v->counter), "=qm" (c)
+ : "er" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/**
+ * atomic64_inc - increment atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic64_inc(atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "incq %0"
+ : "=m" (v->counter)
+ : "m" (v->counter));
+}
+
+/**
+ * atomic64_dec - decrement atomic64 variable
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically decrements @v by 1.
+ */
+static inline void atomic64_dec(atomic64_t *v)
+{
+ asm volatile(LOCK_PREFIX "decq %0"
+ : "=m" (v->counter)
+ : "m" (v->counter));
+}
+
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic64_dec_and_test(atomic64_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "decq %0; sete %1"
+ : "=m" (v->counter), "=qm" (c)
+ : "m" (v->counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline int atomic64_inc_and_test(atomic64_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "incq %0; sete %1"
+ : "=m" (v->counter), "=qm" (c)
+ : "m" (v->counter) : "memory");
+ return c != 0;
+}
+
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline int atomic64_add_negative(long i, atomic64_t *v)
+{
+ unsigned char c;
+
+ asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
+ : "=m" (v->counter), "=qm" (c)
+ : "er" (i), "m" (v->counter) : "memory");
+ return c;
+}
+
+/**
+ * atomic64_add_return - add and return
+ * @i: integer value to add
+ * @v: pointer to type atomic64_t
+ *
+ * Atomically adds @i to @v and returns @i + @v
+ */
+static inline long atomic64_add_return(long i, atomic64_t *v)
+{
+ long __i = i;
+ asm volatile(LOCK_PREFIX "xaddq %0, %1;"
+ : "+r" (i), "+m" (v->counter)
+ : : "memory");
+ return i + __i;
+}
+
+static inline long atomic64_sub_return(long i, atomic64_t *v)
+{
+ return atomic64_add_return(-i, v);
+}
+
+#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
+#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
+
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+{
+ return cmpxchg(&v->counter, old, new);
+}
+
+static inline long atomic64_xchg(atomic64_t *v, long new)
+{
+ return xchg(&v->counter, new);
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+{
+ long c, old;
+ c = atomic64_read(v);
+ for (;;) {
+ if (unlikely(c == (u)))
+ break;
+ old = atomic64_cmpxchg((v), c, c + (a));
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return c != (u);
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#endif /* _ASM_X86_ATOMIC64_64_H */
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h
deleted file mode 100644
index dc5a667ff791..000000000000
--- a/arch/x86/include/asm/atomic_32.h
+++ /dev/null
@@ -1,415 +0,0 @@
-#ifndef _ASM_X86_ATOMIC_32_H
-#define _ASM_X86_ATOMIC_32_H
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/processor.h>
-#include <asm/cmpxchg.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-static inline int atomic_read(const atomic_t *v)
-{
- return v->counter;
-}
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-static inline void atomic_set(atomic_t *v, int i)
-{
- v->counter = i;
-}
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
-static inline void atomic_add(int i, atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "addl %1,%0"
- : "+m" (v->counter)
- : "ir" (i));
-}
-
-/**
- * atomic_sub - subtract integer from atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
-static inline void atomic_sub(int i, atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "subl %1,%0"
- : "+m" (v->counter)
- : "ir" (i));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-static inline void atomic_inc(atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "incl %0"
- : "+m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-static inline void atomic_dec(atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "decl %0"
- : "+m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "decl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "incl %0; sete %1"
- : "+m" (v->counter), "=qm" (c)
- : : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
- : "+m" (v->counter), "=qm" (c)
- : "ir" (i) : "memory");
- return c;
-}
-
-/**
- * atomic_add_return - add integer and return
- * @v: pointer of type atomic_t
- * @i: integer value to add
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline int atomic_add_return(int i, atomic_t *v)
-{
- int __i;
-#ifdef CONFIG_M386
- unsigned long flags;
- if (unlikely(boot_cpu_data.x86 <= 3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
- __i = i;
- asm volatile(LOCK_PREFIX "xaddl %0, %1"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- local_irq_save(flags);
- __i = atomic_read(v);
- atomic_set(v, i + __i);
- local_irq_restore(flags);
- return i + __i;
-#endif
-}
-
-/**
- * atomic_sub_return - subtract integer and return
- * @v: pointer of type atomic_t
- * @i: integer value to subtract
- *
- * Atomically subtracts @i from @v and returns @v - @i
- */
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
- return atomic_add_return(-i, v);
-}
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
- return cmpxchg(&v->counter, old, new);
-}
-
-static inline int atomic_xchg(atomic_t *v, int new)
-{
- return xchg(&v->counter, new);
-}
-
-/**
- * atomic_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c, old;
- c = atomic_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c != (u);
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
-
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "andl %0,%1" \
- : : "r" (~(mask)), "m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "orl %0,%1" \
- : : "r" (mask), "m" (*(addr)) : "memory")
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-/* An 64bit atomic type */
-
-typedef struct {
- u64 __aligned(8) counter;
-} atomic64_t;
-
-#define ATOMIC64_INIT(val) { (val) }
-
-extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val);
-
-/**
- * atomic64_xchg - xchg atomic64 variable
- * @ptr: pointer to type atomic64_t
- * @new_val: value to assign
- *
- * Atomically xchgs the value of @ptr to @new_val and returns
- * the old value.
- */
-extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val);
-
-/**
- * atomic64_set - set atomic64 variable
- * @ptr: pointer to type atomic64_t
- * @new_val: value to assign
- *
- * Atomically sets the value of @ptr to @new_val.
- */
-extern void atomic64_set(atomic64_t *ptr, u64 new_val);
-
-/**
- * atomic64_read - read atomic64 variable
- * @ptr: pointer to type atomic64_t
- *
- * Atomically reads the value of @ptr and returns it.
- */
-static inline u64 atomic64_read(atomic64_t *ptr)
-{
- u64 res;
-
- /*
- * Note, we inline this atomic64_t primitive because
- * it only clobbers EAX/EDX and leaves the others
- * untouched. We also (somewhat subtly) rely on the
- * fact that cmpxchg8b returns the current 64-bit value
- * of the memory location we are touching:
- */
- asm volatile(
- "mov %%ebx, %%eax\n\t"
- "mov %%ecx, %%edx\n\t"
- LOCK_PREFIX "cmpxchg8b %1\n"
- : "=&A" (res)
- : "m" (*ptr)
- );
-
- return res;
-}
-
-extern u64 atomic64_read(atomic64_t *ptr);
-
-/**
- * atomic64_add_return - add and return
- * @delta: integer value to add
- * @ptr: pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr and returns @delta + *@ptr
- */
-extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr);
-
-/*
- * Other variants with different arithmetic operators:
- */
-extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr);
-extern u64 atomic64_inc_return(atomic64_t *ptr);
-extern u64 atomic64_dec_return(atomic64_t *ptr);
-
-/**
- * atomic64_add - add integer to atomic64 variable
- * @delta: integer value to add
- * @ptr: pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr.
- */
-extern void atomic64_add(u64 delta, atomic64_t *ptr);
-
-/**
- * atomic64_sub - subtract the atomic64 variable
- * @delta: integer value to subtract
- * @ptr: pointer to type atomic64_t
- *
- * Atomically subtracts @delta from @ptr.
- */
-extern void atomic64_sub(u64 delta, atomic64_t *ptr);
-
-/**
- * atomic64_sub_and_test - subtract value from variable and test result
- * @delta: integer value to subtract
- * @ptr: pointer to type atomic64_t
- *
- * Atomically subtracts @delta from @ptr and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr);
-
-/**
- * atomic64_inc - increment atomic64 variable
- * @ptr: pointer to type atomic64_t
- *
- * Atomically increments @ptr by 1.
- */
-extern void atomic64_inc(atomic64_t *ptr);
-
-/**
- * atomic64_dec - decrement atomic64 variable
- * @ptr: pointer to type atomic64_t
- *
- * Atomically decrements @ptr by 1.
- */
-extern void atomic64_dec(atomic64_t *ptr);
-
-/**
- * atomic64_dec_and_test - decrement and test
- * @ptr: pointer to type atomic64_t
- *
- * Atomically decrements @ptr by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-extern int atomic64_dec_and_test(atomic64_t *ptr);
-
-/**
- * atomic64_inc_and_test - increment and test
- * @ptr: pointer to type atomic64_t
- *
- * Atomically increments @ptr by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-extern int atomic64_inc_and_test(atomic64_t *ptr);
-
-/**
- * atomic64_add_negative - add and test if negative
- * @delta: integer value to add
- * @ptr: pointer to type atomic64_t
- *
- * Atomically adds @delta to @ptr and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-extern int atomic64_add_negative(u64 delta, atomic64_t *ptr);
-
-#include <asm-generic/atomic-long.h>
-#endif /* _ASM_X86_ATOMIC_32_H */
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h
deleted file mode 100644
index d605dc268e79..000000000000
--- a/arch/x86/include/asm/atomic_64.h
+++ /dev/null
@@ -1,485 +0,0 @@
-#ifndef _ASM_X86_ATOMIC_64_H
-#define _ASM_X86_ATOMIC_64_H
-
-#include <linux/types.h>
-#include <asm/alternative.h>
-#include <asm/cmpxchg.h>
-
-/*
- * Atomic operations that C can't guarantee us. Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i) { (i) }
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
-static inline int atomic_read(const atomic_t *v)
-{
- return v->counter;
-}
-
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-static inline void atomic_set(atomic_t *v, int i)
-{
- v->counter = i;
-}
-
-/**
- * atomic_add - add integer to atomic variable
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v.
- */
-static inline void atomic_add(int i, atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "addl %1,%0"
- : "=m" (v->counter)
- : "ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub - subtract the atomic variable
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v.
- */
-static inline void atomic_sub(int i, atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "subl %1,%0"
- : "=m" (v->counter)
- : "ir" (i), "m" (v->counter));
-}
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic_sub_and_test(int i, atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "subl %2,%0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_inc - increment atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1.
- */
-static inline void atomic_inc(atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "incl %0"
- : "=m" (v->counter)
- : "m" (v->counter));
-}
-
-/**
- * atomic_dec - decrement atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1.
- */
-static inline void atomic_dec(atomic_t *v)
-{
- asm volatile(LOCK_PREFIX "decl %0"
- : "=m" (v->counter)
- : "m" (v->counter));
-}
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int atomic_dec_and_test(atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "decl %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic_inc_and_test(atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "incl %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int atomic_add_negative(int i, atomic_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "addl %2,%0; sets %1"
- : "=m" (v->counter), "=qm" (c)
- : "ir" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic_add_return - add and return
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline int atomic_add_return(int i, atomic_t *v)
-{
- int __i = i;
- asm volatile(LOCK_PREFIX "xaddl %0, %1"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
-}
-
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
- return atomic_add_return(-i, v);
-}
-
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
-
-/* The 64-bit atomic type */
-
-#define ATOMIC64_INIT(i) { (i) }
-
-/**
- * atomic64_read - read atomic64 variable
- * @v: pointer of type atomic64_t
- *
- * Atomically reads the value of @v.
- * Doesn't imply a read memory barrier.
- */
-static inline long atomic64_read(const atomic64_t *v)
-{
- return v->counter;
-}
-
-/**
- * atomic64_set - set atomic64 variable
- * @v: pointer to type atomic64_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
-static inline void atomic64_set(atomic64_t *v, long i)
-{
- v->counter = i;
-}
-
-/**
- * atomic64_add - add integer to atomic64 variable
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v.
- */
-static inline void atomic64_add(long i, atomic64_t *v)
-{
- asm volatile(LOCK_PREFIX "addq %1,%0"
- : "=m" (v->counter)
- : "er" (i), "m" (v->counter));
-}
-
-/**
- * atomic64_sub - subtract the atomic64 variable
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v.
- */
-static inline void atomic64_sub(long i, atomic64_t *v)
-{
- asm volatile(LOCK_PREFIX "subq %1,%0"
- : "=m" (v->counter)
- : "er" (i), "m" (v->counter));
-}
-
-/**
- * atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer to type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic64_sub_and_test(long i, atomic64_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "subq %2,%0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "er" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic64_inc - increment atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1.
- */
-static inline void atomic64_inc(atomic64_t *v)
-{
- asm volatile(LOCK_PREFIX "incq %0"
- : "=m" (v->counter)
- : "m" (v->counter));
-}
-
-/**
- * atomic64_dec - decrement atomic64 variable
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1.
- */
-static inline void atomic64_dec(atomic64_t *v)
-{
- asm volatile(LOCK_PREFIX "decq %0"
- : "=m" (v->counter)
- : "m" (v->counter));
-}
-
-/**
- * atomic64_dec_and_test - decrement and test
- * @v: pointer to type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static inline int atomic64_dec_and_test(atomic64_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "decq %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer to type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static inline int atomic64_inc_and_test(atomic64_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "incq %0; sete %1"
- : "=m" (v->counter), "=qm" (c)
- : "m" (v->counter) : "memory");
- return c != 0;
-}
-
-/**
- * atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static inline int atomic64_add_negative(long i, atomic64_t *v)
-{
- unsigned char c;
-
- asm volatile(LOCK_PREFIX "addq %2,%0; sets %1"
- : "=m" (v->counter), "=qm" (c)
- : "er" (i), "m" (v->counter) : "memory");
- return c;
-}
-
-/**
- * atomic64_add_return - add and return
- * @i: integer value to add
- * @v: pointer to type atomic64_t
- *
- * Atomically adds @i to @v and returns @i + @v
- */
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
- long __i = i;
- asm volatile(LOCK_PREFIX "xaddq %0, %1;"
- : "+r" (i), "+m" (v->counter)
- : : "memory");
- return i + __i;
-}
-
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
- return atomic64_add_return(-i, v);
-}
-
-#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
-#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
-
-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
-{
- return cmpxchg(&v->counter, old, new);
-}
-
-static inline long atomic64_xchg(atomic64_t *v, long new)
-{
- return xchg(&v->counter, new);
-}
-
-static inline long atomic_cmpxchg(atomic_t *v, int old, int new)
-{
- return cmpxchg(&v->counter, old, new);
-}
-
-static inline long atomic_xchg(atomic_t *v, int new)
-{
- return xchg(&v->counter, new);
-}
-
-/**
- * atomic_add_unless - add unless the number is a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static inline int atomic_add_unless(atomic_t *v, int a, int u)
-{
- int c, old;
- c = atomic_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c != (u);
-}
-
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
-
-/**
- * atomic64_add_unless - add unless the number is a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as it was not @u.
- * Returns non-zero if @v was not @u, and zero otherwise.
- */
-static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
-{
- long c, old;
- c = atomic64_read(v);
- for (;;) {
- if (unlikely(c == (u)))
- break;
- old = atomic64_cmpxchg((v), c, c + (a));
- if (likely(old == c))
- break;
- c = old;
- }
- return c != (u);
-}
-
-/**
- * atomic_inc_short - increment of a short integer
- * @v: pointer to type int
- *
- * Atomically adds 1 to @v
- * Returns the new value of @u
- */
-static inline short int atomic_inc_short(short int *v)
-{
- asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
- return *v;
-}
-
-/**
- * atomic_or_long - OR of two long integers
- * @v1: pointer to type unsigned long
- * @v2: pointer to type unsigned long
- *
- * Atomically ORs @v1 and @v2
- * Returns the result of the OR
- */
-static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
-{
- asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
-}
-
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-
-/* These are x86-specific, used by some header files */
-#define atomic_clear_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "andl %0,%1" \
- : : "r" (~(mask)), "m" (*(addr)) : "memory")
-
-#define atomic_set_mask(mask, addr) \
- asm volatile(LOCK_PREFIX "orl %0,%1" \
- : : "r" ((unsigned)(mask)), "m" (*(addr)) \
- : "memory")
-
-/* Atomic operations are already serializing on x86 */
-#define smp_mb__before_atomic_dec() barrier()
-#define smp_mb__after_atomic_dec() barrier()
-#define smp_mb__before_atomic_inc() barrier()
-#define smp_mb__after_atomic_inc() barrier()
-
-#include <asm-generic/atomic-long.h>
-#endif /* _ASM_X86_ATOMIC_64_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 14f9890eb495..635f03bb4995 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -118,14 +118,20 @@ enum fixed_addresses {
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
- * We round it up to the next 256 pages boundary so that we
- * can have a single pgd entry and a single pte table:
+ * If necessary we round it up to the next 256 pages boundary so
+ * that we can have a single pgd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
- (__end_of_permanent_fixed_addresses & 255),
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+ FIX_BTMAP_END =
+ (__end_of_permanent_fixed_addresses ^
+ (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
+ -PTRS_PER_PTE
+ ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
+ (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
+ : __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 4611f085cd43..585a42810cf8 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -30,26 +30,38 @@
/*
* IDT vectors usable for external interrupt sources start
* at 0x20:
+ * hpa said we can start from 0x1f.
+ * 0x1f is documented as reserved. However, the ability for the APIC
+ * to generate vectors starting at 0x10 is documented, as is the
+ * ability for the CPU to receive any vector number as an interrupt.
+ * 0x1f is used for IRQ_MOVE_CLEANUP_VECTOR since that vector needs
+ * an entire privilege level (16 vectors) all by itself at a higher
+ * priority than any actual device vector. Thus, by placing it in the
+ * otherwise-unusable 0x10 privilege level, we avoid wasting a full
+ * 16-vector block.
*/
-#define FIRST_EXTERNAL_VECTOR 0x20
+#define FIRST_EXTERNAL_VECTOR 0x1f
+#define IA32_SYSCALL_VECTOR 0x80
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
-# define IA32_SYSCALL_VECTOR 0x80
-#else
-# define IA32_SYSCALL_VECTOR 0x80
#endif
/*
- * Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * Reserve the lowest usable priority level 0x10 - 0x1f for triggering
* cleanup after irq migration.
+ * this overlaps with the reserved range for cpu exceptions so this
+ * will need to be changed to 0x20 - 0x2f if the last cpu exception is
+ * ever allocated.
*/
+
#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
/*
- * Vectors 0x30-0x3f are used for ISA interrupts.
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ * round up to the next 16-vector boundary
*/
-#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10)
+#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15)
#define IRQ1_VECTOR (IRQ0_VECTOR + 1)
#define IRQ2_VECTOR (IRQ0_VECTOR + 2)
@@ -121,11 +133,12 @@
#define MCE_SELF_VECTOR 0xeb
/*
- * First APIC vector available to drivers: (vectors 0x30-0xee) we
- * start at 0x31(0x41) to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
+ * First APIC vector available to drivers: (vectors 0x30-0xee). We
+ * start allocating at 0x31 to spread out vectors evenly between
+ * priority levels. (0x80 is the syscall vector)
*/
-#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
+#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 1)
+#define VECTOR_OFFSET_START 1
#define NR_VECTORS 256
@@ -154,21 +167,21 @@ static inline int invalid_vm86_irq(int irq)
#define NR_IRQS_LEGACY 16
-#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
#ifdef CONFIG_X86_IO_APIC
# ifdef CONFIG_SPARSE_IRQ
+# define CPU_VECTOR_LIMIT (64 * NR_CPUS)
# define NR_IRQS \
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# else
-# if NR_CPUS < MAX_IO_APICS
-# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
-# else
-# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
-# endif
+# define CPU_VECTOR_LIMIT (32 * NR_CPUS)
+# define NR_IRQS \
+ (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
+ (NR_VECTORS + CPU_VECTOR_LIMIT) : \
+ (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# endif
#else /* !CONFIG_X86_IO_APIC: */
# define NR_IRQS NR_IRQS_LEGACY
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 139d4c1a33a7..93da9c3f3341 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int check_nmi_watchdog(void);
extern int nmi_watchdog_enabled;
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
-extern int avail_to_resrv_perfctr_nmi(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
extern int reserve_evntsel_nmi(unsigned int);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 9d369f680321..20102808b191 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
return 0;
}
-/* Get Nth argument at function call */
-extern unsigned long regs_get_argument_nth(struct pt_regs *regs,
- unsigned int n);
-
/*
* These are defined as per linux/ptrace.h, which see.
*/
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index ca7517d33776..413620024768 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -105,7 +105,7 @@ do { \
static inline void __down_read(struct rw_semaphore *sem)
{
asm volatile("# beginning down_read\n\t"
- LOCK_PREFIX " incl (%%eax)\n\t"
+ LOCK_PREFIX " inc%z0 (%1)\n\t"
/* adds 0x00000001, returns the old value */
" jns 1f\n"
" call call_rwsem_down_read_failed\n"
@@ -123,12 +123,12 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
{
__s32 result, tmp;
asm volatile("# beginning __down_read_trylock\n\t"
- " movl %0,%1\n\t"
+ " mov %0,%1\n\t"
"1:\n\t"
- " movl %1,%2\n\t"
- " addl %3,%2\n\t"
+ " mov %1,%2\n\t"
+ " add %3,%2\n\t"
" jle 2f\n\t"
- LOCK_PREFIX " cmpxchgl %2,%0\n\t"
+ LOCK_PREFIX " cmpxchg %2,%0\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
@@ -147,9 +147,9 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
tmp = RWSEM_ACTIVE_WRITE_BIAS;
asm volatile("# beginning down_write\n\t"
- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t"
+ LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtract 0x0000ffff, returns the old value */
- " testl %%edx,%%edx\n\t"
+ " test %1,%1\n\t"
/* was the count 0 before? */
" jz 1f\n"
" call call_rwsem_down_write_failed\n"
@@ -185,7 +185,7 @@ static inline void __up_read(struct rw_semaphore *sem)
{
__s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %%edx,(%%eax)\n\t"
+ LOCK_PREFIX " xadd %1,(%2)\n\t"
/* subtracts 1, returns the old value */
" jns 1f\n\t"
" call call_rwsem_wake\n"
@@ -201,18 +201,18 @@ static inline void __up_read(struct rw_semaphore *sem)
*/
static inline void __up_write(struct rw_semaphore *sem)
{
+ unsigned long tmp;
asm volatile("# beginning __up_write\n\t"
- " movl %2,%%edx\n\t"
- LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t"
+ LOCK_PREFIX " xadd %1,(%2)\n\t"
/* tries to transition
0xffff0001 -> 0x00000000 */
" jz 1f\n"
" call call_rwsem_wake\n"
"1:\n\t"
"# ending __up_write\n"
- : "+m" (sem->count)
- : "a" (sem), "i" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc", "edx");
+ : "+m" (sem->count), "=d" (tmp)
+ : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS)
+ : "memory", "cc");
}
/*
@@ -221,7 +221,7 @@ static inline void __up_write(struct rw_semaphore *sem)
static inline void __downgrade_write(struct rw_semaphore *sem)
{
asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX " addl %2,(%%eax)\n\t"
+ LOCK_PREFIX " add%z0 %2,(%1)\n\t"
/* transitions 0xZZZZ0001 -> 0xYYYY0001 */
" jns 1f\n\t"
" call call_rwsem_downgrade_wake\n"
@@ -237,7 +237,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
*/
static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
{
- asm volatile(LOCK_PREFIX "addl %1,%0"
+ asm volatile(LOCK_PREFIX "add%z0 %1,%0"
: "+m" (sem->count)
: "ir" (delta));
}
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 35e89122a42f..4dab78edbad9 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -3,8 +3,6 @@
extern int kstack_depth_to_print;
-int x86_is_stack_id(int id, char *name);
-
struct thread_info;
struct stacktrace_ops;
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 535e421498f6..316708d5af92 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -8,6 +8,8 @@
#include <linux/errno.h>
#include <linux/prefetch.h>
#include <linux/lockdep.h>
+#include <asm/alternative.h>
+#include <asm/cpufeature.h>
#include <asm/page.h>
/*
@@ -16,7 +18,24 @@
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
-copy_user_generic(void *to, const void *from, unsigned len);
+copy_user_generic_string(void *to, const void *from, unsigned len);
+__must_check unsigned long
+copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+
+static __always_inline __must_check unsigned long
+copy_user_generic(void *to, const void *from, unsigned len)
+{
+ unsigned ret;
+
+ alternative_call(copy_user_generic_unrolled,
+ copy_user_generic_string,
+ X86_FEATURE_REP_GOOD,
+ ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
+ "=d" (len)),
+ "1" (to), "2" (from), "3" (len)
+ : "memory", "rcx", "r8", "r9", "r10", "r11");
+ return ret;
+}
__must_check unsigned long
_copy_to_user(void __user *to, const void *from, unsigned len);
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 2751f3075d8b..3fbc1f348a7d 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -18,8 +18,8 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
#include <linux/rtc.h>
@@ -89,7 +89,7 @@ extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64);
extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64);
-extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *);
+extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *);
extern s64 uv_bios_freq_base(u64, u64 *);
extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int,
unsigned long *);
@@ -104,6 +104,7 @@ extern int uv_type;
extern long sn_partition_id;
extern long sn_coherency_id;
extern long sn_region_size;
+extern long system_serial_number;
#define partition_coherence_id() (sn_coherency_id)
extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 40be813fefb1..14cc74ba5d23 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -329,7 +329,8 @@ static inline unsigned long uv_read_global_mmr64(int pnode, unsigned long offset
*/
static inline unsigned long uv_global_gru_mmr_address(int pnode, unsigned long offset)
{
- return UV_GLOBAL_GRU_MMR_BASE | offset | (pnode << uv_hub_info->m_val);
+ return UV_GLOBAL_GRU_MMR_BASE | offset |
+ ((unsigned long)pnode << uv_hub_info->m_val);
}
static inline void uv_write_global_mmr8(int pnode, unsigned long offset, unsigned char val)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index de7353c0ce9c..2589ea4c60ce 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -205,7 +205,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
struct alt_instr *a;
- char insnbuf[MAX_PATCH_LEN];
+ u8 insnbuf[MAX_PATCH_LEN];
DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
for (a = start; a < end; a++) {
@@ -223,6 +223,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
}
#endif
memcpy(insnbuf, a->replacement, a->replacementlen);
+ if (*insnbuf == 0xe8 && a->replacementlen == 5)
+ *(s32 *)(insnbuf + 1) += a->replacement - a->instr;
add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen);
text_poke_early(instr, insnbuf, a->instrlen);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 53243ca7816d..d5bfa2964e70 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1162,7 +1162,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ static int current_vector = FIRST_DEVICE_VECTOR + VECTOR_OFFSET_START;
+ static int current_offset = VECTOR_OFFSET_START % 8;
unsigned int old_vector;
int cpu, err;
cpumask_var_t tmp_mask;
@@ -3847,7 +3848,7 @@ int __init arch_probe_nr_irqs(void)
/*
* for MSI and HT dyn irq
*/
- nr += nr_irqs_gsi * 16;
+ nr += nr_irqs_gsi * 64;
#endif
if (nr < nr_irqs)
nr_irqs = nr;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 21db3cbea7dc..a8ae82e0a654 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
*
* SGI UV APIC functions (note: not an Intel compatible APIC)
*
- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/cpumask.h>
#include <linux/hardirq.h>
@@ -482,7 +482,7 @@ static void uv_heartbeat(unsigned long ignored)
static void __cpuinit uv_heartbeat_enable(int cpu)
{
- if (!uv_cpu_hub_info(cpu)->scir.enabled) {
+ while (!uv_cpu_hub_info(cpu)->scir.enabled) {
struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
@@ -490,11 +490,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
uv_cpu_hub_info(cpu)->scir.enabled = 1;
- }
- /* check boot cpu */
- if (!uv_cpu_hub_info(0)->scir.enabled)
- uv_heartbeat_enable(0);
+ /* also ensure that boot cpu is enabled */
+ cpu = 0;
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -634,8 +633,8 @@ void __init uv_system_init(void)
}
uv_bios_init();
- uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
- &sn_coherency_id, &sn_region_size);
+ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
+ &sn_region_size, &system_serial_number);
uv_rtc_init();
for_each_present_cpu(cpu) {
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index b0206a211b09..c918ebab52ab 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -15,8 +15,8 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
- * Copyright (c) Russ Anderson
+ * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) Russ Anderson <rja@sgi.com>
*/
#include <linux/efi.h>
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab;
s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
{
struct uv_systab *tab = &uv_systab;
+ s64 ret;
if (!tab->function)
/*
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
*/
return BIOS_STATUS_UNIMPLEMENTED;
- return efi_call6((void *)__va(tab->function),
- (u64)which, a1, a2, a3, a4, a5);
+ ret = efi_call6((void *)__va(tab->function), (u64)which,
+ a1, a2, a3, a4, a5);
+ return ret;
}
+EXPORT_SYMBOL_GPL(uv_bios_call);
s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
u64 a4, u64 a5)
@@ -73,11 +76,14 @@ long sn_coherency_id;
EXPORT_SYMBOL_GPL(sn_coherency_id);
long sn_region_size;
EXPORT_SYMBOL_GPL(sn_region_size);
+long system_serial_number;
+EXPORT_SYMBOL_GPL(system_serial_number);
int uv_type;
+EXPORT_SYMBOL_GPL(uv_type);
s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
- long *region)
+ long *region, long *ssn)
{
s64 ret;
u64 v0, v1;
@@ -97,8 +103,11 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
*coher = part.coherence_id;
if (region)
*region = part.region_size;
+ if (ssn)
+ *ssn = v1;
return ret;
}
+EXPORT_SYMBOL_GPL(uv_bios_get_sn_info);
int
uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size,
@@ -185,4 +194,3 @@ void uv_bios_init(void)
void uv_bios_init(void) { }
#endif
-
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc6c8ef92dcc..c2b722d5a722 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -31,6 +31,8 @@ struct _cache_table {
short size;
};
+#define MB(x) ((x) * 1024)
+
/* All the cache descriptor types we care about (no TLB or
trace cache entries) */
@@ -44,9 +46,9 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
{ 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
{ 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
- { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
{ 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
{ 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
{ 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -59,16 +61,16 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
{ 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
{ 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
- { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */
- { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */
- { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */
- { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */
- { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
- { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */
- { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
- { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */
- { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */
- { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */
+ { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
+ { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
+ { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
+ { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
+ { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
+ { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
+ { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
+ { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
+ { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
+ { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
{ 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
{ 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
{ 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
@@ -77,34 +79,34 @@ static const struct _cache_table __cpuinitconst cache_table[] =
{ 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
{ 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
{ 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
- { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */
- { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */
- { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */
- { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
- { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
- { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
- { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */
- { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */
- { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
- { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */
- { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */
- { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */
- { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */
- { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
- { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */
- { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */
- { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */
- { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */
- { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */
- { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */
- { 0xea, LVL_3, 12288 }, /* 24-way set assoc, 64 byte line size */
- { 0xeb, LVL_3, 18432 }, /* 24-way set assoc, 64 byte line size */
- { 0xec, LVL_3, 24576 }, /* 24-way set assoc, 64 byte line size */
+ { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
+ { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
+ { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
+ { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
+ { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
+ { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
+ { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
+ { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
+ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
+ { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
+ { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
+ { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
+ { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
+ { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
+ { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
+ { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
+ { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
+ { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
+ { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
+ { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
+ { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
+ { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
+ { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
+ { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
+ { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
{ 0x00, 0, 0}
};
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8c1c07073ccc..76f2ccbb7597 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2304,7 +2304,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-static DEFINE_PER_CPU(int, in_ignored_frame);
static void
@@ -2320,10 +2319,6 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
- per_cpu(in_ignored_frame, smp_processor_id()) =
- x86_is_stack_id(NMI_STACK, name) ||
- x86_is_stack_id(DEBUG_STACK, name);
-
return 0;
}
@@ -2331,9 +2326,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
{
struct perf_callchain_entry *entry = data;
- if (per_cpu(in_ignored_frame, smp_processor_id()))
- return;
-
if (reliable)
callchain_store(entry, addr);
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 898df9719afb..74f4e85a5727 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
return !test_bit(counter, perfctr_nmi_owner);
}
-
-/* checks the an msr for availability */
-int avail_to_resrv_perfctr_nmi(unsigned int msr)
-{
- unsigned int counter;
-
- counter = nmi_perfctr_msr_to_bit(msr);
- BUG_ON(counter > NMI_MAX_COUNTER_BITS);
-
- return !test_bit(counter, perfctr_nmi_owner);
-}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
int reserve_perfctr_nmi(unsigned int msr)
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index ae775ca47b25..11540a189d93 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -18,11 +18,6 @@
#include "dumpstack.h"
-/* Just a stub for now */
-int x86_is_stack_id(int id, char *name)
-{
- return 0;
-}
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 0ad9597073f5..676bc051252e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = {
#endif
};
-int x86_is_stack_id(int id, char *name)
-{
- return x86_stack_ids[id - 1] == name;
-}
-
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 02c3ee013ccd..55d7f9a2b0bf 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -92,6 +92,13 @@ void exit_thread(void)
}
}
+void show_regs(struct pt_regs *regs)
+{
+ show_registers(regs);
+ show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
+ regs->bp);
+}
+
void show_regs_common(void)
{
const char *board, *product;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 37ad1e046aae..f6c62667e30c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -174,12 +174,6 @@ void __show_regs(struct pt_regs *regs, int all)
d6, d7);
}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, &regs->sp, regs->bp);
-}
-
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index f9e033150cdf..ff5830b62747 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -211,12 +211,6 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
-}
-
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 118428085ea2..16a98858c415 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -140,30 +140,6 @@ static const int arg_offs_table[] = {
#endif
};
-/**
- * regs_get_argument_nth() - get Nth argument at function call
- * @regs: pt_regs which contains registers at function entry.
- * @n: argument number.
- *
- * regs_get_argument_nth() returns @n th argument of a function call.
- * Since usually the kernel stack will be changed right after function entry,
- * you must use this at function entry. If the @n th entry is NOT in the
- * kernel stack or pt_regs, this returns 0.
- */
-unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n)
-{
- if (n < ARRAY_SIZE(arg_offs_table))
- return *(unsigned long *)((char *)regs + arg_offs_table[n]);
- else {
- /*
- * The typical case: arg n is on the stack.
- * (Note: stack[0] = return address, so skip it)
- */
- n -= ARRAY_SIZE(arg_offs_table);
- return regs_get_kernel_stack_nth(regs, 1 + n);
- }
-}
-
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 619f7f88b8cc..693920b22496 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -26,7 +26,8 @@ EXPORT_SYMBOL(__put_user_2);
EXPORT_SYMBOL(__put_user_4);
EXPORT_SYMBOL(__put_user_8);
-EXPORT_SYMBOL(copy_user_generic);
+EXPORT_SYMBOL(copy_user_generic_string);
+EXPORT_SYMBOL(copy_user_generic_unrolled);
EXPORT_SYMBOL(__copy_user_nocache);
EXPORT_SYMBOL(_copy_from_user);
EXPORT_SYMBOL(_copy_to_user);
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index cf889d4e076a..71100c98e337 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -90,12 +90,6 @@ ENTRY(_copy_from_user)
CFI_ENDPROC
ENDPROC(_copy_from_user)
-ENTRY(copy_user_generic)
- CFI_STARTPROC
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
- CFI_ENDPROC
-ENDPROC(copy_user_generic)
-
.section .fixup,"ax"
/* must zero dest */
ENTRY(bad_from_user)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index ad5441ed1b57..f82e884928af 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -20,12 +20,11 @@
/*
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
*
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
* alternative instructions framework:
*/
- ALIGN
-memcpy_c:
- CFI_STARTPROC
+ .section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
movq %rdi, %rax
movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
movl %edx, %ecx
rep movsb
ret
- CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+ .previous
ENTRY(__memcpy)
ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
* It is also a lot simpler. Use this when possible:
*/
- .section .altinstr_replacement, "ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
-2:
- .previous
-
.section .altinstructions, "a"
.align 8
.quad memcpy
- .quad 1b
+ .quad .Lmemcpy_c
.byte X86_FEATURE_REP_GOOD
/*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
* so it is silly to overwrite itself with nops - reboot is the
* only outcome...
*/
- .byte 2b - 1b
- .byte 2b - 1b
+ .byte .Lmemcpy_e - .Lmemcpy_c
+ .byte .Lmemcpy_e - .Lmemcpy_c
.previous
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 2c5948116bd2..e88d3b81644a 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -12,9 +12,8 @@
*
* rax original destination
*/
- ALIGN
-memset_c:
- CFI_STARTPROC
+ .section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
rep stosb
movq %r9,%rax
ret
- CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+ .previous
ENTRY(memset)
ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
#include <asm/cpufeature.h>
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (memset_c - memset) - (2f - 1b) /* offset */
-2:
- .previous
.section .altinstructions,"a"
.align 8
.quad memset
- .quad 1b
+ .quad .Lmemset_c
.byte X86_FEATURE_REP_GOOD
.byte .Lfinal - memset
- .byte 2b - 1b
+ .byte .Lmemset_e - .Lmemset_c
.previous
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index c246d259822d..03c75ffd5c2a 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -422,6 +422,10 @@ void __init early_ioremap_init(void)
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
+#define __FIXADDR_TOP (-PAGE_SIZE)
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+#undef __FIXADDR_TOP
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
printk(KERN_WARNING "pmd %p != %p\n",
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index fdbcc9fd6d31..5eb83c3ca20d 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -336,14 +336,12 @@ static int nvram_ioctl(struct inode *inode, struct file *file,
static int nvram_open(struct inode *inode, struct file *file)
{
- lock_kernel();
spin_lock(&nvram_state_lock);
if ((nvram_open_cnt && (file->f_flags & O_EXCL)) ||
(nvram_open_mode & NVRAM_EXCL) ||
((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) {
spin_unlock(&nvram_state_lock);
- unlock_kernel();
return -EBUSY;
}
@@ -354,7 +352,6 @@ static int nvram_open(struct inode *inode, struct file *file)
nvram_open_cnt++;
spin_unlock(&nvram_state_lock);
- unlock_kernel();
return 0;
}
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2233c98d80df..3ca948525f91 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -121,9 +121,8 @@ struct ftrace_event_call {
int (*regfunc)(struct ftrace_event_call *);
void (*unregfunc)(struct ftrace_event_call *);
int id;
+ const char *print_fmt;
int (*raw_init)(struct ftrace_event_call *);
- int (*show_format)(struct ftrace_event_call *,
- struct trace_seq *);
int (*define_fields)(struct ftrace_event_call *);
struct list_head fields;
int filter_active;
@@ -188,7 +187,7 @@ do { \
__trace_printk(ip, fmt, ##args); \
} while (0)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
struct perf_event;
extern int ftrace_profile_enable(int event_id);
extern void ftrace_profile_disable(int event_id);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 451481c082b5..d13492df57a1 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -90,7 +90,7 @@ struct msi_desc;
* @startup: start up the interrupt (defaults to ->enable if NULL)
* @shutdown: shut down the interrupt (defaults to ->disable if NULL)
* @enable: enable the interrupt (defaults to chip->unmask if NULL)
- * @disable: disable the interrupt (defaults to chip->mask if NULL)
+ * @disable: disable the interrupt
* @ack: start of a new interrupt
* @mask: mask an interrupt source
* @mask_ack: ack and mask an interrupt source
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index b632226ddc2f..c5cd65fea0a3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -124,7 +124,7 @@ extern int _cond_resched(void);
#endif
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
- void __might_sleep(char *file, int line, int preempt_offset);
+ void __might_sleep(const char *file, int line, int preempt_offset);
/**
* might_sleep - annotation for functions that can sleep
*
@@ -138,7 +138,8 @@ extern int _cond_resched(void);
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
#else
- static inline void __might_sleep(char *file, int line, int preempt_offset) { }
+ static inline void __might_sleep(const char *file, int line,
+ int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
#endif
diff --git a/include/linux/list.h b/include/linux/list.h
index 969f6e92d089..5d9c6558e8ab 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -206,6 +206,20 @@ static inline int list_empty_careful(const struct list_head *head)
}
/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left(struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty(head)) {
+ first = head->next;
+ list_move_tail(first, head);
+ }
+}
+
+/**
* list_is_singular - tests whether a list has just one entry.
* @head: the list to test.
*/
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa71874113f..c5ddcdedb794 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -565,6 +565,10 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
struct perf_sample_data *,
struct pt_regs *regs);
+enum perf_group_flag {
+ PERF_GROUP_SOFTWARE = 0x1,
+};
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -574,6 +578,7 @@ struct perf_event {
struct list_head event_entry;
struct list_head sibling_list;
int nr_siblings;
+ int group_flags;
struct perf_event *group_leader;
struct perf_event *output;
const struct pmu *pmu;
@@ -658,7 +663,7 @@ struct perf_event {
perf_overflow_handler_t overflow_handler;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
struct event_filter *filter;
#endif
@@ -683,7 +688,8 @@ struct perf_event_context {
*/
struct mutex mutex;
- struct list_head group_list;
+ struct list_head pinned_groups;
+ struct list_head flexible_groups;
struct list_head event_list;
int nr_events;
int nr_active;
@@ -746,10 +752,10 @@ extern int perf_max_events;
extern const struct pmu *hw_perf_event_init(struct perf_event *event);
-extern void perf_event_task_sched_in(struct task_struct *task, int cpu);
+extern void perf_event_task_sched_in(struct task_struct *task);
extern void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu);
-extern void perf_event_task_tick(struct task_struct *task, int cpu);
+ struct task_struct *next);
+extern void perf_event_task_tick(struct task_struct *task);
extern int perf_event_init_task(struct task_struct *child);
extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
@@ -875,12 +881,12 @@ extern void perf_event_enable(struct perf_event *event);
extern void perf_event_disable(struct perf_event *event);
#else
static inline void
-perf_event_task_sched_in(struct task_struct *task, int cpu) { }
+perf_event_task_sched_in(struct task_struct *task) { }
static inline void
perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu) { }
+ struct task_struct *next) { }
static inline void
-perf_event_task_tick(struct task_struct *task, int cpu) { }
+perf_event_task_tick(struct task_struct *task) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
diff --git a/include/linux/plist.h b/include/linux/plist.h
index 8227f717c70f..6898985e7b38 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -45,7 +45,7 @@
* the insertion of new nodes. There are no nodes with duplicate
* priorites on the list.
*
- * The nodes on the node_list is ordered by priority and can contain
+ * The nodes on the node_list are ordered by priority and can contain
* entries which have the same priority. Those entries are ordered
* FIFO
*
@@ -265,7 +265,7 @@ static inline int plist_node_empty(const struct plist_node *node)
*
* Assumes the plist is _not_ empty.
*/
-static inline struct plist_node* plist_first(const struct plist_head *head)
+static inline struct plist_node *plist_first(const struct plist_head *head)
{
return list_entry(head->node_list.next,
struct plist_node, plist.node_list);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 96cc307ed9f4..2b70d4e37383 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -62,6 +62,18 @@ static inline long rcu_batches_completed_bh(void)
extern int rcu_expedited_torture_stats(char *page);
+static inline void rcu_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_bh_force_quiescent_state(void)
+{
+}
+
+static inline void rcu_sched_force_quiescent_state(void)
+{
+}
+
#define synchronize_rcu synchronize_sched
static inline void synchronize_rcu_expedited(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 8044b1b94333..704a010f686c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -99,6 +99,9 @@ extern void rcu_check_callbacks(int cpu, int user);
extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);
extern long rcu_batches_completed_sched(void);
+extern void rcu_force_quiescent_state(void);
+extern void rcu_bh_force_quiescent_state(void);
+extern void rcu_sched_force_quiescent_state(void);
#ifdef CONFIG_NO_HZ
void rcu_enter_nohz(void);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e4b90e16ee1b..52fe33963e82 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -99,7 +99,7 @@ struct perf_event_attr;
#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__)
#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \
.profile_enable = prof_sysenter_enable, \
@@ -113,7 +113,7 @@ struct perf_event_attr;
#define TRACE_SYS_ENTER_PROFILE_INIT(sname)
#define TRACE_SYS_EXIT_PROFILE(sname)
#define TRACE_SYS_EXIT_PROFILE_INIT(sname)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
#ifdef CONFIG_FTRACE_SYSCALLS
#define __SC_STR_ADECL1(t, a) #a
@@ -143,8 +143,7 @@ struct perf_event_attr;
.name = "sys_enter"#sname, \
.system = "syscalls", \
.event = &enter_syscall_print_##sname, \
- .raw_init = trace_event_raw_init, \
- .show_format = syscall_enter_format, \
+ .raw_init = init_syscall_trace, \
.define_fields = syscall_enter_define_fields, \
.regfunc = reg_event_syscall_enter, \
.unregfunc = unreg_event_syscall_enter, \
@@ -165,8 +164,7 @@ struct perf_event_attr;
.name = "sys_exit"#sname, \
.system = "syscalls", \
.event = &exit_syscall_print_##sname, \
- .raw_init = trace_event_raw_init, \
- .show_format = syscall_exit_format, \
+ .raw_init = init_syscall_trace, \
.define_fields = syscall_exit_define_fields, \
.regfunc = reg_event_syscall_exit, \
.unregfunc = unreg_event_syscall_exit, \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index c6fe03e902ca..5b46cf9dd843 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -131,130 +131,6 @@
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- * struct ftrace_raw_##call field;
- * int ret;
- *
- * ret = trace_seq_printf(s, #type " " #item ";"
- * " offset:%u; size:%u;\n",
- * offsetof(struct ftrace_raw_##call, item),
- * sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%u;\tsize:%u;\tsigned:%u;\n", \
- (unsigned int)offsetof(typeof(field), item), \
- (unsigned int)sizeof(field.item), \
- (unsigned int)is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __field_ext
-#define __field_ext(type, item, filter_type) __field(type, item)
-
-#undef __array
-#define __array(type, item, len) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
- "offset:%u;\tsize:%u;\tsigned:%u;\n", \
- (unsigned int)offsetof(typeof(field), item), \
- (unsigned int)sizeof(field.item), \
- (unsigned int)is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item, len) \
- ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\
- "offset:%u;\tsize:%u;\tsigned:%u;\n", \
- (unsigned int)offsetof(typeof(field), \
- __data_loc_##item), \
- (unsigned int)sizeof(field.__data_loc_##item), \
- (unsigned int)is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)
-
-#undef __entry
-#define __entry REC
-
-#undef __print_symbolic
-#undef __get_dynamic_array
-#undef __get_str
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TP_perf_assign
-#define TP_perf_assign(args...)
-
-#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
-static int \
-ftrace_format_setup_##call(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
-{ \
- struct ftrace_raw_##call field __attribute__((unused)); \
- int ret = 0; \
- \
- tstruct; \
- \
- return ret; \
-} \
- \
-static int \
-ftrace_format_##call(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
-{ \
- int ret = 0; \
- \
- ret = ftrace_format_setup_##call(unused, s); \
- if (!ret) \
- return ret; \
- \
- ret = trace_seq_printf(s, "\nprint fmt: " print); \
- \
- return ret; \
-}
-
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, name, proto, args)
-
-#undef DEFINE_EVENT_PRINT
-#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
-static int \
-ftrace_format_##name(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
-{ \
- int ret = 0; \
- \
- ret = ftrace_format_setup_##template(unused, s); \
- if (!ret) \
- return ret; \
- \
- trace_seq_printf(s, "\nprint fmt: " print); \
- \
- return ret; \
-}
-
-#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-
-/*
* Stage 3 of the trace events.
*
* Override the macros in <trace/trace_events.h> to include the following:
@@ -499,7 +375,7 @@ static inline int ftrace_get_offsets_##call( \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
/*
* Generate the functions needed for tracepoint perf_event support.
@@ -542,7 +418,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
/*
* Stage 4 of the trace events.
@@ -622,12 +498,11 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
* .raw_init = trace_event_raw_init,
* .regfunc = ftrace_reg_event_<call>,
* .unregfunc = ftrace_unreg_event_<call>,
- * .show_format = ftrace_format_<call>,
* }
*
*/
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
#define _TRACE_PROFILE_INIT(call) \
.profile_enable = ftrace_profile_enable_##call, \
@@ -635,7 +510,7 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
#else
#define _TRACE_PROFILE_INIT(call)
-#endif
+#endif /* CONFIG_PERF_EVENTS */
#undef __entry
#define __entry entry
@@ -657,6 +532,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\
#define __assign_str(dst, src) \
strcpy(__get_str(dst), src);
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TP_perf_assign
+#define TP_perf_assign(args...)
+
#undef DECLARE_EVENT_CLASS
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
\
@@ -722,8 +603,20 @@ static struct trace_event ftrace_event_type_##call = { \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#undef __entry
+#define __entry REC
+
+#undef __print_flags
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args)
+
#undef DECLARE_EVENT_CLASS
-#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+static const char print_fmt_##call[] = print;
#undef DEFINE_EVENT
#define DEFINE_EVENT(template, call, proto, args) \
@@ -737,7 +630,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.raw_init = trace_event_raw_init, \
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
- .show_format = ftrace_format_##template, \
+ .print_fmt = print_fmt_##template, \
.define_fields = ftrace_define_fields_##template, \
_TRACE_PROFILE_INIT(call) \
}
@@ -745,6 +638,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \
\
+static const char print_fmt_##call[] = print; \
+ \
static struct ftrace_event_call __used \
__attribute__((__aligned__(4))) \
__attribute__((section("_ftrace_events"))) event_##call = { \
@@ -754,7 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.raw_init = trace_event_raw_init, \
.regfunc = ftrace_raw_reg_event_##call, \
.unregfunc = ftrace_raw_unreg_event_##call, \
- .show_format = ftrace_format_##call, \
+ .print_fmt = print_fmt_##call, \
.define_fields = ftrace_define_fields_##template, \
_TRACE_PROFILE_INIT(call) \
}
@@ -835,7 +730,17 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
* }
*/
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
+
+#undef __entry
+#define __entry entry
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field) \
+ ((void *)__entry + (__entry->__data_loc_##field & 0xffff))
+
+#undef __get_str
+#define __get_str(field) (char *)__get_dynamic_array(field)
#undef __perf_addr
#define __perf_addr(a) __addr = (a)
@@ -927,7 +832,7 @@ static void ftrace_profile_##call(proto) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
#undef _TRACE_PROFILE_INIT
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 961fda3556bb..0387100752f0 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -34,10 +34,6 @@ struct syscall_metadata {
extern unsigned long arch_syscall_addr(int nr);
extern int init_syscall_trace(struct ftrace_event_call *call);
-extern int syscall_enter_format(struct ftrace_event_call *call,
- struct trace_seq *s);
-extern int syscall_exit_format(struct ftrace_event_call *call,
- struct trace_seq *s);
extern int syscall_enter_define_fields(struct ftrace_event_call *call);
extern int syscall_exit_define_fields(struct ftrace_event_call *call);
extern int reg_event_syscall_enter(struct ftrace_event_call *call);
@@ -49,12 +45,12 @@ ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s);
enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags);
enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags);
#endif
-#ifdef CONFIG_EVENT_PROFILE
+
+#ifdef CONFIG_PERF_EVENTS
int prof_sysenter_enable(struct ftrace_event_call *call);
void prof_sysenter_disable(struct ftrace_event_call *call);
int prof_sysexit_enable(struct ftrace_event_call *call);
void prof_sysexit_disable(struct ftrace_event_call *call);
-
#endif
#endif /* _TRACE_SYSCALL_H */
diff --git a/init/Kconfig b/init/Kconfig
index 79d375d92728..8db90673e5ce 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -988,19 +988,6 @@ config PERF_EVENTS
Say Y if unsure.
-config EVENT_PROFILE
- bool "Tracepoint profiling sources"
- depends on PERF_EVENTS && EVENT_TRACING
- default y
- help
- Allow the use of tracepoints as software performance events.
-
- When this is enabled, you can create perf events based on
- tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
- found in debugfs://tracing/events/*/*/id. (The -e/--events
- option to the perf tool can parse and interpret symbolic
- tracepoints, in the subsystem:tracepoint_name format.)
-
config PERF_COUNTERS
bool "Kernel performance counters (old config option)"
depends on HAVE_PERF_EVENTS
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 235716556bf1..d49afb2395e5 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -146,7 +146,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
struct task_struct *p;
ret = -ESRCH;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
@@ -157,7 +157,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
!capable(CAP_SYS_PTRACE))
goto err_unlock;
head = p->compat_robust_list;
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
}
if (put_user(sizeof(*head), len_ptr))
@@ -165,7 +165,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
return put_user(ptr_to_compat(head), head_ptr);
err_unlock:
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index d27746bd3a06..74782e530d5f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -289,6 +289,15 @@ static void update_event_times(struct perf_event *event)
event->total_time_running = run_end - event->tstamp_running;
}
+static struct list_head *
+ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
+{
+ if (event->attr.pinned)
+ return &ctx->pinned_groups;
+ else
+ return &ctx->flexible_groups;
+}
+
/*
* Add a event from the lists for its context.
* Must be called with ctx->mutex and ctx->lock held.
@@ -303,9 +312,19 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
* add it straight to the context's event list, or to the group
* leader's sibling list:
*/
- if (group_leader == event)
- list_add_tail(&event->group_entry, &ctx->group_list);
- else {
+ if (group_leader == event) {
+ struct list_head *list;
+
+ if (is_software_event(event))
+ event->group_flags |= PERF_GROUP_SOFTWARE;
+
+ list = ctx_group_list(event, ctx);
+ list_add_tail(&event->group_entry, list);
+ } else {
+ if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+ !is_software_event(event))
+ group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
list_add_tail(&event->group_entry, &group_leader->sibling_list);
group_leader->nr_siblings++;
}
@@ -355,9 +374,14 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
* to the context list directly:
*/
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
+ struct list_head *list;
- list_move_tail(&sibling->group_entry, &ctx->group_list);
+ list = ctx_group_list(event, ctx);
+ list_move_tail(&sibling->group_entry, list);
sibling->group_leader = sibling;
+
+ /* Inherit group flags from the previous leader */
+ sibling->group_flags = event->group_flags;
}
}
@@ -686,24 +710,6 @@ group_error:
}
/*
- * Return 1 for a group consisting entirely of software events,
- * 0 if the group contains any hardware events.
- */
-static int is_software_only_group(struct perf_event *leader)
-{
- struct perf_event *event;
-
- if (!is_software_event(leader))
- return 0;
-
- list_for_each_entry(event, &leader->sibling_list, group_entry)
- if (!is_software_event(event))
- return 0;
-
- return 1;
-}
-
-/*
* Work out whether we can put this event group on the CPU now.
*/
static int group_can_go_on(struct perf_event *event,
@@ -713,7 +719,7 @@ static int group_can_go_on(struct perf_event *event,
/*
* Groups consisting entirely of software events can always go on.
*/
- if (is_software_only_group(event))
+ if (event->group_flags & PERF_GROUP_SOFTWARE)
return 1;
/*
* If an exclusive group is already on, no other hardware
@@ -1056,7 +1062,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
perf_disable();
if (ctx->nr_active) {
- list_for_each_entry(event, &ctx->group_list, group_entry)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry)
+ group_sched_out(event, cpuctx, ctx);
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
}
perf_enable();
@@ -1170,9 +1179,9 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
* not restart the event.
*/
void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next, int cpu)
+ struct task_struct *next)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
@@ -1252,8 +1261,9 @@ static void perf_event_cpu_sched_out(struct perf_cpu_context *cpuctx)
static void
__perf_event_sched_in(struct perf_event_context *ctx,
- struct perf_cpu_context *cpuctx, int cpu)
+ struct perf_cpu_context *cpuctx)
{
+ int cpu = smp_processor_id();
struct perf_event *event;
int can_add_hw = 1;
@@ -1270,9 +1280,8 @@ __perf_event_sched_in(struct perf_event_context *ctx,
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
*/
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (event->state <= PERF_EVENT_STATE_OFF ||
- !event->attr.pinned)
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ if (event->state <= PERF_EVENT_STATE_OFF)
continue;
if (event->cpu != -1 && event->cpu != cpu)
continue;
@@ -1290,15 +1299,10 @@ __perf_event_sched_in(struct perf_event_context *ctx,
}
}
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- /*
- * Ignore events in OFF or ERROR state, and
- * ignore pinned events since we did them already.
- */
- if (event->state <= PERF_EVENT_STATE_OFF ||
- event->attr.pinned)
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ /* Ignore events in OFF or ERROR state */
+ if (event->state <= PERF_EVENT_STATE_OFF)
continue;
-
/*
* Listen to the 'cpu' scheduling filter constraint
* of events:
@@ -1326,24 +1330,24 @@ __perf_event_sched_in(struct perf_event_context *ctx,
* accessing the event control register. If a NMI hits, then it will
* keep the event running.
*/
-void perf_event_task_sched_in(struct task_struct *task, int cpu)
+void perf_event_task_sched_in(struct task_struct *task)
{
- struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = task->perf_event_ctxp;
if (likely(!ctx))
return;
if (cpuctx->task_ctx == ctx)
return;
- __perf_event_sched_in(ctx, cpuctx, cpu);
+ __perf_event_sched_in(ctx, cpuctx);
cpuctx->task_ctx = ctx;
}
-static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
+static void perf_event_cpu_sched_in(struct perf_cpu_context *cpuctx)
{
struct perf_event_context *ctx = &cpuctx->ctx;
- __perf_event_sched_in(ctx, cpuctx, cpu);
+ __perf_event_sched_in(ctx, cpuctx);
}
#define MAX_INTERRUPTS (~0ULL)
@@ -1442,26 +1446,22 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/
static void rotate_ctx(struct perf_event_context *ctx)
{
- struct perf_event *event;
-
if (!ctx->nr_events)
return;
raw_spin_lock(&ctx->lock);
- /*
- * Rotate the first entry last (works just fine for group events too):
- */
+
+ /* Rotate the first entry last of non-pinned groups */
perf_disable();
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- list_move_tail(&event->group_entry, &ctx->group_list);
- break;
- }
+
+ list_rotate_left(&ctx->flexible_groups);
+
perf_enable();
raw_spin_unlock(&ctx->lock);
}
-void perf_event_task_tick(struct task_struct *curr, int cpu)
+void perf_event_task_tick(struct task_struct *curr)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
@@ -1469,7 +1469,7 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
if (!atomic_read(&nr_events))
return;
- cpuctx = &per_cpu(perf_cpu_context, cpu);
+ cpuctx = &__get_cpu_var(perf_cpu_context);
ctx = curr->perf_event_ctxp;
perf_ctx_adjust_freq(&cpuctx->ctx);
@@ -1484,9 +1484,24 @@ void perf_event_task_tick(struct task_struct *curr, int cpu)
if (ctx)
rotate_ctx(ctx);
- perf_event_cpu_sched_in(cpuctx, cpu);
+ perf_event_cpu_sched_in(cpuctx);
if (ctx)
- perf_event_task_sched_in(curr, cpu);
+ perf_event_task_sched_in(curr);
+}
+
+static int event_enable_on_exec(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ if (!event->attr.enable_on_exec)
+ return 0;
+
+ event->attr.enable_on_exec = 0;
+ if (event->state >= PERF_EVENT_STATE_INACTIVE)
+ return 0;
+
+ __perf_event_mark_enabled(event, ctx);
+
+ return 1;
}
/*
@@ -1499,6 +1514,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
struct perf_event *event;
unsigned long flags;
int enabled = 0;
+ int ret;
local_irq_save(flags);
ctx = task->perf_event_ctxp;
@@ -1509,14 +1525,16 @@ static void perf_event_enable_on_exec(struct task_struct *task)
raw_spin_lock(&ctx->lock);
- list_for_each_entry(event, &ctx->group_list, group_entry) {
- if (!event->attr.enable_on_exec)
- continue;
- event->attr.enable_on_exec = 0;
- if (event->state >= PERF_EVENT_STATE_INACTIVE)
- continue;
- __perf_event_mark_enabled(event, ctx);
- enabled = 1;
+ list_for_each_entry(event, &ctx->pinned_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
+ }
+
+ list_for_each_entry(event, &ctx->flexible_groups, group_entry) {
+ ret = event_enable_on_exec(event, ctx);
+ if (ret)
+ enabled = 1;
}
/*
@@ -1527,7 +1545,7 @@ static void perf_event_enable_on_exec(struct task_struct *task)
raw_spin_unlock(&ctx->lock);
- perf_event_task_sched_in(task, smp_processor_id());
+ perf_event_task_sched_in(task);
out:
local_irq_restore(flags);
}
@@ -1590,7 +1608,8 @@ __perf_event_init_context(struct perf_event_context *ctx,
{
raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
- INIT_LIST_HEAD(&ctx->group_list);
+ INIT_LIST_HEAD(&ctx->pinned_groups);
+ INIT_LIST_HEAD(&ctx->flexible_groups);
INIT_LIST_HEAD(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
ctx->task = task;
@@ -4185,7 +4204,7 @@ static const struct pmu perf_ops_task_clock = {
.read = task_clock_perf_event_read,
};
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_EVENT_TRACING
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
int entry_size)
@@ -4290,7 +4309,7 @@ static void perf_event_free_filter(struct perf_event *event)
{
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_EVENT_TRACING */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
static void bp_perf_event_destroy(struct perf_event *event)
@@ -5040,7 +5059,11 @@ void perf_event_exit_task(struct task_struct *child)
mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING);
again:
- list_for_each_entry_safe(child_event, tmp, &child_ctx->group_list,
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->pinned_groups,
+ group_entry)
+ __perf_event_exit_task(child_event, child_ctx, child);
+
+ list_for_each_entry_safe(child_event, tmp, &child_ctx->flexible_groups,
group_entry)
__perf_event_exit_task(child_event, child_ctx, child);
@@ -5049,7 +5072,8 @@ again:
* its siblings to the list, but we obtained 'tmp' before that which
* will still point to the list head terminating the iteration.
*/
- if (!list_empty(&child_ctx->group_list))
+ if (!list_empty(&child_ctx->pinned_groups) ||
+ !list_empty(&child_ctx->flexible_groups))
goto again;
mutex_unlock(&child_ctx->mutex);
@@ -5057,6 +5081,24 @@ again:
put_ctx(child_ctx);
}
+static void perf_free_event(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ struct perf_event *parent = event->parent;
+
+ if (WARN_ON_ONCE(!parent))
+ return;
+
+ mutex_lock(&parent->child_mutex);
+ list_del_init(&event->child_list);
+ mutex_unlock(&parent->child_mutex);
+
+ fput(parent->filp);
+
+ list_del_event(event, ctx);
+ free_event(event);
+}
+
/*
* free an unexposed, unused context as created by inheritance by
* init_task below, used by fork() in case of fail.
@@ -5071,36 +5113,70 @@ void perf_event_free_task(struct task_struct *task)
mutex_lock(&ctx->mutex);
again:
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry) {
- struct perf_event *parent = event->parent;
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ perf_free_event(event, ctx);
- if (WARN_ON_ONCE(!parent))
- continue;
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
+ group_entry)
+ perf_free_event(event, ctx);
- mutex_lock(&parent->child_mutex);
- list_del_init(&event->child_list);
- mutex_unlock(&parent->child_mutex);
+ if (!list_empty(&ctx->pinned_groups) ||
+ !list_empty(&ctx->flexible_groups))
+ goto again;
- fput(parent->filp);
+ mutex_unlock(&ctx->mutex);
- list_del_event(event, ctx);
- free_event(event);
+ put_ctx(ctx);
+}
+
+static int
+inherit_task_group(struct perf_event *event, struct task_struct *parent,
+ struct perf_event_context *parent_ctx,
+ struct task_struct *child,
+ int *inherited_all)
+{
+ int ret;
+ struct perf_event_context *child_ctx = child->perf_event_ctxp;
+
+ if (!event->attr.inherit) {
+ *inherited_all = 0;
+ return 0;
}
- if (!list_empty(&ctx->group_list))
- goto again;
+ if (!child_ctx) {
+ /*
+ * This is executed from the parent task context, so
+ * inherit events that have been marked for cloning.
+ * First allocate and initialize a context for the
+ * child.
+ */
- mutex_unlock(&ctx->mutex);
+ child_ctx = kzalloc(sizeof(struct perf_event_context),
+ GFP_KERNEL);
+ if (!child_ctx)
+ return -ENOMEM;
- put_ctx(ctx);
+ __perf_event_init_context(child_ctx, child);
+ child->perf_event_ctxp = child_ctx;
+ get_task_struct(child);
+ }
+
+ ret = inherit_group(event, parent, parent_ctx,
+ child, child_ctx);
+
+ if (ret)
+ *inherited_all = 0;
+
+ return ret;
}
+
/*
* Initialize the perf_event context in task_struct
*/
int perf_event_init_task(struct task_struct *child)
{
- struct perf_event_context *child_ctx = NULL, *parent_ctx;
+ struct perf_event_context *child_ctx, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
@@ -5138,41 +5214,22 @@ int perf_event_init_task(struct task_struct *child)
* We dont have to disable NMIs - we are only looking at
* the list, not manipulating it:
*/
- list_for_each_entry(event, &parent_ctx->group_list, group_entry) {
-
- if (!event->attr.inherit) {
- inherited_all = 0;
- continue;
- }
-
- if (!child->perf_event_ctxp) {
- /*
- * This is executed from the parent task context, so
- * inherit events that have been marked for cloning.
- * First allocate and initialize a context for the
- * child.
- */
-
- child_ctx = kzalloc(sizeof(struct perf_event_context),
- GFP_KERNEL);
- if (!child_ctx) {
- ret = -ENOMEM;
- break;
- }
-
- __perf_event_init_context(child_ctx, child);
- child->perf_event_ctxp = child_ctx;
- get_task_struct(child);
- }
+ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
+ break;
+ }
- ret = inherit_group(event, parent, parent_ctx,
- child, child_ctx);
- if (ret) {
- inherited_all = 0;
+ list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
+ ret = inherit_task_group(event, parent, parent_ctx, child,
+ &inherited_all);
+ if (ret)
break;
- }
}
+ child_ctx = child->perf_event_ctxp;
+
if (child_ctx && inherited_all) {
/*
* Mark the child context as a clone of the parent
@@ -5221,7 +5278,9 @@ static void __perf_event_exit_cpu(void *info)
struct perf_event_context *ctx = &cpuctx->ctx;
struct perf_event *event, *tmp;
- list_for_each_entry_safe(event, tmp, &ctx->group_list, group_entry)
+ list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
+ __perf_event_remove_from_context(event);
+ list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
__perf_event_remove_from_context(event);
}
static void perf_event_exit_cpu(int cpu)
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9bb52177af02..adda92bfafac 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -61,6 +61,9 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
static int stutter = 5; /* Start/stop testing interval (in sec) */
static int irqreader = 1; /* RCU readers from irq (timers). */
+static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */
+static int fqs_holdoff = 0; /* Hold time within burst (us). */
+static int fqs_stutter = 3; /* Wait time between bursts (s). */
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
module_param(nreaders, int, 0444);
@@ -79,6 +82,12 @@ module_param(stutter, int, 0444);
MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
module_param(irqreader, int, 0444);
MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
+module_param(fqs_duration, int, 0444);
+MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us)");
+module_param(fqs_holdoff, int, 0444);
+MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)");
+module_param(fqs_stutter, int, 0444);
+MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)");
module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
@@ -99,6 +108,7 @@ static struct task_struct **reader_tasks;
static struct task_struct *stats_task;
static struct task_struct *shuffler_task;
static struct task_struct *stutter_task;
+static struct task_struct *fqs_task;
#define RCU_TORTURE_PIPE_LEN 10
@@ -263,6 +273,7 @@ struct rcu_torture_ops {
void (*deferred_free)(struct rcu_torture *p);
void (*sync)(void);
void (*cb_barrier)(void);
+ void (*fqs)(void);
int (*stats)(char *page);
int irq_capable;
char *name;
@@ -347,6 +358,7 @@ static struct rcu_torture_ops rcu_ops = {
.deferred_free = rcu_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = rcu_barrier,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu"
@@ -388,6 +400,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu,
.cb_barrier = NULL,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_sync"
@@ -403,6 +416,7 @@ static struct rcu_torture_ops rcu_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_rcu_expedited,
.cb_barrier = NULL,
+ .fqs = rcu_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_expedited"
@@ -465,6 +479,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
.deferred_free = rcu_bh_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = rcu_barrier_bh,
+ .fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh"
@@ -480,6 +495,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = rcu_bh_torture_synchronize,
.cb_barrier = NULL,
+ .fqs = rcu_bh_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "rcu_bh_sync"
@@ -621,6 +637,7 @@ static struct rcu_torture_ops sched_ops = {
.deferred_free = rcu_sched_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = rcu_barrier_sched,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.irq_capable = 1,
.name = "sched"
@@ -636,6 +653,7 @@ static struct rcu_torture_ops sched_sync_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = sched_torture_synchronize,
.cb_barrier = NULL,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = NULL,
.name = "sched_sync"
};
@@ -650,12 +668,45 @@ static struct rcu_torture_ops sched_expedited_ops = {
.deferred_free = rcu_sync_torture_deferred_free,
.sync = synchronize_sched_expedited,
.cb_barrier = NULL,
+ .fqs = rcu_sched_force_quiescent_state,
.stats = rcu_expedited_torture_stats,
.irq_capable = 1,
.name = "sched_expedited"
};
/*
+ * RCU torture force-quiescent-state kthread. Repeatedly induces
+ * bursts of calls to force_quiescent_state(), increasing the probability
+ * of occurrence of some important types of race conditions.
+ */
+static int
+rcu_torture_fqs(void *arg)
+{
+ unsigned long fqs_resume_time;
+ int fqs_burst_remaining;
+
+ VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
+ do {
+ fqs_resume_time = jiffies + fqs_stutter * HZ;
+ while (jiffies - fqs_resume_time > LONG_MAX) {
+ schedule_timeout_interruptible(1);
+ }
+ fqs_burst_remaining = fqs_duration;
+ while (fqs_burst_remaining > 0) {
+ cur_ops->fqs();
+ udelay(fqs_holdoff);
+ fqs_burst_remaining -= fqs_holdoff;
+ }
+ rcu_stutter_wait("rcu_torture_fqs");
+ } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
+ VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping");
+ rcutorture_shutdown_absorb("rcu_torture_fqs");
+ while (!kthread_should_stop())
+ schedule_timeout_uninterruptible(1);
+ return 0;
+}
+
+/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
* for that pointed to by rcu_torture_current, freeing the old structure
* after a series of grace periods (the "pipeline").
@@ -1030,10 +1081,11 @@ rcu_torture_print_module_parms(char *tag)
printk(KERN_ALERT "%s" TORTURE_FLAG
"--- %s: nreaders=%d nfakewriters=%d "
"stat_interval=%d verbose=%d test_no_idle_hz=%d "
- "shuffle_interval=%d stutter=%d irqreader=%d\n",
+ "shuffle_interval=%d stutter=%d irqreader=%d "
+ "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n",
torture_type, tag, nrealreaders, nfakewriters,
stat_interval, verbose, test_no_idle_hz, shuffle_interval,
- stutter, irqreader);
+ stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter);
}
static struct notifier_block rcutorture_nb = {
@@ -1109,6 +1161,12 @@ rcu_torture_cleanup(void)
}
stats_task = NULL;
+ if (fqs_task) {
+ VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task");
+ kthread_stop(fqs_task);
+ }
+ fqs_task = NULL;
+
/* Wait for all RCU callbacks to fire. */
if (cur_ops->cb_barrier != NULL)
@@ -1154,6 +1212,11 @@ rcu_torture_init(void)
mutex_unlock(&fullstop_mutex);
return -EINVAL;
}
+ if (cur_ops->fqs == NULL && fqs_duration != 0) {
+ printk(KERN_ALERT "rcu-torture: ->fqs NULL and non-zero "
+ "fqs_duration, fqs disabled.\n");
+ fqs_duration = 0;
+ }
if (cur_ops->init)
cur_ops->init(); /* no "goto unwind" prior to this point!!! */
@@ -1282,6 +1345,19 @@ rcu_torture_init(void)
goto unwind;
}
}
+ if (fqs_duration < 0)
+ fqs_duration = 0;
+ if (fqs_duration) {
+ /* Create the stutter thread */
+ fqs_task = kthread_run(rcu_torture_fqs, NULL,
+ "rcu_torture_fqs");
+ if (IS_ERR(fqs_task)) {
+ firsterr = PTR_ERR(fqs_task);
+ VERBOSE_PRINTK_ERRSTRING("Failed to create fqs");
+ fqs_task = NULL;
+ goto unwind;
+ }
+ }
register_reboot_notifier(&rcutorture_nb);
mutex_unlock(&fullstop_mutex);
return 0;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 53ae9598f798..099a255ede4c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -157,6 +157,24 @@ long rcu_batches_completed_bh(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
/*
+ * Force a quiescent state for RCU BH.
+ */
+void rcu_bh_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_bh_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
+
+/*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_sched_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
+/*
* Does the CPU have callbacks ready to be invoked?
*/
static int
@@ -659,7 +677,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
struct rcu_data *rdp = rsp->rda[smp_processor_id()];
struct rcu_node *rnp = rcu_get_root(rsp);
- if (!cpu_needs_another_gp(rsp, rdp)) {
+ if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
+ if (cpu_needs_another_gp(rsp, rdp))
+ rsp->fqs_need_gp = 1;
if (rnp->completed == rsp->completed) {
spin_unlock_irqrestore(&rnp->lock, flags);
return;
@@ -1144,11 +1164,9 @@ void rcu_check_callbacks(int cpu, int user)
/*
* Scan the leaf rcu_node structures, processing dyntick state for any that
* have not yet encountered a quiescent state, using the function specified.
- * Returns 1 if the current grace period ends while scanning (possibly
- * because we made it end).
+ * The caller must have suppressed start of new grace periods.
*/
-static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
- int (*f)(struct rcu_data *))
+static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
{
unsigned long bit;
int cpu;
@@ -1159,9 +1177,9 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
rcu_for_each_leaf_node(rsp, rnp) {
mask = 0;
spin_lock_irqsave(&rnp->lock, flags);
- if (rnp->completed != lastcomp) {
+ if (!rcu_gp_in_progress(rsp)) {
spin_unlock_irqrestore(&rnp->lock, flags);
- return 1;
+ return;
}
if (rnp->qsmask == 0) {
spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1173,7 +1191,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
mask |= bit;
}
- if (mask != 0 && rnp->completed == lastcomp) {
+ if (mask != 0) {
/* rcu_report_qs_rnp() releases rnp->lock. */
rcu_report_qs_rnp(mask, rsp, rnp, flags);
@@ -1181,7 +1199,6 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
}
spin_unlock_irqrestore(&rnp->lock, flags);
}
- return 0;
}
/*
@@ -1191,10 +1208,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
{
unsigned long flags;
- long lastcomp;
struct rcu_node *rnp = rcu_get_root(rsp);
- u8 signaled;
- u8 forcenow;
if (!rcu_gp_in_progress(rsp))
return; /* No grace period in progress, nothing to force. */
@@ -1204,19 +1218,17 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
}
if (relaxed &&
(long)(rsp->jiffies_force_qs - jiffies) >= 0)
- goto unlock_ret; /* no emergency and done recently. */
+ goto unlock_fqs_ret; /* no emergency and done recently. */
rsp->n_force_qs++;
- spin_lock(&rnp->lock);
- lastcomp = rsp->gpnum - 1;
- signaled = rsp->signaled;
+ spin_lock(&rnp->lock); /* irqs already disabled */
rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
if(!rcu_gp_in_progress(rsp)) {
rsp->n_force_qs_ngp++;
- spin_unlock(&rnp->lock);
- goto unlock_ret; /* no GP in progress, time updated. */
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
+ goto unlock_fqs_ret; /* no GP in progress, time updated. */
}
- spin_unlock(&rnp->lock);
- switch (signaled) {
+ rsp->fqs_active = 1;
+ switch (rsp->signaled) {
case RCU_GP_IDLE:
case RCU_GP_INIT:
@@ -1224,44 +1236,37 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
case RCU_SAVE_DYNTICK:
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
break; /* So gcc recognizes the dead code. */
/* Record dyntick-idle state. */
- if (rcu_process_dyntick(rsp, lastcomp,
- dyntick_save_progress_counter))
- goto unlock_ret;
- /* fall into next case. */
-
- case RCU_SAVE_COMPLETED:
-
- /* Update state, record completion counter. */
- forcenow = 0;
- spin_lock(&rnp->lock);
- if (lastcomp + 1 == rsp->gpnum &&
- lastcomp == rsp->completed &&
- rsp->signaled == signaled) {
+ force_qs_rnp(rsp, dyntick_save_progress_counter);
+ spin_lock(&rnp->lock); /* irqs already disabled */
+ if (rcu_gp_in_progress(rsp))
rsp->signaled = RCU_FORCE_QS;
- rsp->completed_fqs = lastcomp;
- forcenow = signaled == RCU_SAVE_COMPLETED;
- }
- spin_unlock(&rnp->lock);
- if (!forcenow)
- break;
- /* fall into next case. */
+ break;
case RCU_FORCE_QS:
/* Check dyntick-idle state, send IPI to laggarts. */
- if (rcu_process_dyntick(rsp, rsp->completed_fqs,
- rcu_implicit_dynticks_qs))
- goto unlock_ret;
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
+ force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
/* Leave state in case more forcing is required. */
+ spin_lock(&rnp->lock); /* irqs already disabled */
break;
}
-unlock_ret:
+ rsp->fqs_active = 0;
+ if (rsp->fqs_need_gp) {
+ spin_unlock(&rsp->fqslock); /* irqs remain disabled */
+ rsp->fqs_need_gp = 0;
+ rcu_start_gp(rsp, flags); /* releases rnp->lock */
+ return;
+ }
+ spin_unlock(&rnp->lock); /* irqs remain disabled */
+unlock_fqs_ret:
spin_unlock_irqrestore(&rsp->fqslock, flags);
}
@@ -1806,11 +1811,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
*/
static void __init rcu_init_one(struct rcu_state *rsp)
{
+ static char *buf[] = { "rcu_node_level_0",
+ "rcu_node_level_1",
+ "rcu_node_level_2",
+ "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
int cpustride = 1;
int i;
int j;
struct rcu_node *rnp;
+ BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
+
/* Initialize the level-tracking arrays. */
for (i = 1; i < NUM_RCU_LVLS; i++)
@@ -1824,7 +1835,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
rnp = rsp->level[i];
for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
spin_lock_init(&rnp->lock);
- lockdep_set_class(&rnp->lock, &rcu_node_class[i]);
+ lockdep_set_class_and_name(&rnp->lock,
+ &rcu_node_class[i], buf[i]);
rnp->gpnum = 0;
rnp->qsmask = 0;
rnp->qsmaskinit = 0;
@@ -1876,7 +1888,7 @@ do { \
void __init rcu_init(void)
{
- int i;
+ int cpu;
rcu_bootup_announce();
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
@@ -1896,8 +1908,8 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
cpu_notifier(rcu_cpu_notify, 0);
- for_each_online_cpu(i)
- rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i);
+ for_each_online_cpu(cpu)
+ rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
}
#include "rcutree_plugin.h"
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index d2a0046f63b2..d9d032abd665 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -237,12 +237,11 @@ struct rcu_data {
#define RCU_GP_IDLE 0 /* No grace period in progress. */
#define RCU_GP_INIT 1 /* Grace period being initialized. */
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
-#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */
-#define RCU_FORCE_QS 4 /* Need to force quiescent state. */
+#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
#ifdef CONFIG_NO_HZ
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
#else /* #ifdef CONFIG_NO_HZ */
-#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED
+#define RCU_SIGNAL_INIT RCU_FORCE_QS
#endif /* #else #ifdef CONFIG_NO_HZ */
#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
@@ -277,6 +276,13 @@ struct rcu_state {
u8 signaled ____cacheline_internodealigned_in_smp;
/* Force QS state. */
+ u8 fqs_active; /* force_quiescent_state() */
+ /* is running. */
+ u8 fqs_need_gp; /* A CPU was prevented from */
+ /* starting a new grace */
+ /* period because */
+ /* force_quiescent_state() */
+ /* was running. */
long gpnum; /* Current gp number. */
long completed; /* # of last completed gp. */
@@ -294,8 +300,6 @@ struct rcu_state {
long orphan_qlen; /* Number of orphaned cbs. */
spinlock_t fqslock; /* Only one task forcing */
/* quiescent states. */
- long completed_fqs; /* Value of completed @ snap. */
- /* Protected by fqslock. */
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
@@ -319,8 +323,6 @@ struct rcu_state {
#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */
/* GP were moved to root. */
-#ifdef RCU_TREE_NONCORE
-
/*
* RCU implementation internal declarations:
*/
@@ -335,7 +337,7 @@ extern struct rcu_state rcu_preempt_state;
DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-#else /* #ifdef RCU_TREE_NONCORE */
+#ifndef RCU_TREE_NONCORE
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
@@ -368,4 +370,4 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
static void rcu_preempt_send_cbs_to_orphanage(void);
static void __init __rcu_init_preempt(void);
-#endif /* #else #ifdef RCU_TREE_NONCORE */
+#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 37fbccdf41d5..e77cdf321e13 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -62,6 +62,15 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
+ * Force a quiescent state for preemptible RCU.
+ */
+void rcu_force_quiescent_state(void)
+{
+ force_quiescent_state(&rcu_preempt_state, 0);
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
* Record a preemptable-RCU quiescent state for the specified CPU. Note
* that this just means that the task currently running on the CPU is
* not in a quiescent state. There might be any number of tasks blocked
@@ -295,6 +304,9 @@ void __rcu_read_unlock(void)
if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
@@ -713,6 +725,16 @@ long rcu_batches_completed(void)
EXPORT_SYMBOL_GPL(rcu_batches_completed);
/*
+ * Force a quiescent state for RCU, which, because there is no preemptible
+ * RCU, becomes the same as rcu-sched.
+ */
+void rcu_force_quiescent_state(void)
+{
+ rcu_sched_force_quiescent_state();
+}
+EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
+
+/*
* Because preemptable RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
diff --git a/kernel/sched.c b/kernel/sched.c
index b59727d76e4e..80f27b16592d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2783,7 +2783,13 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
*/
prev_state = prev->state;
finish_arch_switch(prev);
- perf_event_task_sched_in(current, cpu_of(rq));
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_disable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
+ perf_event_task_sched_in(current);
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+ local_irq_enable();
+#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
fire_sched_in_preempt_notifiers(current);
@@ -5298,7 +5304,7 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
raw_spin_unlock(&rq->lock);
- perf_event_task_tick(curr, cpu);
+ perf_event_task_tick(curr);
#ifdef CONFIG_SMP
rq->idle_at_tick = idle_cpu(cpu);
@@ -5512,7 +5518,7 @@ need_resched_nonpreemptible:
if (likely(prev != next)) {
sched_info_switch(prev, next);
- perf_event_task_sched_out(prev, next, cpu);
+ perf_event_task_sched_out(prev, next);
rq->nr_switches++;
rq->curr = next;
@@ -9697,7 +9703,7 @@ static inline int preempt_count_equals(int preempt_offset)
return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
}
-void __might_sleep(char *file, int line, int preempt_offset)
+void __might_sleep(const char *file, int line, int preempt_offset)
{
#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8fe7ee81c552..43a493590656 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2076,7 +2076,7 @@ static void moved_group_fair(struct task_struct *p, int on_rq)
}
#endif
-unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
+static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
{
struct sched_entity *se = &task->se;
unsigned int rr_interval = 0;
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 5f93b570d383..01332bfc61a7 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -97,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
check_preempt_curr(rq, p, 0);
}
-unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
+static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
{
return 0;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f48328ac216f..072b3fcee8d8 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1721,7 +1721,7 @@ static void set_curr_task_rt(struct rq *rq)
dequeue_pushable_task(rq, p);
}
-unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
+static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
/*
* Time slice is 0 for SCHED_FIFO tasks
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 818d7d9aa03c..31b275b9c112 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -144,7 +144,7 @@ EXPORT_SYMBOL_GPL(srcu_read_unlock);
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
-void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
+static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
{
int idx;
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index cd9ecd89ec77..d00c6fe23f54 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -51,7 +51,9 @@ endif
obj-$(CONFIG_EVENT_TRACING) += trace_events.o
obj-$(CONFIG_EVENT_TRACING) += trace_export.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
-obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
+ifeq ($(CONFIG_PERF_EVENTS),y)
+obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o
+endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0df1b0f2cb9e..5314c90bbc83 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
+#include <linux/rwsem.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
@@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void)
static cpumask_var_t __read_mostly tracing_buffer_mask;
-/* Define which cpu buffers are currently read in trace_pipe */
-static cpumask_var_t tracing_reader_cpumask;
-
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
@@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly;
/*
* trace_types_lock is used to protect the trace_types list.
- * This lock is also used to keep user access serialized.
- * Accesses from userspace will grab this lock while userspace
- * activities happen inside the kernel.
*/
static DEFINE_MUTEX(trace_types_lock);
+/*
+ * serialize the access of the ring buffer
+ *
+ * ring buffer serializes readers, but it is low level protection.
+ * The validity of the events (which returns by ring_buffer_peek() ..etc)
+ * are not protected by ring buffer.
+ *
+ * The content of events may become garbage if we allow other process consumes
+ * these events concurrently:
+ * A) the page of the consumed events may become a normal page
+ * (not reader page) in ring buffer, and this page will be rewrited
+ * by events producer.
+ * B) The page of the consumed events may become a page for splice_read,
+ * and this page will be returned to system.
+ *
+ * These primitives allow multi process access to different cpu ring buffer
+ * concurrently.
+ *
+ * These primitives don't distinguish read-only and read-consume access.
+ * Multi read-only access are also serialized.
+ */
+
+#ifdef CONFIG_SMP
+static DECLARE_RWSEM(all_cpu_access_lock);
+static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ /* gain it for accessing the whole ring buffer. */
+ down_write(&all_cpu_access_lock);
+ } else {
+ /* gain it for accessing a cpu ring buffer. */
+
+ /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */
+ down_read(&all_cpu_access_lock);
+
+ /* Secondly block other access to this @cpu ring buffer. */
+ mutex_lock(&per_cpu(cpu_access_lock, cpu));
+ }
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ if (cpu == TRACE_PIPE_ALL_CPU) {
+ up_write(&all_cpu_access_lock);
+ } else {
+ mutex_unlock(&per_cpu(cpu_access_lock, cpu));
+ up_read(&all_cpu_access_lock);
+ }
+}
+
+static inline void trace_access_lock_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ mutex_init(&per_cpu(cpu_access_lock, cpu));
+}
+
+#else
+
+static DEFINE_MUTEX(access_lock);
+
+static inline void trace_access_lock(int cpu)
+{
+ (void)cpu;
+ mutex_lock(&access_lock);
+}
+
+static inline void trace_access_unlock(int cpu)
+{
+ (void)cpu;
+ mutex_unlock(&access_lock);
+}
+
+static inline void trace_access_lock_init(void)
+{
+}
+
+#endif
+
/* trace_wait is a waitqueue for tasks blocked on trace_poll */
static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
@@ -1315,8 +1392,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
entry->fmt = fmt;
memcpy(entry->buf, trace_buf, sizeof(u32) * len);
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1389,8 +1468,10 @@ int trace_array_vprintk(struct trace_array *tr,
memcpy(&entry->buf, trace_buf, len);
entry->buf[len] = '\0';
- if (!filter_check_discard(call, entry, buffer, event))
+ if (!filter_check_discard(call, entry, buffer, event)) {
ring_buffer_unlock_commit(buffer, event);
+ ftrace_trace_stack(buffer, irq_flags, 6, pc);
+ }
out_unlock:
arch_spin_unlock(&trace_buf_lock);
@@ -1580,12 +1661,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu)
}
/*
- * No necessary locking here. The worst thing which can
- * happen is loosing events consumed at the same time
- * by a trace_pipe reader.
- * Other than that, we don't risk to crash the ring buffer
- * because it serializes the readers.
- *
* The current tracer is copied to avoid a global locking
* all around.
*/
@@ -1640,12 +1715,16 @@ static void *s_start(struct seq_file *m, loff_t *pos)
}
trace_event_read_lock();
+ trace_access_lock(cpu_file);
return p;
}
static void s_stop(struct seq_file *m, void *p)
{
+ struct trace_iterator *iter = m->private;
+
atomic_dec(&trace_record_cmdline_disabled);
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -2836,22 +2915,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
mutex_lock(&trace_types_lock);
- /* We only allow one reader per cpu */
- if (cpu_file == TRACE_PIPE_ALL_CPU) {
- if (!cpumask_empty(tracing_reader_cpumask)) {
- ret = -EBUSY;
- goto out;
- }
- cpumask_setall(tracing_reader_cpumask);
- } else {
- if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
- cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
- else {
- ret = -EBUSY;
- goto out;
- }
- }
-
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
@@ -2907,12 +2970,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
mutex_lock(&trace_types_lock);
- if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
- cpumask_clear(tracing_reader_cpumask);
- else
- cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
-
-
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
@@ -3074,6 +3131,7 @@ waitagain:
iter->pos = -1;
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
while (find_next_entry_inc(iter) != NULL) {
enum print_line_t ret;
int len = iter->seq.len;
@@ -3090,6 +3148,7 @@ waitagain:
if (iter->seq.len >= cnt)
break;
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
/* Now copy what we have to the user */
@@ -3215,6 +3274,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
}
trace_event_read_lock();
+ trace_access_lock(iter->cpu_file);
/* Fill as many pages as possible. */
for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
@@ -3238,6 +3298,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
trace_seq_init(&iter->seq);
}
+ trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
mutex_unlock(&iter->mutex);
@@ -3539,10 +3600,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
info->read = 0;
+ trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
count,
info->cpu, 0);
+ trace_access_unlock(info->cpu);
if (ret < 0)
return 0;
@@ -3670,6 +3733,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
len &= PAGE_MASK;
}
+ trace_access_lock(info->cpu);
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
@@ -3717,6 +3781,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
}
+ trace_access_unlock(info->cpu);
spd.nr_pages = i;
/* did we read anything? */
@@ -4153,6 +4218,8 @@ static __init int tracer_init_debugfs(void)
struct dentry *d_tracer;
int cpu;
+ trace_access_lock_init();
+
d_tracer = tracing_init_dentry();
trace_create_file("tracing_enabled", 0644, d_tracer,
@@ -4387,9 +4454,6 @@ __init static int tracer_alloc_buffers(void)
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
- if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
- goto out_free_tracing_cpumask;
-
/* To save memory, keep the ring buffer size to its minimum */
if (ring_buffer_expanded)
ring_buf_size = trace_buf_size;
@@ -4447,8 +4511,6 @@ __init static int tracer_alloc_buffers(void)
return 0;
out_free_cpumask:
- free_cpumask_var(tracing_reader_cpumask);
-out_free_tracing_cpumask:
free_cpumask_var(tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 189b09baf4fb..c2a3077b7353 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -520,41 +520,16 @@ out:
return ret;
}
-extern char *__bad_type_size(void);
-
-#undef FIELD
-#define FIELD(type, name) \
- sizeof(type) != sizeof(field.name) ? __bad_type_size() : \
- #type, "common_" #name, offsetof(typeof(field), name), \
- sizeof(field.name), is_signed_type(type)
-
-static int trace_write_header(struct trace_seq *s)
-{
- struct trace_entry field;
-
- /* struct trace_entry */
- return trace_seq_printf(s,
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n"
- "\n",
- FIELD(unsigned short, type),
- FIELD(unsigned char, flags),
- FIELD(unsigned char, preempt_count),
- FIELD(int, pid),
- FIELD(int, lock_depth));
-}
-
static ssize_t
event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
{
struct ftrace_event_call *call = filp->private_data;
+ struct ftrace_event_field *field;
struct trace_seq *s;
+ int common_field_count = 5;
char *buf;
- int r;
+ int r = 0;
if (*ppos)
return 0;
@@ -565,14 +540,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
trace_seq_init(s);
- /* If any of the first writes fail, so will the show_format. */
-
trace_seq_printf(s, "name: %s\n", call->name);
trace_seq_printf(s, "ID: %d\n", call->id);
trace_seq_printf(s, "format:\n");
- trace_write_header(s);
- r = call->show_format(call, s);
+ list_for_each_entry_reverse(field, &call->fields, link) {
+ /*
+ * Smartly shows the array type(except dynamic array).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ const char *array_descriptor = strchr(field->type, '[');
+
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
+
+ if (!array_descriptor) {
+ r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
+ "\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ } else {
+ r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
+ "\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
+ }
+
+ if (--common_field_count == 0)
+ r = trace_seq_printf(s, "\n");
+
+ if (!r)
+ break;
+ }
+
+ if (r)
+ r = trace_seq_printf(s, "\nprint fmt: %s\n",
+ call->print_fmt);
+
if (!r) {
/*
* ug! The format output is bigger than a PAGE!!
@@ -948,10 +957,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
filter);
}
- /* A trace may not want to export its format */
- if (!call->show_format)
- return 0;
-
trace_create_file("format", 0444, call->dir, call,
format);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e42af9aad69f..4615f62a04f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1371,7 +1371,7 @@ out_unlock:
return err;
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
void ftrace_profile_free_filter(struct perf_event *event)
{
@@ -1439,5 +1439,5 @@ out_unlock:
return err;
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d4fa5dc1ee4e..e091f64ba6ce 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \
#include "trace_entries.h"
-
-#undef __field
-#define __field(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- sizeof(field.item), is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __field_desc
-#define __field_desc(type, container, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), container.item), \
- sizeof(field.container.item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __array
-#define __array(type, item, len) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- sizeof(field.item), is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __array_desc
-#define __array_desc(type, container, item, len) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \
- "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \
- offsetof(typeof(field), container.item), \
- sizeof(field.container.item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef __dynamic_array
-#define __dynamic_array(type, item) \
- ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \
- "offset:%zu;\tsize:0;\tsigned:%u;\n", \
- offsetof(typeof(field), item), \
- is_signed_type(type)); \
- if (!ret) \
- return 0;
-
-#undef F_printk
-#define F_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef __entry
-#define __entry REC
-
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
-static int \
-ftrace_format_##name(struct ftrace_event_call *unused, \
- struct trace_seq *s) \
-{ \
- struct struct_name field __attribute__((unused)); \
- int ret = 0; \
- \
- tstruct; \
- \
- trace_seq_printf(s, "\nprint fmt: " print); \
- \
- return ret; \
-}
-
-#include "trace_entries.h"
-
#undef __field
#define __field(type, item) \
ret = trace_define_field(event_call, #type, #item, \
@@ -175,7 +103,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \
return ret;
#undef __dynamic_array
-#define __dynamic_array(type, item)
+#define __dynamic_array(type, item) \
+ ret = trace_define_field(event_call, #type, #item, \
+ offsetof(typeof(field), item), \
+ 0, is_signed_type(type), FILTER_OTHER);\
+ if (ret) \
+ return ret;
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
@@ -198,6 +131,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
return 0;
}
+#undef __entry
+#define __entry REC
+
#undef __field
#define __field(type, item)
@@ -213,6 +149,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
#undef __dynamic_array
#define __dynamic_array(type, item)
+#undef F_printk
+#define F_printk(fmt, args...) #fmt ", " __stringify(args)
+
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \
\
@@ -223,7 +162,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
.id = type, \
.system = __stringify(TRACE_SYSTEM), \
.raw_init = ftrace_raw_init_event, \
- .show_format = ftrace_format_##call, \
+ .print_fmt = print, \
.define_fields = ftrace_define_fields_##call, \
}; \
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b1342c5d37cf..f2252296607c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -212,9 +212,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
int cpu;
int pc;
- if (unlikely(!tr))
- return 0;
-
if (!ftrace_trace_task(current))
return 0;
@@ -287,11 +284,20 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
local_irq_restore(flags);
}
+void set_graph_array(struct trace_array *tr)
+{
+ graph_array = tr;
+
+ /* Make graph_array visible before we start tracing */
+
+ smp_mb();
+}
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
- graph_array = tr;
+ set_graph_array(tr);
ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry);
if (ret)
@@ -301,11 +307,6 @@ static int graph_trace_init(struct trace_array *tr)
return 0;
}
-void set_graph_array(struct trace_array *tr)
-{
- graph_array = tr;
-}
-
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6ea90c0e2c96..aa19b6a7de22 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -91,11 +91,6 @@ static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr)
return retval;
}
-static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num)
-{
- return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num));
-}
-
static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs,
void *dummy)
{
@@ -231,9 +226,7 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff)
{
int ret = -EINVAL;
- if (ff->func == fetch_argument)
- ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data);
- else if (ff->func == fetch_register) {
+ if (ff->func == fetch_register) {
const char *name;
name = regs_query_register_name((unsigned int)((long)ff->data));
ret = snprintf(buf, n, "%%%s", name);
@@ -489,14 +482,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return)
}
} else
ret = -EINVAL;
- } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) {
- ret = strict_strtoul(arg + 3, 10, &param);
- if (ret || param > PARAM_MAX_ARGS)
- ret = -EINVAL;
- else {
- ff->func = fetch_argument;
- ff->data = (void *)param;
- }
} else
ret = -EINVAL;
return ret;
@@ -611,7 +596,6 @@ static int create_trace_probe(int argc, char **argv)
* - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS]
* - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS]
* Fetch args:
- * $argN : fetch Nth of function argument. (N:0-)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
@@ -1174,83 +1158,63 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
return 0;
}
-static int __probe_event_show_format(struct trace_seq *s,
- struct trace_probe *tp, const char *fmt,
- const char *arg)
+static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
{
int i;
+ int pos = 0;
- /* Show format */
- if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt))
- return 0;
+ const char *fmt, *arg;
- for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name))
- return 0;
-
- if (!trace_seq_printf(s, "\", %s", arg))
- return 0;
+ if (!probe_is_return(tp)) {
+ fmt = "(%lx)";
+ arg = "REC->" FIELD_STRING_IP;
+ } else {
+ fmt = "(%lx <- %lx)";
+ arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
+ }
- for (i = 0; i < tp->nr_args; i++)
- if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name))
- return 0;
+ /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
- return trace_seq_puts(s, "\n");
-}
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
-#undef SHOW_FIELD
-#define SHOW_FIELD(type, item, name) \
- do { \
- ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \
- "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\
- (unsigned int)offsetof(typeof(field), item),\
- (unsigned int)sizeof(type), \
- is_signed_type(type)); \
- if (!ret) \
- return 0; \
- } while (0)
+ for (i = 0; i < tp->nr_args; i++) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx",
+ tp->args[i].name);
+ }
-static int kprobe_event_show_format(struct ftrace_event_call *call,
- struct trace_seq *s)
-{
- struct kprobe_trace_entry field __attribute__((unused));
- int ret, i;
- struct trace_probe *tp = (struct trace_probe *)call->data;
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
- SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP);
- SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+ for (i = 0; i < tp->nr_args; i++) {
+ pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
+ tp->args[i].name);
+ }
- /* Show fields */
- for (i = 0; i < tp->nr_args; i++)
- SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
- trace_seq_puts(s, "\n");
+#undef LEN_OR_ZERO
- return __probe_event_show_format(s, tp, "(%lx)",
- "REC->" FIELD_STRING_IP);
+ /* return the length of print_fmt */
+ return pos;
}
-static int kretprobe_event_show_format(struct ftrace_event_call *call,
- struct trace_seq *s)
+static int set_print_fmt(struct trace_probe *tp)
{
- struct kretprobe_trace_entry field __attribute__((unused));
- int ret, i;
- struct trace_probe *tp = (struct trace_probe *)call->data;
+ int len;
+ char *print_fmt;
- SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC);
- SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP);
- SHOW_FIELD(int, nargs, FIELD_STRING_NARGS);
+ /* First: called with 0 length to calculate the needed length */
+ len = __set_print_fmt(tp, NULL, 0);
+ print_fmt = kmalloc(len + 1, GFP_KERNEL);
+ if (!print_fmt)
+ return -ENOMEM;
- /* Show fields */
- for (i = 0; i < tp->nr_args; i++)
- SHOW_FIELD(unsigned long, args[i], tp->args[i].name);
- trace_seq_puts(s, "\n");
+ /* Second: actually write the @print_fmt */
+ __set_print_fmt(tp, print_fmt, len + 1);
+ tp->call.print_fmt = print_fmt;
- return __probe_event_show_format(s, tp, "(%lx <- %lx)",
- "REC->" FIELD_STRING_FUNC
- ", REC->" FIELD_STRING_RETIP);
+ return 0;
}
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
/* Kprobe profile handler */
static __kprobes int kprobe_profile_func(struct kprobe *kp,
@@ -1408,7 +1372,7 @@ static void probe_profile_disable(struct ftrace_event_call *call)
disable_kprobe(&tp->rp.kp);
}
}
-#endif /* CONFIG_EVENT_PROFILE */
+#endif /* CONFIG_PERF_EVENTS */
static __kprobes
@@ -1418,10 +1382,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kprobe_trace_func(kp, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kprobe_profile_func(kp, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1432,10 +1396,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
if (tp->flags & TP_FLAG_TRACE)
kretprobe_trace_func(ri, regs);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
if (tp->flags & TP_FLAG_PROFILE)
kretprobe_profile_func(ri, regs);
-#endif /* CONFIG_EVENT_PROFILE */
+#endif
return 0; /* We don't tweek kernel, so just return 0 */
}
@@ -1448,23 +1412,25 @@ static int register_probe_event(struct trace_probe *tp)
if (probe_is_return(tp)) {
tp->event.trace = print_kretprobe_event;
call->raw_init = probe_event_raw_init;
- call->show_format = kretprobe_event_show_format;
call->define_fields = kretprobe_event_define_fields;
} else {
tp->event.trace = print_kprobe_event;
call->raw_init = probe_event_raw_init;
- call->show_format = kprobe_event_show_format;
call->define_fields = kprobe_event_define_fields;
}
+ if (set_print_fmt(tp) < 0)
+ return -ENOMEM;
call->event = &tp->event;
call->id = register_ftrace_event(&tp->event);
- if (!call->id)
+ if (!call->id) {
+ kfree(call->print_fmt);
return -ENODEV;
+ }
call->enabled = 0;
call->regfunc = probe_event_enable;
call->unregfunc = probe_event_disable;
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
call->profile_enable = probe_profile_enable;
call->profile_disable = probe_profile_disable;
#endif
@@ -1472,6 +1438,7 @@ static int register_probe_event(struct trace_probe *tp)
ret = trace_add_event_call(call);
if (ret) {
pr_info("Failed to register kprobe event: %s\n", call->name);
+ kfree(call->print_fmt);
unregister_ftrace_event(&tp->event);
}
return ret;
@@ -1481,6 +1448,7 @@ static void unregister_probe_event(struct trace_probe *tp)
{
/* tp->event is unregistered in trace_remove_event_call() */
trace_remove_event_call(&tp->call);
+ kfree(tp->call.print_fmt);
}
/* Make a debugfs interface for controling probe points */
@@ -1523,28 +1491,67 @@ static int kprobe_trace_selftest_target(int a1, int a2, int a3,
static __init int kprobe_trace_self_tests_init(void)
{
- int ret;
+ int ret, warn = 0;
int (*target)(int, int, int, int, int, int);
+ struct trace_probe *tp;
target = kprobe_trace_selftest_target;
pr_info("Testing kprobe tracing: ");
ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target "
- "$arg1 $arg2 $arg3 $arg4 $stack $stack0");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function entry\n");
+ "$stack $stack0 +0($stack)");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function entry.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target "
"$retval");
- if (WARN_ON_ONCE(ret))
- pr_warning("error enabling function return\n");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on probing function return.\n");
+ warn++;
+ } else {
+ /* Enable trace point */
+ tp = find_probe_event("testprobe2", KPROBE_EVENT_SYSTEM);
+ if (WARN_ON_ONCE(tp == NULL)) {
+ pr_warning("error on getting new probe.\n");
+ warn++;
+ } else
+ probe_event_enable(&tp->call);
+ }
+
+ if (warn)
+ goto end;
ret = target(1, 2, 3, 4, 5, 6);
- cleanup_all_probes();
+ ret = command_trace_probe("-:testprobe");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
+
+ ret = command_trace_probe("-:testprobe2");
+ if (WARN_ON_ONCE(ret)) {
+ pr_warning("error on deleting a probe.\n");
+ warn++;
+ }
- pr_cont("OK\n");
+end:
+ cleanup_all_probes();
+ if (warn)
+ pr_cont("NG: Some tests are failed. Please check them.\n");
+ else
+ pr_cont("OK\n");
return 0;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 75289f372dd2..f6b071254e0b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -143,70 +143,65 @@ extern char *__bad_type_size(void);
#type, #name, offsetof(typeof(trace), name), \
sizeof(trace.name), is_signed_type(type)
-int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s)
+static
+int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
{
int i;
- int ret;
- struct syscall_metadata *entry = call->data;
- struct syscall_trace_enter trace;
- int offset = offsetof(struct syscall_trace_enter, args);
+ int pos = 0;
- ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n",
- SYSCALL_FIELD(int, nr));
- if (!ret)
- return 0;
+ /* When len=0, we just calculate the needed length */
+#define LEN_OR_ZERO (len ? len - pos : 0)
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i],
- entry->args[i]);
- if (!ret)
- return 0;
- ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;"
- "\tsigned:%u;\n", offset,
- sizeof(unsigned long),
- is_signed_type(unsigned long));
- if (!ret)
- return 0;
- offset += sizeof(unsigned long);
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
+ entry->args[i], sizeof(unsigned long),
+ i == entry->nb_args - 1 ? "" : ", ");
}
+ pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
- trace_seq_puts(s, "\nprint fmt: \"");
for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i],
- sizeof(unsigned long),
- i == entry->nb_args - 1 ? "" : ", ");
- if (!ret)
- return 0;
+ pos += snprintf(buf + pos, LEN_OR_ZERO,
+ ", ((unsigned long)(REC->%s))", entry->args[i]);
}
- trace_seq_putc(s, '"');
- for (i = 0; i < entry->nb_args; i++) {
- ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))",
- entry->args[i]);
- if (!ret)
- return 0;
- }
+#undef LEN_OR_ZERO
- return trace_seq_putc(s, '\n');
+ /* return the length of print_fmt */
+ return pos;
}
-int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s)
+static int set_syscall_print_fmt(struct ftrace_event_call *call)
{
- int ret;
- struct syscall_trace_exit trace;
+ char *print_fmt;
+ int len;
+ struct syscall_metadata *entry = call->data;
- ret = trace_seq_printf(s,
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n"
- "\tfield:%s %s;\toffset:%zu;\tsize:%zu;"
- "\tsigned:%u;\n",
- SYSCALL_FIELD(int, nr),
- SYSCALL_FIELD(long, ret));
- if (!ret)
+ if (entry->enter_event != call) {
+ call->print_fmt = "\"0x%lx\", REC->ret";
return 0;
+ }
- return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n");
+ /* First: called with 0 length to calculate the needed length */
+ len = __set_enter_print_fmt(entry, NULL, 0);
+
+ print_fmt = kmalloc(len + 1, GFP_KERNEL);
+ if (!print_fmt)
+ return -ENOMEM;
+
+ /* Second: actually write the @print_fmt */
+ __set_enter_print_fmt(entry, print_fmt, len + 1);
+ call->print_fmt = print_fmt;
+
+ return 0;
+}
+
+static void free_syscall_print_fmt(struct ftrace_event_call *call)
+{
+ struct syscall_metadata *entry = call->data;
+
+ if (entry->enter_event == call)
+ kfree(call->print_fmt);
}
int syscall_enter_define_fields(struct ftrace_event_call *call)
@@ -386,12 +381,17 @@ int init_syscall_trace(struct ftrace_event_call *call)
{
int id;
- id = register_ftrace_event(call->event);
- if (!id)
- return -ENODEV;
- call->id = id;
- INIT_LIST_HEAD(&call->fields);
- return 0;
+ if (set_syscall_print_fmt(call) < 0)
+ return -ENOMEM;
+
+ id = trace_event_raw_init(call);
+
+ if (id < 0) {
+ free_syscall_print_fmt(call);
+ return id;
+ }
+
+ return id;
}
int __init init_ftrace_syscalls(void)
@@ -421,7 +421,7 @@ int __init init_ftrace_syscalls(void)
}
core_initcall(init_ftrace_syscalls);
-#ifdef CONFIG_EVENT_PROFILE
+#ifdef CONFIG_PERF_EVENTS
static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls);
@@ -626,6 +626,5 @@ void prof_sysexit_disable(struct ftrace_event_call *call)
mutex_unlock(&syscall_trace_lock);
}
-#endif
-
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 25c3ed594c54..6bf97d176326 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -765,9 +765,9 @@ config RCU_CPU_STALL_DETECTOR
CPUs are delaying the current grace period, but only when
the grace period extends for excessive time periods.
- Say Y if you want RCU to perform such checks.
+ Say N if you want to disable such checks.
- Say N if you are unsure.
+ Say Y if you are unsure.
config KPROBES_SANITY_TEST
bool "Kprobes sanity tests"
diff --git a/lib/hweight.c b/lib/hweight.c
index 389424ecb129..63ee4eb1228d 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -11,11 +11,18 @@
unsigned int hweight32(unsigned int w)
{
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x55555555;
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ w = (w + (w >> 4)) & 0x0f0f0f0f;
+ return (w * 0x01010101) >> 24;
+#else
unsigned int res = w - ((w >> 1) & 0x55555555);
res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
res = (res + (res >> 4)) & 0x0F0F0F0F;
res = res + (res >> 8);
return (res + (res >> 16)) & 0x000000FF;
+#endif
}
EXPORT_SYMBOL(hweight32);
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index ea6f6e3adaea..f3c9c0a90b98 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -136,13 +136,14 @@ my %text_sections = (
".text.unlikely" => 1,
);
-$objdump = "objdump" if ((length $objdump) == 0);
-$objcopy = "objcopy" if ((length $objcopy) == 0);
-$cc = "gcc" if ((length $cc) == 0);
-$ld = "ld" if ((length $ld) == 0);
-$nm = "nm" if ((length $nm) == 0);
-$rm = "rm" if ((length $rm) == 0);
-$mv = "mv" if ((length $mv) == 0);
+# Note: we are nice to C-programmers here, thus we skip the '||='-idiom.
+$objdump = 'objdump' if (!$objdump);
+$objcopy = 'objcopy' if (!$objcopy);
+$cc = 'gcc' if (!$cc);
+$ld = 'ld' if (!$ld);
+$nm = 'nm' if (!$nm);
+$rm = 'rm' if (!$rm);
+$mv = 'mv' if (!$mv);
#print STDERR "running: $P '$arch' '$objdump' '$objcopy' '$cc' '$ld' " .
# "'$nm' '$rm' '$mv' '$inputfile'\n";
@@ -432,14 +433,14 @@ sub update_funcs
# Loop through all the mcount caller offsets and print a reference
# to the caller based from the ref_func.
- for (my $i=0; $i <= $#offsets; $i++) {
- if (!$opened) {
- open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
- $opened = 1;
- print FILE "\t.section $mcount_section,\"a\",$section_type\n";
- print FILE "\t.align $alignment\n" if (defined($alignment));
- }
- printf FILE "\t%s %s + %d\n", $type, $ref_func, $offsets[$i] - $offset;
+ if (!$opened) {
+ open(FILE, ">$mcount_s") || die "can't create $mcount_s\n";
+ $opened = 1;
+ print FILE "\t.section $mcount_section,\"a\",$section_type\n";
+ print FILE "\t.align $alignment\n" if (defined($alignment));
+ }
+ foreach my $cur_offset (@offsets) {
+ printf FILE "\t%s %s + %d\n", $type, $ref_func, $cur_offset - $offset;
}
}
@@ -476,11 +477,7 @@ while (<IN>) {
$read_headers = 0;
# Only record text sections that we know are safe
- if (defined($text_sections{$1})) {
- $read_function = 1;
- } else {
- $read_function = 0;
- }
+ $read_function = defined($text_sections{$1});
# print out any recorded offsets
update_funcs();
@@ -514,7 +511,7 @@ while (<IN>) {
}
# is this a call site to mcount? If so, record it to print later
if ($text_found && /$mcount_regex/) {
- $offsets[$#offsets + 1] = hex $1;
+ push(@offsets, hex $1);
}
}
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 250e391b4bc8..2de34075f6a4 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -15,6 +15,8 @@ or
'perf probe' [options] --del='[GROUP:]EVENT' [...]
or
'perf probe' --list
+or
+'perf probe' --line='FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]'
DESCRIPTION
-----------
@@ -45,6 +47,11 @@ OPTIONS
--list::
List up current probe events.
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code.
+
PROBE SYNTAX
------------
Probe points are defined by following syntax.
@@ -56,6 +63,19 @@ Probe points are defined by following syntax.
It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number.
'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc).
+LINE SYNTAX
+-----------
+Line range is descripted by following syntax.
+
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As same as
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'.
+So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
+
SEE ALSO
--------
linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 69c832557199..0eeb247dc7d2 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -12,7 +12,7 @@ SYNOPSIS
DESCRIPTION
-----------
-Performance counters for Linux are are a new kernel-based subsystem
+Performance counters for Linux are a new kernel-based subsystem
that provide a framework for all things performance analysis. It
covers hardware level (CPU/PMU, Performance Monitoring Unit) features
and software features (software counters, tracepoints) as well.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 2e7fa3a06806..d739552036d0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -286,11 +286,7 @@ SCRIPT_PERL =
SCRIPT_SH =
TEST_PROGRAMS =
-#
-# No scripts right now:
-#
-
-# SCRIPT_SH += perf-am.sh
+SCRIPT_SH += perf-archive.sh
#
# No Perl scripts right now:
@@ -315,9 +311,7 @@ PROGRAMS += perf
# List built-in command $C whose implementation cmd_$C() is not in
# builtin-$C.o but is linked in as part of some other command.
#
-# None right now:
-#
-# BUILT_INS += perf-init $X
+BUILT_INS += perf-archive
# what 'all' will build and 'install' will install, in perfexecdir
ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
@@ -369,6 +363,7 @@ LIB_H += util/event.h
LIB_H += util/exec_cmd.h
LIB_H += util/types.h
LIB_H += util/levenshtein.h
+LIB_H += util/map.h
LIB_H += util/parse-options.h
LIB_H += util/parse-events.h
LIB_H += util/quote.h
@@ -435,8 +430,8 @@ LIB_OBJS += util/trace-event-perl.o
LIB_OBJS += util/svghelper.o
LIB_OBJS += util/sort.o
LIB_OBJS += util/hist.o
-LIB_OBJS += util/data_map.o
LIB_OBJS += util/probe-event.o
+LIB_OBJS += util/util.o
BUILTIN_OBJS += builtin-annotate.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 593ff25006de..73c202ee0882 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -132,8 +132,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
{
struct addr_location al;
- dump_printf("(IP, %d): %d: %p\n", event->header.misc,
- event->ip.pid, (void *)(long)event->ip.ip);
+ dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+ event->ip.pid, event->ip.ip);
if (event__preprocess_sample(event, session, &al, symbol_filter) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
@@ -451,10 +451,10 @@ static void perf_session__find_annotations(struct perf_session *self)
}
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_fork_event = event__process_task,
+ .sample = process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .fork = event__process_task,
};
static int __cmd_annotate(void)
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 1e99ac806913..431f204bde64 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -16,6 +16,7 @@
static char const *input_name = "perf.data";
static int force;
+static bool with_hits;
static const char * const buildid_list_usage[] = {
"perf buildid-list [<options>]",
@@ -23,6 +24,7 @@ static const char * const buildid_list_usage[] = {
};
static const struct option options[] = {
+ OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -31,26 +33,34 @@ static const struct option options[] = {
OPT_END()
};
-static int perf_file_section__process_buildids(struct perf_file_section *self,
- int feat, int fd)
+static int build_id_list__process_event(event_t *event,
+ struct perf_session *session)
{
- if (feat != HEADER_BUILD_ID)
- return 0;
+ struct addr_location al;
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ struct thread *thread = perf_session__findnew(session, event->ip.pid);
- if (lseek(fd, self->offset, SEEK_SET) < 0) {
- pr_warning("Failed to lseek to %Ld offset for buildids!\n",
- self->offset);
+ if (thread == NULL) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
return -1;
}
- if (perf_header__read_build_ids(fd, self->offset, self->size)) {
- pr_warning("Failed to read buildids!\n");
- return -1;
- }
+ thread__find_addr_map(thread, session, cpumode, MAP__FUNCTION,
+ event->ip.ip, &al);
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
return 0;
}
+static struct perf_event_ops build_id_list__event_ops = {
+ .sample = build_id_list__process_event,
+ .mmap = event__process_mmap,
+ .fork = event__process_task,
+};
+
static int __cmd_buildid_list(void)
{
int err = -1;
@@ -60,10 +70,10 @@ static int __cmd_buildid_list(void)
if (session == NULL)
return -1;
- err = perf_header__process_sections(&session->header, session->fd,
- perf_file_section__process_buildids);
- if (err >= 0)
- dsos__fprintf_buildid(stdout);
+ if (with_hits)
+ perf_session__process_events(session, &build_id_list__event_ops);
+
+ dsos__fprintf_buildid(stdout, with_hits);
perf_session__delete(session);
return err;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index bd71b8ceafb7..18b3f505f9db 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -42,8 +42,8 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
struct addr_location al;
struct sample_data data = { .period = 1, };
- dump_printf("(IP, %d): %d: %p\n", event->header.misc,
- event->ip.pid, (void *)(long)event->ip.ip);
+ dump_printf("(IP, %d): %d: %#Lx\n", event->header.misc,
+ event->ip.pid, event->ip.ip);
if (event__preprocess_sample(event, session, &al, NULL) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
@@ -51,12 +51,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
return -1;
}
- if (al.filtered)
+ if (al.filtered || al.sym == NULL)
return 0;
event__parse_sample(event, session->sample_type, &data);
- if (al.sym && perf_session__add_hist_entry(session, &al, data.period)) {
+ if (perf_session__add_hist_entry(session, &al, data.period)) {
pr_warning("problem incrementing symbol count, skipping event\n");
return -1;
}
@@ -66,12 +66,12 @@ static int diff__process_sample_event(event_t *event, struct perf_session *sessi
}
static struct perf_event_ops event_ops = {
- .process_sample_event = diff__process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_exit_event = event__process_task,
- .process_fork_event = event__process_task,
- .process_lost_event = event__process_lost,
+ .sample = diff__process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .exit = event__process_task,
+ .fork = event__process_task,
+ .lost = event__process_lost,
};
static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
@@ -82,29 +82,19 @@ static void perf_session__insert_hist_entry_by_name(struct rb_root *root,
struct hist_entry *iter;
while (*p != NULL) {
- int cmp;
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
-
- cmp = strcmp(he->map->dso->name, iter->map->dso->name);
- if (cmp > 0)
+ if (hist_entry__cmp(he, iter) < 0)
p = &(*p)->rb_left;
- else if (cmp < 0)
+ else
p = &(*p)->rb_right;
- else {
- cmp = strcmp(he->sym->name, iter->sym->name);
- if (cmp > 0)
- p = &(*p)->rb_left;
- else
- p = &(*p)->rb_right;
- }
}
rb_link_node(&he->rb_node, parent, p);
rb_insert_color(&he->rb_node, root);
}
-static void perf_session__resort_by_name(struct perf_session *self)
+static void perf_session__resort_hist_entries(struct perf_session *self)
{
unsigned long position = 1;
struct rb_root tmp = RB_ROOT;
@@ -122,29 +112,28 @@ static void perf_session__resort_by_name(struct perf_session *self)
self->hists = tmp;
}
+static void perf_session__set_hist_entries_positions(struct perf_session *self)
+{
+ perf_session__output_resort(self, self->events_stats.total);
+ perf_session__resort_hist_entries(self);
+}
+
static struct hist_entry *
-perf_session__find_hist_entry_by_name(struct perf_session *self,
- struct hist_entry *he)
+perf_session__find_hist_entry(struct perf_session *self,
+ struct hist_entry *he)
{
struct rb_node *n = self->hists.rb_node;
while (n) {
struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
- int cmp = strcmp(he->map->dso->name, iter->map->dso->name);
+ int64_t cmp = hist_entry__cmp(he, iter);
- if (cmp > 0)
+ if (cmp < 0)
n = n->rb_left;
- else if (cmp < 0)
+ else if (cmp > 0)
n = n->rb_right;
- else {
- cmp = strcmp(he->sym->name, iter->sym->name);
- if (cmp > 0)
- n = n->rb_left;
- else if (cmp < 0)
- n = n->rb_right;
- else
- return iter;
- }
+ else
+ return iter;
}
return NULL;
@@ -155,11 +144,9 @@ static void perf_session__match_hists(struct perf_session *old_session,
{
struct rb_node *nd;
- perf_session__resort_by_name(old_session);
-
for (nd = rb_first(&new_session->hists); nd; nd = rb_next(nd)) {
struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
- pos->pair = perf_session__find_hist_entry_by_name(old_session, pos);
+ pos->pair = perf_session__find_hist_entry(old_session, pos);
}
}
@@ -177,9 +164,12 @@ static int __cmd_diff(void)
ret = perf_session__process_events(session[i], &event_ops);
if (ret)
goto out_delete;
- perf_session__output_resort(session[i], session[i]->events_stats.total);
}
+ perf_session__output_resort(session[1], session[1]->events_stats.total);
+ if (show_displacement)
+ perf_session__set_hist_entries_positions(session[0]);
+
perf_session__match_hists(session[0], session[1]);
perf_session__fprintf_hists(session[1], session[0],
show_displacement, stdout);
@@ -204,7 +194,7 @@ static const struct option options[] = {
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
- OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
"Don't shorten the pathnames taking into account the cwd"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 9f810b17c25c..215b584007b1 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -286,8 +286,7 @@ void list_common_cmds_help(void)
puts(" The most commonly used perf commands are:");
for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
- printf(" %s ", common_cmds[i].name);
- mput_char(' ', longest - strlen(common_cmds[i].name));
+ printf(" %-*s ", longest, common_cmds[i].name);
puts(common_cmds[i].help);
}
}
@@ -314,8 +313,6 @@ static const char *cmd_to_page(const char *perf_cmd)
return "perf";
else if (!prefixcmp(perf_cmd, "perf"))
return perf_cmd;
- else if (is_perf_command(perf_cmd))
- return prepend("perf-", perf_cmd);
else
return prepend("perf-", perf_cmd);
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 93c67bf53d2c..e8d716bf676e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -92,23 +92,18 @@ static void setup_cpunode_map(void)
if (!dir1)
return;
- while (true) {
- dent1 = readdir(dir1);
- if (!dent1)
- break;
-
- if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ while ((dent1 = readdir(dir1)) != NULL) {
+ if (dent1->d_type != DT_DIR ||
+ sscanf(dent1->d_name, "node%u", &mem) < 1)
continue;
snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
dir2 = opendir(buf);
if (!dir2)
continue;
- while (true) {
- dent2 = readdir(dir2);
- if (!dent2)
- break;
- if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ while ((dent2 = readdir(dir2)) != NULL) {
+ if (dent2->d_type != DT_LNK ||
+ sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
continue;
cpunode_map[cpu] = mem;
}
@@ -321,11 +316,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
@@ -342,22 +334,9 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return 0;
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
};
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
@@ -504,11 +483,19 @@ static void sort_result(void)
static int __cmd_kmem(void)
{
- int err;
+ int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "kmem record"))
+ goto out_delete;
+
+ if (perf_session__create_kernel_maps(session) < 0) {
+ pr_err("Problems creating kernel maps\n");
+ return -1;
+ }
+
setup_pager();
err = perf_session__process_events(session, &event_ops);
if (err != 0)
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index c1e6774fd3ed..34f2acb1cc88 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,11 +55,13 @@ static struct {
bool need_dwarf;
bool list_events;
bool force_add;
+ bool show_lines;
int nr_probe;
struct probe_point probes[MAX_PROBES];
struct strlist *dellist;
struct perf_session *psession;
struct map *kmap;
+ struct line_range line_range;
} session;
@@ -137,6 +139,16 @@ static int open_vmlinux(void)
pr_debug("Try to open %s\n", session.kmap->dso->long_name);
return open(session.kmap->dso->long_name, O_RDONLY);
}
+
+static int opt_show_lines(const struct option *opt __used,
+ const char *str, int unset __used)
+{
+ if (str)
+ parse_line_range_desc(str, &session.line_range);
+ INIT_LIST_HEAD(&session.line_range.line_list);
+ session.show_lines = true;
+ return 0;
+}
#endif
static const char * const probe_usage[] = {
@@ -144,6 +156,7 @@ static const char * const probe_usage[] = {
"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
"perf probe [<options>] --del '[GROUP:]EVENT' ...",
"perf probe --list",
+ "perf probe --line 'LINEDESC'",
NULL
};
@@ -182,9 +195,32 @@ static const struct option options[] = {
opt_add_probe_event),
OPT_BOOLEAN('f', "force", &session.force_add, "forcibly add events"
" with existing name"),
+#ifndef NO_LIBDWARF
+ OPT_CALLBACK('L', "line", NULL,
+ "FUNC[:RLN[+NUM|:RLN2]]|SRC:ALN[+NUM|:ALN2]",
+ "Show source code lines.", opt_show_lines),
+#endif
OPT_END()
};
+/* Initialize symbol maps for vmlinux */
+static void init_vmlinux(void)
+{
+ symbol_conf.sort_by_name = true;
+ if (symbol_conf.vmlinux_name == NULL)
+ symbol_conf.try_vmlinux_path = true;
+ else
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+ if (symbol__init() < 0)
+ die("Failed to init symbol map.");
+ session.psession = perf_session__new(NULL, O_WRONLY, false);
+ if (session.psession == NULL)
+ die("Failed to init perf_session.");
+ session.kmap = session.psession->vmlinux_maps[MAP__FUNCTION];
+ if (!session.kmap)
+ die("Could not find kernel map.\n");
+}
+
int cmd_probe(int argc, const char **argv, const char *prefix __used)
{
int i, ret;
@@ -203,7 +239,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
parse_probe_event_argv(argc, argv);
}
- if ((!session.nr_probe && !session.dellist && !session.list_events))
+ if ((!session.nr_probe && !session.dellist && !session.list_events &&
+ !session.show_lines))
usage_with_options(probe_usage, options);
if (debugfs_valid_mountpoint(debugfs_path) < 0)
@@ -215,10 +252,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
" --add/--del.\n");
usage_with_options(probe_usage, options);
}
+ if (session.show_lines) {
+ pr_warning(" Error: Don't use --list with --line.\n");
+ usage_with_options(probe_usage, options);
+ }
show_perf_probe_events();
return 0;
}
+#ifndef NO_LIBDWARF
+ if (session.show_lines) {
+ if (session.nr_probe != 0 || session.dellist) {
+ pr_warning(" Error: Don't use --line with"
+ " --add/--del.\n");
+ usage_with_options(probe_usage, options);
+ }
+ init_vmlinux();
+ fd = open_vmlinux();
+ if (fd < 0)
+ die("Could not open debuginfo file.");
+ ret = find_line_range(fd, &session.line_range);
+ if (ret <= 0)
+ die("Source line is not found.\n");
+ close(fd);
+ show_line_range(&session.line_range);
+ return 0;
+ }
+#endif
+
if (session.dellist) {
del_trace_kprobe_events(session.dellist);
strlist__delete(session.dellist);
@@ -226,20 +287,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
return 0;
}
- /* Initialize symbol maps for vmlinux */
- symbol_conf.sort_by_name = true;
- if (symbol_conf.vmlinux_name == NULL)
- symbol_conf.try_vmlinux_path = true;
- if (symbol__init() < 0)
- die("Failed to init symbol map.");
- session.psession = perf_session__new(NULL, O_WRONLY, false);
- if (session.psession == NULL)
- die("Failed to init perf_session.");
- session.kmap = map_groups__find_by_name(&session.psession->kmaps,
- MAP__FUNCTION,
- "[kernel.kallsyms]");
- if (!session.kmap)
- die("Could not find kernel map.\n");
+ /* Add probes */
+ init_vmlinux();
if (session.need_dwarf)
#ifdef NO_LIBDWARF
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 265425322734..7bb9ca1b30fa 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -113,12 +113,24 @@ static void write_output(void *buf, size_t size)
static void write_event(event_t *buf, size_t size)
{
- /*
- * Add it to the list of DSOs, so that when we finish this
- * record session we can pick the available build-ids.
- */
- if (buf->header.type == PERF_RECORD_MMAP)
- dsos__findnew(buf->mmap.filename);
+ size_t processed_size = buf->header.size;
+ event_t *ev = buf;
+
+ do {
+ /*
+ * Add it to the list of DSOs, so that when we finish this
+ * record session we can pick the available build-ids.
+ */
+ if (ev->header.type == PERF_RECORD_MMAP) {
+ struct list_head *head = &dsos__user;
+ if (ev->header.misc == 1)
+ head = &dsos__kernel;
+ __dsos__findnew(head, ev->mmap.filename);
+ }
+
+ ev = ((void *)ev) + ev->header.size;
+ processed_size += ev->header.size;
+ } while (processed_size < size);
write_output(buf, size);
}
@@ -465,6 +477,11 @@ static int __cmd_record(int argc, const char **argv)
return -1;
}
+ if (perf_session__create_kernel_maps(session) < 0) {
+ pr_err("Problems creating kernel maps\n");
+ return -1;
+ }
+
if (!file_new) {
err = perf_header__read(&session->header, output);
if (err < 0)
@@ -551,6 +568,19 @@ static int __cmd_record(int argc, const char **argv)
return err;
}
+ err = event__synthesize_kernel_mmap(process_synthesized_event,
+ session, "_text");
+ if (err < 0) {
+ pr_err("Couldn't record kernel reference relocation symbol.\n");
+ return err;
+ }
+
+ err = event__synthesize_modules(process_synthesized_event, session);
+ if (err < 0) {
+ pr_err("Couldn't record kernel reference relocation symbol.\n");
+ return err;
+ }
+
if (!system_wide && profile_cpu == -1)
event__synthesize_thread(pid, process_synthesized_event,
session);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index db10c0e8ecae..4c3d6997995b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -34,6 +34,8 @@
static char const *input_name = "perf.data";
static int force;
+static bool hide_unresolved;
+static bool dont_use_callchains;
static int show_threads;
static struct perf_read_values show_threads_values;
@@ -91,11 +93,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
if (session->sample_type & PERF_SAMPLE_CALLCHAIN) {
unsigned int i;
@@ -121,7 +120,7 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return -1;
}
- if (al.filtered)
+ if (al.filtered || (hide_unresolved && al.sym == NULL))
return 0;
if (perf_session__add_hist_entry(session, &al, data.callchain, data.period)) {
@@ -156,14 +155,14 @@ static int process_read_event(event_t *event, struct perf_session *session __use
return 0;
}
-static int sample_type_check(struct perf_session *session)
+static int perf_session__setup_sample_type(struct perf_session *self)
{
- if (!(session->sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
fprintf(stderr, "selected --sort parent, but no"
" callchain data. Did you call"
" perf record without -g?\n");
- return -1;
+ return -EINVAL;
}
if (symbol_conf.use_callchain) {
fprintf(stderr, "selected -g but no callchain data."
@@ -171,12 +170,13 @@ static int sample_type_check(struct perf_session *session)
" -g?\n");
return -1;
}
- } else if (callchain_param.mode != CHAIN_NONE && !symbol_conf.use_callchain) {
+ } else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (register_callchain_param(&callchain_param) < 0) {
fprintf(stderr, "Can't register callchain"
" params\n");
- return -1;
+ return -EINVAL;
}
}
@@ -184,20 +184,18 @@ static int sample_type_check(struct perf_session *session)
}
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_mmap_event = event__process_mmap,
- .process_comm_event = event__process_comm,
- .process_exit_event = event__process_task,
- .process_fork_event = event__process_task,
- .process_lost_event = event__process_lost,
- .process_read_event = process_read_event,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .mmap = event__process_mmap,
+ .comm = event__process_comm,
+ .exit = event__process_task,
+ .fork = event__process_task,
+ .lost = event__process_lost,
+ .read = process_read_event,
};
-
static int __cmd_report(void)
{
- int ret;
+ int ret = -EINVAL;
struct perf_session *session;
session = perf_session__new(input_name, O_RDONLY, force);
@@ -207,6 +205,10 @@ static int __cmd_report(void)
if (show_threads)
perf_read_values_init(&show_threads_values);
+ ret = perf_session__setup_sample_type(session);
+ if (ret)
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
@@ -243,11 +245,19 @@ out_delete:
static int
parse_callchain_opt(const struct option *opt __used, const char *arg,
- int unset __used)
+ int unset)
{
char *tok;
char *endptr;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ dont_use_callchains = true;
+ return 0;
+ }
+
symbol_conf.use_callchain = true;
if (!arg)
@@ -319,7 +329,7 @@ static const struct option options[] = {
"pretty printing style key: normal raw"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
"sort by key(s): pid, comm, dso, symbol, parent"),
- OPT_BOOLEAN('P', "full-paths", &event_ops.full_paths,
+ OPT_BOOLEAN('P', "full-paths", &symbol_conf.full_paths,
"Don't shorten the pathnames taking into account the cwd"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
"regex filter to identify parent, see: '--sort parent'"),
@@ -340,6 +350,8 @@ static const struct option options[] = {
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
+ OPT_BOOLEAN('U', "hide-unresolved", &hide_unresolved,
+ "Only display entries resolved to a symbol"),
OPT_END()
};
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 80209df6cfe8..4f5a03e43444 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1621,11 +1621,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, data.pid);
if (thread == NULL) {
@@ -1653,33 +1650,22 @@ static int process_lost_event(event_t *event __used,
return 0;
}
-static int sample_type_check(struct perf_session *session __used)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .process_lost_event = process_lost_event,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
+ .lost = process_lost_event,
};
static int read_events(void)
{
- int err;
+ int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
if (session == NULL)
return -ENOMEM;
- err = perf_session__process_events(session, &event_ops);
+ if (perf_session__has_traces(session, "record -R"))
+ err = perf_session__process_events(session, &event_ops);
+
perf_session__delete(session);
return err;
}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c70d72003557..e8c85d5aec41 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,6 +44,7 @@
#include "util/parse-events.h"
#include "util/event.h"
#include "util/debug.h"
+#include "util/header.h"
#include <sys/prctl.h>
#include <math.h>
@@ -79,6 +80,8 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS];
static int event_scaled[MAX_COUNTERS];
+static volatile int done = 0;
+
struct stats
{
double n, mean, M2;
@@ -247,61 +250,64 @@ static int run_perf_stat(int argc __used, const char **argv)
unsigned long long t0, t1;
int status = 0;
int counter;
- int pid;
+ int pid = target_pid;
int child_ready_pipe[2], go_pipe[2];
+ const bool forks = (target_pid == -1 && argc > 0);
char buf;
if (!system_wide)
nr_cpus = 1;
- if (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0) {
+ if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
exit(1);
}
- if ((pid = fork()) < 0)
- perror("failed to fork");
+ if (forks) {
+ if ((pid = fork()) < 0)
+ perror("failed to fork");
+
+ if (!pid) {
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Do a dummy execvp to get the PLT entry resolved,
+ * so we avoid the resolver overhead on the real
+ * execvp call.
+ */
+ execvp("", (char **)argv);
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ if (read(go_pipe[0], &buf, 1) == -1)
+ perror("unable to read pipe");
+
+ execvp(argv[0], (char **)argv);
+
+ perror(argv[0]);
+ exit(-1);
+ }
- if (!pid) {
- close(child_ready_pipe[0]);
- close(go_pipe[1]);
- fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+ child_pid = pid;
/*
- * Do a dummy execvp to get the PLT entry resolved,
- * so we avoid the resolver overhead on the real
- * execvp call.
- */
- execvp("", (char **)argv);
-
- /*
- * Tell the parent we're ready to go
+ * Wait for the child to be ready to exec.
*/
close(child_ready_pipe[1]);
-
- /*
- * Wait until the parent tells us to go.
- */
- if (read(go_pipe[0], &buf, 1) == -1)
+ close(go_pipe[0]);
+ if (read(child_ready_pipe[0], &buf, 1) == -1)
perror("unable to read pipe");
-
- execvp(argv[0], (char **)argv);
-
- perror(argv[0]);
- exit(-1);
+ close(child_ready_pipe[0]);
}
- child_pid = pid;
-
- /*
- * Wait for the child to be ready to exec.
- */
- close(child_ready_pipe[1]);
- close(go_pipe[0]);
- if (read(child_ready_pipe[0], &buf, 1) == -1)
- perror("unable to read pipe");
- close(child_ready_pipe[0]);
-
for (counter = 0; counter < nr_counters; counter++)
create_perf_stat_counter(counter, pid);
@@ -310,8 +316,12 @@ static int run_perf_stat(int argc __used, const char **argv)
*/
t0 = rdclock();
- close(go_pipe[1]);
- wait(&status);
+ if (forks) {
+ close(go_pipe[1]);
+ wait(&status);
+ } else {
+ while(!done);
+ }
t1 = rdclock();
@@ -417,10 +427,13 @@ static void print_stat(int argc, const char **argv)
fflush(stdout);
fprintf(stderr, "\n");
- fprintf(stderr, " Performance counter stats for \'%s", argv[0]);
-
- for (i = 1; i < argc; i++)
- fprintf(stderr, " %s", argv[i]);
+ fprintf(stderr, " Performance counter stats for ");
+ if(target_pid == -1) {
+ fprintf(stderr, "\'%s", argv[0]);
+ for (i = 1; i < argc; i++)
+ fprintf(stderr, " %s", argv[i]);
+ }else
+ fprintf(stderr, "task pid \'%d", target_pid);
fprintf(stderr, "\'");
if (run_count > 1)
@@ -445,6 +458,9 @@ static volatile int signr = -1;
static void skip_signal(int signo)
{
+ if(target_pid != -1)
+ done = 1;
+
signr = signo;
}
@@ -461,7 +477,7 @@ static void sig_atexit(void)
}
static const char * const stat_usage[] = {
- "perf stat [<options>] <command>",
+ "perf stat [<options>] [<command>]",
NULL
};
@@ -492,7 +508,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, options, stat_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc)
+ if (!argc && target_pid == -1)
usage_with_options(stat_usage, options);
if (run_count <= 0)
usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3f8bbcfb1e9b..0d4d8ff7914b 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1029,33 +1029,24 @@ static void process_samples(struct perf_session *session)
}
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr, "No trace samples found in the file.\n"
- "Have you used 'perf timechart record' to record it?\n");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_comm_event = process_comm_event,
- .process_fork_event = process_fork_event,
- .process_exit_event = process_exit_event,
- .process_sample_event = queue_sample_event,
- .sample_type_check = sample_type_check,
+ .comm = process_comm_event,
+ .fork = process_fork_event,
+ .exit = process_exit_event,
+ .sample = queue_sample_event,
};
static int __cmd_timechart(void)
{
struct perf_session *session = perf_session__new(input_name, O_RDONLY, 0);
- int ret;
+ int ret = -EINVAL;
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "timechart record"))
+ goto out_delete;
+
ret = perf_session__process_events(session, &event_ops);
if (ret)
goto out_delete;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ddc584b64871..7a8a77ec2c9d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -667,7 +667,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg)
}
if (!found) {
- fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
+ fprintf(stderr, "Sorry, %s is not active.\n", buf);
sleep(1);
return;
} else
@@ -1165,6 +1165,11 @@ static int __cmd_top(void)
if (session == NULL)
return -ENOMEM;
+ if (perf_session__create_kernel_maps(session) < 0) {
+ pr_err("Problems creating kernel maps\n");
+ return -1;
+ }
+
if (target_pid != -1)
event__synthesize_thread(target_pid, event__process, session);
else
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 574a215e800b..8e9cbfe608d6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -75,11 +75,8 @@ static int process_sample_event(event_t *event, struct perf_session *session)
event__parse_sample(event, session->sample_type, &data);
- dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
- event->header.misc,
- data.pid, data.tid,
- (void *)(long)data.ip,
- (long long)data.period);
+ dump_printf("(IP, %d): %d/%d: %#Lx period: %Ld\n", event->header.misc,
+ data.pid, data.tid, data.ip, data.period);
thread = perf_session__findnew(session, event->ip.pid);
if (thread == NULL) {
@@ -103,22 +100,9 @@ static int process_sample_event(event_t *event, struct perf_session *session)
return 0;
}
-static int sample_type_check(struct perf_session *session)
-{
- if (!(session->sample_type & PERF_SAMPLE_RAW)) {
- fprintf(stderr,
- "No trace sample to read. Did you call perf record "
- "without -R?");
- return -1;
- }
-
- return 0;
-}
-
static struct perf_event_ops event_ops = {
- .process_sample_event = process_sample_event,
- .process_comm_event = event__process_comm,
- .sample_type_check = sample_type_check,
+ .sample = process_sample_event,
+ .comm = event__process_comm,
};
static int __cmd_trace(struct perf_session *session)
@@ -592,6 +576,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used)
if (session == NULL)
return -ENOMEM;
+ if (!perf_session__has_traces(session, "record -R"))
+ return -EINVAL;
+
if (generate_script_lang) {
struct stat perf_stat;
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 71dc7c3fe7b2..f73d1d90f5bd 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -3,6 +3,7 @@
# command name category [deprecated] [common]
#
perf-annotate mainporcelain common
+perf-archive mainporcelain
perf-bench mainporcelain common
perf-buildid-list mainporcelain common
perf-diff mainporcelain common
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 8d0de5130db3..bd0bb1b1279b 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -101,10 +101,10 @@ enum hw_event_ids {
*/
PERF_COUNT_HW_CPU_CYCLES = 0,
PERF_COUNT_HW_INSTRUCTIONS = 1,
- PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
PERF_COUNT_HW_CACHE_MISSES = 3,
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
- PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
};
@@ -131,8 +131,8 @@ software events, selected by 'event_id':
*/
enum sw_event_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
- PERF_COUNT_SW_TASK_CLOCK = 1,
- PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
PERF_COUNT_SW_CPU_MIGRATIONS = 4,
PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
new file mode 100644
index 000000000000..45fbe2f07b15
--- /dev/null
+++ b/tools/perf/perf-archive.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# perf archive
+# Arnaldo Carvalho de Melo <acme@redhat.com>
+
+PERF_DATA=perf.data
+if [ $# -ne 0 ] ; then
+ PERF_DATA=$1
+fi
+
+DEBUGDIR=~/.debug/
+BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+
+perf buildid-list -i $PERF_DATA --with-hits > $BUILDIDS
+if [ ! -s $BUILDIDS ] ; then
+ echo "perf archive: no build-ids found"
+ rm -f $BUILDIDS
+ exit 1
+fi
+
+MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
+
+cut -d ' ' -f 1 $BUILDIDS | \
+while read build_id ; do
+ linkname=$DEBUGDIR.build-id/${build_id:0:2}/${build_id:2}
+ filename=$(readlink -f $linkname)
+ echo ${linkname#$DEBUGDIR} >> $MANIFEST
+ echo ${filename#$DEBUGDIR} >> $MANIFEST
+done
+
+tar cfj $PERF_DATA.tar.bz2 -C $DEBUGDIR -T $MANIFEST
+rm -f $MANIFEST $BUILDIDS
+exit 0
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 89eae67a358e..809e3e23e507 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -388,7 +388,7 @@ static int run_argv(int *argcp, const char ***argv)
/* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */
static void get_debugfs_mntpt(void)
{
- const char *path = debugfs_find_mountpoint();
+ const char *path = debugfs_mount(NULL);
if (path)
strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt));
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
deleted file mode 100644
index b557b836de3d..000000000000
--- a/tools/perf/util/data_map.c
+++ /dev/null
@@ -1,252 +0,0 @@
-#include "symbol.h"
-#include "util.h"
-#include "debug.h"
-#include "thread.h"
-#include "session.h"
-
-static int process_event_stub(event_t *event __used,
- struct perf_session *session __used)
-{
- dump_printf(": unhandled!\n");
- return 0;
-}
-
-static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
-{
- if (!handler->process_sample_event)
- handler->process_sample_event = process_event_stub;
- if (!handler->process_mmap_event)
- handler->process_mmap_event = process_event_stub;
- if (!handler->process_comm_event)
- handler->process_comm_event = process_event_stub;
- if (!handler->process_fork_event)
- handler->process_fork_event = process_event_stub;
- if (!handler->process_exit_event)
- handler->process_exit_event = process_event_stub;
- if (!handler->process_lost_event)
- handler->process_lost_event = process_event_stub;
- if (!handler->process_read_event)
- handler->process_read_event = process_event_stub;
- if (!handler->process_throttle_event)
- handler->process_throttle_event = process_event_stub;
- if (!handler->process_unthrottle_event)
- handler->process_unthrottle_event = process_event_stub;
-}
-
-static const char *event__name[] = {
- [0] = "TOTAL",
- [PERF_RECORD_MMAP] = "MMAP",
- [PERF_RECORD_LOST] = "LOST",
- [PERF_RECORD_COMM] = "COMM",
- [PERF_RECORD_EXIT] = "EXIT",
- [PERF_RECORD_THROTTLE] = "THROTTLE",
- [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
- [PERF_RECORD_FORK] = "FORK",
- [PERF_RECORD_READ] = "READ",
- [PERF_RECORD_SAMPLE] = "SAMPLE",
-};
-
-unsigned long event__total[PERF_RECORD_MAX];
-
-void event__print_totals(void)
-{
- int i;
- for (i = 0; i < PERF_RECORD_MAX; ++i)
- pr_info("%10s events: %10ld\n",
- event__name[i], event__total[i]);
-}
-
-static int process_event(event_t *event, struct perf_session *session,
- struct perf_event_ops *ops,
- unsigned long offset, unsigned long head)
-{
- trace_event(event);
-
- if (event->header.type < PERF_RECORD_MAX) {
- dump_printf("%p [%p]: PERF_RECORD_%s",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event__name[event->header.type]);
- ++event__total[0];
- ++event__total[event->header.type];
- }
-
- switch (event->header.type) {
- case PERF_RECORD_SAMPLE:
- return ops->process_sample_event(event, session);
- case PERF_RECORD_MMAP:
- return ops->process_mmap_event(event, session);
- case PERF_RECORD_COMM:
- return ops->process_comm_event(event, session);
- case PERF_RECORD_FORK:
- return ops->process_fork_event(event, session);
- case PERF_RECORD_EXIT:
- return ops->process_exit_event(event, session);
- case PERF_RECORD_LOST:
- return ops->process_lost_event(event, session);
- case PERF_RECORD_READ:
- return ops->process_read_event(event, session);
- case PERF_RECORD_THROTTLE:
- return ops->process_throttle_event(event, session);
- case PERF_RECORD_UNTHROTTLE:
- return ops->process_unthrottle_event(event, session);
- default:
- ops->total_unknown++;
- return -1;
- }
-}
-
-int perf_header__read_build_ids(int input, u64 offset, u64 size)
-{
- struct build_id_event bev;
- char filename[PATH_MAX];
- u64 limit = offset + size;
- int err = -1;
-
- while (offset < limit) {
- struct dso *dso;
- ssize_t len;
-
- if (read(input, &bev, sizeof(bev)) != sizeof(bev))
- goto out;
-
- len = bev.header.size - sizeof(bev);
- if (read(input, filename, len) != len)
- goto out;
-
- dso = dsos__findnew(filename);
- if (dso != NULL)
- dso__set_build_id(dso, &bev.build_id);
-
- offset += bev.header.size;
- }
- err = 0;
-out:
- return err;
-}
-
-static struct thread *perf_session__register_idle_thread(struct perf_session *self)
-{
- struct thread *thread = perf_session__findnew(self, 0);
-
- if (!thread || thread__set_comm(thread, "swapper")) {
- pr_err("problem inserting idle task.\n");
- thread = NULL;
- }
-
- return thread;
-}
-
-int perf_session__process_events(struct perf_session *self,
- struct perf_event_ops *ops)
-{
- int err;
- unsigned long head, shift;
- unsigned long offset = 0;
- size_t page_size;
- event_t *event;
- uint32_t size;
- char *buf;
-
- if (perf_session__register_idle_thread(self) == NULL)
- return -ENOMEM;
-
- perf_event_ops__fill_defaults(ops);
-
- page_size = getpagesize();
-
- head = self->header.data_offset;
- self->sample_type = perf_header__sample_type(&self->header);
-
- err = -EINVAL;
- if (ops->sample_type_check && ops->sample_type_check(self) < 0)
- goto out_err;
-
- if (!ops->full_paths) {
- char bf[PATH_MAX];
-
- if (getcwd(bf, sizeof(bf)) == NULL) {
- err = -errno;
-out_getcwd_err:
- pr_err("failed to get the current directory\n");
- goto out_err;
- }
- self->cwd = strdup(bf);
- if (self->cwd == NULL) {
- err = -ENOMEM;
- goto out_getcwd_err;
- }
- self->cwdlen = strlen(self->cwd);
- }
-
- shift = page_size * (head / page_size);
- offset += shift;
- head -= shift;
-
-remap:
- buf = mmap(NULL, page_size * self->mmap_window, PROT_READ,
- MAP_SHARED, self->fd, offset);
- if (buf == MAP_FAILED) {
- pr_err("failed to mmap file\n");
- err = -errno;
- goto out_err;
- }
-
-more:
- event = (event_t *)(buf + head);
-
- size = event->header.size;
- if (!size)
- size = 8;
-
- if (head + event->header.size >= page_size * self->mmap_window) {
- int munmap_ret;
-
- shift = page_size * (head / page_size);
-
- munmap_ret = munmap(buf, page_size * self->mmap_window);
- assert(munmap_ret == 0);
-
- offset += shift;
- head -= shift;
- goto remap;
- }
-
- size = event->header.size;
-
- dump_printf("\n%p [%p]: event: %d\n",
- (void *)(offset + head),
- (void *)(long)event->header.size,
- event->header.type);
-
- if (!size || process_event(event, self, ops, offset, head) < 0) {
-
- dump_printf("%p [%p]: skipping unknown header type: %d\n",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event->header.type);
-
- /*
- * assume we lost track of the stream, check alignment, and
- * increment a single u64 in the hope to catch on again 'soon'.
- */
-
- if (unlikely(head & 7))
- head &= ~7ULL;
-
- size = 8;
- }
-
- head += size;
-
- if (offset + head >= self->header.data_offset + self->header.data_size)
- goto done;
-
- if (offset + head < self->size)
- goto more;
-
-done:
- err = 0;
-out_err:
- return err;
-}
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 28d520d5a1fb..0905600c3851 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -9,6 +9,7 @@
#include "color.h"
#include "event.h"
#include "debug.h"
+#include "util.h"
int verbose = 0;
int dump_trace = 0;
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index 06b73ee02c49..a88fefc0cc0a 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -106,16 +106,14 @@ int debugfs_valid_entry(const char *path)
return 0;
}
-/* mount the debugfs somewhere */
+/* mount the debugfs somewhere if it's not mounted */
-int debugfs_mount(const char *mountpoint)
+char *debugfs_mount(const char *mountpoint)
{
- char mountcmd[128];
-
/* see if it's already mounted */
if (debugfs_find_mountpoint()) {
debugfs_premounted = 1;
- return 0;
+ return debugfs_mountpoint;
}
/* if not mounted and no argument */
@@ -127,13 +125,14 @@ int debugfs_mount(const char *mountpoint)
mountpoint = "/sys/kernel/debug";
}
+ if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
+ return NULL;
+
/* save the mountpoint */
strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+ debugfs_found = 1;
- /* mount it */
- snprintf(mountcmd, sizeof(mountcmd),
- "/bin/mount -t debugfs debugfs %s", mountpoint);
- return system(mountcmd);
+ return debugfs_mountpoint;
}
/* umount the debugfs */
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 3cd14f9ae784..83a02879745f 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -15,7 +15,7 @@
extern const char *debugfs_find_mountpoint(void);
extern int debugfs_valid_mountpoint(const char *debugfs);
extern int debugfs_valid_entry(const char *path);
-extern int debugfs_mount(const char *mountpoint);
+extern char *debugfs_mount(const char *mountpoint);
extern int debugfs_umount(void);
extern int debugfs_write(const char *entry, const char *value);
extern int debugfs_read(const char *entry, char *buffer, size_t size);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bb0fd6da2d56..dc13cad828d7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -8,8 +8,7 @@
#include "thread.h"
static pid_t event__synthesize_comm(pid_t pid, int full,
- int (*process)(event_t *event,
- struct perf_session *session),
+ event__handler_t process,
struct perf_session *session)
{
event_t ev;
@@ -91,8 +90,7 @@ out_failure:
}
static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
- int (*process)(event_t *event,
- struct perf_session *session),
+ event__handler_t process,
struct perf_session *session)
{
char filename[PATH_MAX];
@@ -112,7 +110,10 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
while (1) {
char bf[BUFSIZ], *pbf = bf;
event_t ev = {
- .header = { .type = PERF_RECORD_MMAP },
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .misc = 0, /* Just like the kernel, see kernel/perf_event.c __perf_event_mmap */
+ },
};
int n;
size_t size;
@@ -156,9 +157,38 @@ static int event__synthesize_mmap_events(pid_t pid, pid_t tgid,
return 0;
}
-int event__synthesize_thread(pid_t pid,
- int (*process)(event_t *event,
- struct perf_session *session),
+int event__synthesize_modules(event__handler_t process,
+ struct perf_session *session)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(&session->kmaps.maps[MAP__FUNCTION]);
+ nd; nd = rb_next(nd)) {
+ event_t ev;
+ size_t size;
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+
+ if (pos->dso->kernel)
+ continue;
+
+ size = ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+ memset(&ev, 0, sizeof(ev));
+ ev.mmap.header.misc = 1; /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
+ ev.mmap.header.type = PERF_RECORD_MMAP;
+ ev.mmap.header.size = (sizeof(ev.mmap) -
+ (sizeof(ev.mmap.filename) - size));
+ ev.mmap.start = pos->start;
+ ev.mmap.len = pos->end - pos->start;
+
+ memcpy(ev.mmap.filename, pos->dso->long_name,
+ pos->dso->long_name_len + 1);
+ process(&ev, session);
+ }
+
+ return 0;
+}
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session)
{
pid_t tgid = event__synthesize_comm(pid, 1, process, session);
@@ -167,8 +197,7 @@ int event__synthesize_thread(pid_t pid,
return event__synthesize_mmap_events(pid, tgid, process, session);
}
-void event__synthesize_threads(int (*process)(event_t *event,
- struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
struct perf_session *session)
{
DIR *proc;
@@ -189,6 +218,59 @@ void event__synthesize_threads(int (*process)(event_t *event,
closedir(proc);
}
+struct process_symbol_args {
+ const char *name;
+ u64 start;
+};
+
+static int find_symbol_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct process_symbol_args *args = arg;
+
+ /*
+ * Must be a function or at least an alias, as in PARISC64, where "_text" is
+ * an 'A' to the same address as "_stext".
+ */
+ if (!(symbol_type__is_a(type, MAP__FUNCTION) ||
+ type == 'A') || strcmp(name, args->name))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+int event__synthesize_kernel_mmap(event__handler_t process,
+ struct perf_session *session,
+ const char *symbol_name)
+{
+ size_t size;
+ event_t ev = {
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .misc = 1, /* kernel uses 0 for user space maps, see kernel/perf_event.c __perf_event_mmap */
+ },
+ };
+ /*
+ * We should get this from /sys/kernel/sections/.text, but till that is
+ * available use this, and after it is use this as a fallback for older
+ * kernels.
+ */
+ struct process_symbol_args args = { .name = symbol_name, };
+
+ if (kallsyms__parse("/proc/kallsyms", &args, find_symbol_cb) <= 0)
+ return -ENOENT;
+
+ size = snprintf(ev.mmap.filename, sizeof(ev.mmap.filename),
+ "[kernel.kallsyms.%s]", symbol_name) + 1;
+ size = ALIGN(size, sizeof(u64));
+ ev.mmap.header.size = (sizeof(ev.mmap) - (sizeof(ev.mmap.filename) - size));
+ ev.mmap.pgoff = args.start;
+ ev.mmap.start = session->vmlinux_maps[MAP__FUNCTION]->start;
+ ev.mmap.len = session->vmlinux_maps[MAP__FUNCTION]->end - ev.mmap.start ;
+
+ return process(&ev, session);
+}
+
static void thread__comm_adjust(struct thread *self)
{
char *comm = self->comm;
@@ -240,22 +322,83 @@ int event__process_lost(event_t *self, struct perf_session *session)
int event__process_mmap(event_t *self, struct perf_session *session)
{
- struct thread *thread = perf_session__findnew(session, self->mmap.pid);
- struct map *map = map__new(&self->mmap, MAP__FUNCTION,
- session->cwd, session->cwdlen);
+ struct thread *thread;
+ struct map *map;
+
+ dump_printf(" %d/%d: [%#Lx(%#Lx) @ %#Lx]: %s\n",
+ self->mmap.pid, self->mmap.tid, self->mmap.start,
+ self->mmap.len, self->mmap.pgoff, self->mmap.filename);
+
+ if (self->mmap.pid == 0) {
+ static const char kmmap_prefix[] = "[kernel.kallsyms.";
+
+ if (self->mmap.filename[0] == '/') {
+ char short_module_name[1024];
+ char *name = strrchr(self->mmap.filename, '/'), *dot;
+
+ if (name == NULL)
+ goto out_problem;
+
+ ++name; /* skip / */
+ dot = strrchr(name, '.');
+ if (dot == NULL)
+ goto out_problem;
+
+ snprintf(short_module_name, sizeof(short_module_name),
+ "[%.*s]", (int)(dot - name), name);
+ strxfrchar(short_module_name, '-', '_');
+
+ map = perf_session__new_module_map(session,
+ self->mmap.start,
+ self->mmap.filename);
+ if (map == NULL)
+ goto out_problem;
+
+ name = strdup(short_module_name);
+ if (name == NULL)
+ goto out_problem;
+
+ map->dso->short_name = name;
+ map->end = map->start + self->mmap.len;
+ } else if (memcmp(self->mmap.filename, kmmap_prefix,
+ sizeof(kmmap_prefix) - 1) == 0) {
+ const char *symbol_name = (self->mmap.filename +
+ sizeof(kmmap_prefix) - 1);
+ /*
+ * Should be there already, from the build-id table in
+ * the header.
+ */
+ struct dso *kernel = __dsos__findnew(&dsos__kernel,
+ "[kernel.kallsyms]");
+ if (kernel == NULL)
+ goto out_problem;
+
+ if (__map_groups__create_kernel_maps(&session->kmaps,
+ session->vmlinux_maps,
+ kernel) < 0)
+ goto out_problem;
+
+ session->vmlinux_maps[MAP__FUNCTION]->start = self->mmap.start;
+ session->vmlinux_maps[MAP__FUNCTION]->end = self->mmap.start + self->mmap.len;
+
+ perf_session__set_kallsyms_ref_reloc_sym(session, symbol_name,
+ self->mmap.pgoff);
+ }
+ return 0;
+ }
- dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n",
- self->mmap.pid, self->mmap.tid,
- (void *)(long)self->mmap.start,
- (void *)(long)self->mmap.len,
- (void *)(long)self->mmap.pgoff,
- self->mmap.filename);
+ thread = perf_session__findnew(session, self->mmap.pid);
+ map = map__new(&self->mmap, MAP__FUNCTION,
+ session->cwd, session->cwdlen);
if (thread == NULL || map == NULL)
- dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
- else
- thread__insert_map(thread, map);
+ goto out_problem;
+ thread__insert_map(thread, map);
+ return 0;
+
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
return 0;
}
@@ -284,11 +427,10 @@ int event__process_task(event_t *self, struct perf_session *session)
return 0;
}
-void thread__find_addr_location(struct thread *self,
- struct perf_session *session, u8 cpumode,
- enum map_type type, u64 addr,
- struct addr_location *al,
- symbol_filter_t filter)
+void thread__find_addr_map(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al)
{
struct map_groups *mg = &self->mg;
@@ -303,7 +445,6 @@ void thread__find_addr_location(struct thread *self,
else {
al->level = 'H';
al->map = NULL;
- al->sym = NULL;
return;
}
try_again:
@@ -322,11 +463,21 @@ try_again:
mg = &session->kmaps;
goto try_again;
}
- al->sym = NULL;
- } else {
+ } else
al->addr = al->map->map_ip(al->map, al->addr);
+}
+
+void thread__find_addr_location(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al,
+ symbol_filter_t filter)
+{
+ thread__find_addr_map(self, session, cpumode, type, addr, al);
+ if (al->map != NULL)
al->sym = map__find_symbol(al->map, session, al->addr, filter);
- }
+ else
+ al->sym = NULL;
}
static void dso__calc_col_width(struct dso *self)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 690a96d0467c..50a7132887f5 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -1,10 +1,10 @@
#ifndef __PERF_RECORD_H
#define __PERF_RECORD_H
+#include <limits.h>
+
#include "../perf.h"
-#include "util.h"
-#include <linux/list.h>
-#include <linux/rbtree.h>
+#include "map.h"
/*
* PERF_SAMPLE_IP | PERF_SAMPLE_TID | *
@@ -101,74 +101,19 @@ struct events_stats {
void event__print_totals(void);
-enum map_type {
- MAP__FUNCTION = 0,
- MAP__VARIABLE,
-};
-
-#define MAP__NR_TYPES (MAP__VARIABLE + 1)
-
-struct map {
- union {
- struct rb_node rb_node;
- struct list_head node;
- };
- u64 start;
- u64 end;
- enum map_type type;
- u64 pgoff;
- u64 (*map_ip)(struct map *, u64);
- u64 (*unmap_ip)(struct map *, u64);
- struct dso *dso;
-};
-
-static inline u64 map__map_ip(struct map *map, u64 ip)
-{
- return ip - map->start + map->pgoff;
-}
-
-static inline u64 map__unmap_ip(struct map *map, u64 ip)
-{
- return ip + map->start - map->pgoff;
-}
-
-static inline u64 identity__map_ip(struct map *map __used, u64 ip)
-{
- return ip;
-}
-
-struct symbol;
-
-typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
-
-void map__init(struct map *self, enum map_type type,
- u64 start, u64 end, u64 pgoff, struct dso *dso);
-struct map *map__new(struct mmap_event *event, enum map_type,
- char *cwd, int cwdlen);
-void map__delete(struct map *self);
-struct map *map__clone(struct map *self);
-int map__overlap(struct map *l, struct map *r);
-size_t map__fprintf(struct map *self, FILE *fp);
-
struct perf_session;
-int map__load(struct map *self, struct perf_session *session,
- symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
- u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
- struct perf_session *session,
- symbol_filter_t filter);
-void map__fixup_start(struct map *self);
-void map__fixup_end(struct map *self);
-
-int event__synthesize_thread(pid_t pid,
- int (*process)(event_t *event,
- struct perf_session *session),
+typedef int (*event__handler_t)(event_t *event, struct perf_session *session);
+
+int event__synthesize_thread(pid_t pid, event__handler_t process,
struct perf_session *session);
-void event__synthesize_threads(int (*process)(event_t *event,
- struct perf_session *session),
+void event__synthesize_threads(event__handler_t process,
struct perf_session *session);
+int event__synthesize_kernel_mmap(event__handler_t process,
+ struct perf_session *session,
+ const char *symbol_name);
+int event__synthesize_modules(event__handler_t process,
+ struct perf_session *session);
int event__process_comm(event_t *self, struct perf_session *session);
int event__process_lost(event_t *self, struct perf_session *session);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8a0bca55106f..1b65fed0dd2d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1,8 +1,10 @@
#include <sys/types.h>
+#include <byteswap.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <linux/list.h>
+#include <linux/kernel.h>
#include "util.h"
#include "header.h"
@@ -105,24 +107,28 @@ struct perf_trace_event_type {
static int event_count;
static struct perf_trace_event_type *events;
-void perf_header__push_event(u64 id, const char *name)
+int perf_header__push_event(u64 id, const char *name)
{
if (strlen(name) > MAX_EVENT_NAME)
pr_warning("Event %s will be truncated\n", name);
if (!events) {
events = malloc(sizeof(struct perf_trace_event_type));
- if (!events)
- die("nomem");
+ if (events == NULL)
+ return -ENOMEM;
} else {
- events = realloc(events, (event_count + 1) * sizeof(struct perf_trace_event_type));
- if (!events)
- die("nomem");
+ struct perf_trace_event_type *nevents;
+
+ nevents = realloc(events, (event_count + 1) * sizeof(*events));
+ if (nevents == NULL)
+ return -ENOMEM;
+ events = nevents;
}
memset(&events[event_count], 0, sizeof(struct perf_trace_event_type));
events[event_count].event_id = id;
strncpy(events[event_count].name, name, MAX_EVENT_NAME - 1);
event_count++;
+ return 0;
}
char *perf_header__find_event(u64 id)
@@ -169,31 +175,45 @@ static int do_write(int fd, const void *buf, size_t size)
return 0;
}
-static int __dsos__write_buildid_table(struct list_head *head, int fd)
+#define NAME_ALIGN 64
+
+static int write_padded(int fd, const void *bf, size_t count,
+ size_t count_aligned)
{
-#define NAME_ALIGN 64
- struct dso *pos;
static const char zero_buf[NAME_ALIGN];
+ int err = do_write(fd, bf, count);
- list_for_each_entry(pos, head, node) {
+ if (!err)
+ err = do_write(fd, zero_buf, count_aligned - count);
+
+ return err;
+}
+
+#define dsos__for_each_with_build_id(pos, head) \
+ list_for_each_entry(pos, head, node) \
+ if (!pos->has_build_id) \
+ continue; \
+ else
+
+static int __dsos__write_buildid_table(struct list_head *head, u16 misc, int fd)
+{
+ struct dso *pos;
+
+ dsos__for_each_with_build_id(pos, head) {
int err;
struct build_id_event b;
- size_t len;
+ size_t len = pos->long_name_len + 1;
- if (!pos->has_build_id)
- continue;
- len = pos->long_name_len + 1;
len = ALIGN(len, NAME_ALIGN);
memset(&b, 0, sizeof(b));
memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+ b.header.misc = misc;
b.header.size = sizeof(b) + len;
err = do_write(fd, &b, sizeof(b));
if (err < 0)
return err;
- err = do_write(fd, pos->long_name, pos->long_name_len + 1);
- if (err < 0)
- return err;
- err = do_write(fd, zero_buf, len - pos->long_name_len - 1);
+ err = write_padded(fd, pos->long_name,
+ pos->long_name_len + 1, len);
if (err < 0)
return err;
}
@@ -203,12 +223,89 @@ static int __dsos__write_buildid_table(struct list_head *head, int fd)
static int dsos__write_buildid_table(int fd)
{
- int err = __dsos__write_buildid_table(&dsos__kernel, fd);
+ int err = __dsos__write_buildid_table(&dsos__kernel,
+ PERF_RECORD_MISC_KERNEL, fd);
if (err == 0)
- err = __dsos__write_buildid_table(&dsos__user, fd);
+ err = __dsos__write_buildid_table(&dsos__user,
+ PERF_RECORD_MISC_USER, fd);
+ return err;
+}
+
+static int dso__cache_build_id(struct dso *self, const char *debugdir)
+{
+ const size_t size = PATH_MAX;
+ char *filename = malloc(size),
+ *linkname = malloc(size), *targetname, *sbuild_id;
+ int len, err = -1;
+ bool is_kallsyms = self->kernel && self->long_name[0] != '/';
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ len = snprintf(filename, size, "%s%s%s",
+ debugdir, is_kallsyms ? "/" : "", self->long_name);
+ if (mkdir_p(filename, 0755))
+ goto out_free;
+
+ len += snprintf(filename + len, sizeof(filename) - len, "/");
+ sbuild_id = filename + len;
+ build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id);
+
+ if (access(filename, F_OK)) {
+ if (is_kallsyms) {
+ if (copyfile("/proc/kallsyms", filename))
+ goto out_free;
+ } else if (link(self->long_name, filename) &&
+ copyfile(self->long_name, filename))
+ goto out_free;
+ }
+
+ len = snprintf(linkname, size, "%s/.build-id/%.2s",
+ debugdir, sbuild_id);
+
+ if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+ goto out_free;
+
+ snprintf(linkname + len, size - len, "/%s", sbuild_id + 2);
+ targetname = filename + strlen(debugdir) - 5;
+ memcpy(targetname, "../..", 5);
+
+ if (symlink(targetname, linkname) == 0)
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
+
+static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
+{
+ struct dso *pos;
+ int err = 0;
+
+ dsos__for_each_with_build_id(pos, head)
+ if (dso__cache_build_id(pos, debugdir))
+ err = -1;
+
return err;
}
+static int dsos__cache_build_ids(void)
+{
+ int err_kernel, err_user;
+ char debugdir[PATH_MAX];
+
+ snprintf(debugdir, sizeof(debugdir), "%s/%s", getenv("HOME"),
+ DEBUG_CACHE_DIR);
+
+ if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+ return -1;
+
+ err_kernel = __dsos__cache_build_ids(&dsos__kernel, debugdir);
+ err_user = __dsos__cache_build_ids(&dsos__user, debugdir);
+ return err_kernel || err_user ? -1 : 0;
+}
+
static int perf_header__adds_write(struct perf_header *self, int fd)
{
int nr_sections;
@@ -258,6 +355,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
goto out_free;
}
buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
+ dsos__cache_build_ids();
}
lseek(fd, sec_start, SEEK_SET);
@@ -360,30 +458,43 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit)
return 0;
}
-static void do_read(int fd, void *buf, size_t size)
+static int do_read(int fd, void *buf, size_t size)
{
while (size) {
int ret = read(fd, buf, size);
- if (ret < 0)
- die("failed to read");
- if (ret == 0)
- die("failed to read: missing data");
+ if (ret <= 0)
+ return -1;
size -= ret;
buf += ret;
}
+
+ return 0;
+}
+
+static int perf_header__getbuffer64(struct perf_header *self,
+ int fd, void *buf, size_t size)
+{
+ if (do_read(fd, buf, size))
+ return -1;
+
+ if (self->needs_swap)
+ mem_bswap_64(buf, size);
+
+ return 0;
}
int perf_header__process_sections(struct perf_header *self, int fd,
int (*process)(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd))
{
struct perf_file_section *feat_sec;
int nr_sections;
int sec_size;
int idx = 0;
- int err = 0, feat = 1;
+ int err = -1, feat = 1;
nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
@@ -397,33 +508,45 @@ int perf_header__process_sections(struct perf_header *self, int fd,
lseek(fd, self->data_offset + self->data_size, SEEK_SET);
- do_read(fd, feat_sec, sec_size);
+ if (perf_header__getbuffer64(self, fd, feat_sec, sec_size))
+ goto out_free;
+ err = 0;
while (idx < nr_sections && feat < HEADER_LAST_FEATURE) {
if (perf_header__has_feat(self, feat)) {
struct perf_file_section *sec = &feat_sec[idx++];
- err = process(sec, feat, fd);
+ err = process(sec, self, feat, fd);
if (err < 0)
break;
}
++feat;
}
-
+out_free:
free(feat_sec);
return err;
-};
+}
int perf_file_header__read(struct perf_file_header *self,
struct perf_header *ph, int fd)
{
lseek(fd, 0, SEEK_SET);
- do_read(fd, self, sizeof(*self));
- if (self->magic != PERF_MAGIC ||
- self->attr_size != sizeof(struct perf_file_attr))
+ if (do_read(fd, self, sizeof(*self)) ||
+ memcmp(&self->magic, __perf_magic, sizeof(self->magic)))
return -1;
+ if (self->attr_size != sizeof(struct perf_file_attr)) {
+ u64 attr_size = bswap_64(self->attr_size);
+
+ if (attr_size != sizeof(struct perf_file_attr))
+ return -1;
+
+ mem_bswap_64(self, offsetof(struct perf_file_header,
+ adds_features));
+ ph->needs_swap = true;
+ }
+
if (self->size != sizeof(*self)) {
/* Support the previous format */
if (self->size == offsetof(typeof(*self), adds_features))
@@ -433,16 +556,28 @@ int perf_file_header__read(struct perf_file_header *self,
}
memcpy(&ph->adds_features, &self->adds_features,
- sizeof(self->adds_features));
+ sizeof(ph->adds_features));
+ /*
+ * FIXME: hack that assumes that if we need swap the perf.data file
+ * may be coming from an arch with a different word-size, ergo different
+ * DEFINE_BITMAP format, investigate more later, but for now its mostly
+ * safe to assume that we have a build-id section. Trace files probably
+ * have several other issues in this realm anyway...
+ */
+ if (ph->needs_swap) {
+ memset(&ph->adds_features, 0, sizeof(ph->adds_features));
+ perf_header__set_feat(ph, HEADER_BUILD_ID);
+ }
ph->event_offset = self->event_types.offset;
- ph->event_size = self->event_types.size;
- ph->data_offset = self->data.offset;
+ ph->event_size = self->event_types.size;
+ ph->data_offset = self->data.offset;
ph->data_size = self->data.size;
return 0;
}
static int perf_file_section__process(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd)
{
if (lseek(fd, self->offset, SEEK_SET) < 0) {
@@ -457,7 +592,7 @@ static int perf_file_section__process(struct perf_file_section *self,
break;
case HEADER_BUILD_ID:
- if (perf_header__read_build_ids(fd, self->offset, self->size))
+ if (perf_header__read_build_ids(ph, fd, self->offset, self->size))
pr_debug("Failed to read buildids, continuing...\n");
break;
default:
@@ -469,7 +604,7 @@ static int perf_file_section__process(struct perf_file_section *self,
int perf_header__read(struct perf_header *self, int fd)
{
- struct perf_file_header f_header;
+ struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
int nr_attrs, nr_ids, i, j;
@@ -486,7 +621,9 @@ int perf_header__read(struct perf_header *self, int fd)
struct perf_header_attr *attr;
off_t tmp;
- do_read(fd, &f_attr, sizeof(f_attr));
+ if (perf_header__getbuffer64(self, fd, &f_attr, sizeof(f_attr)))
+ goto out_errno;
+
tmp = lseek(fd, 0, SEEK_CUR);
attr = perf_header_attr__new(&f_attr.attr);
@@ -497,7 +634,8 @@ int perf_header__read(struct perf_header *self, int fd)
lseek(fd, f_attr.ids.offset, SEEK_SET);
for (j = 0; j < nr_ids; j++) {
- do_read(fd, &f_id, sizeof(f_id));
+ if (perf_header__getbuffer64(self, fd, &f_id, sizeof(f_id)))
+ goto out_errno;
if (perf_header_attr__add_id(attr, f_id) < 0) {
perf_header_attr__delete(attr);
@@ -517,7 +655,9 @@ int perf_header__read(struct perf_header *self, int fd)
events = malloc(f_header.event_types.size);
if (events == NULL)
return -ENOMEM;
- do_read(fd, events, f_header.event_types.size);
+ if (perf_header__getbuffer64(self, fd, events,
+ f_header.event_types.size))
+ goto out_errno;
event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
}
@@ -527,6 +667,8 @@ int perf_header__read(struct perf_header *self, int fd)
self->frozen = 1;
return 0;
+out_errno:
+ return -errno;
}
u64 perf_header__sample_type(struct perf_header *header)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d118d05d3abe..ccc8540feccd 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -52,6 +52,7 @@ struct perf_header {
u64 data_size;
u64 event_offset;
u64 event_size;
+ bool needs_swap;
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
@@ -64,7 +65,7 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit);
int perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr);
-void perf_header__push_event(u64 id, const char *name);
+int perf_header__push_event(u64 id, const char *name);
char *perf_header__find_event(u64 id);
struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr);
@@ -80,6 +81,7 @@ bool perf_header__has_feat(const struct perf_header *self, int feat);
int perf_header__process_sections(struct perf_header *self, int fd,
int (*process)(struct perf_file_section *self,
+ struct perf_header *ph,
int feat, int fd));
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000000..72f0b6ab5ea5
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,73 @@
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+enum map_type {
+ MAP__FUNCTION = 0,
+ MAP__VARIABLE,
+};
+
+#define MAP__NR_TYPES (MAP__VARIABLE + 1)
+
+struct dso;
+
+struct map {
+ union {
+ struct rb_node rb_node;
+ struct list_head node;
+ };
+ u64 start;
+ u64 end;
+ enum map_type type;
+ u64 pgoff;
+ u64 (*map_ip)(struct map *, u64);
+ u64 (*unmap_ip)(struct map *, u64);
+ struct dso *dso;
+};
+
+static inline u64 map__map_ip(struct map *map, u64 ip)
+{
+ return ip - map->start + map->pgoff;
+}
+
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{
+ return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __used, u64 ip)
+{
+ return ip;
+}
+
+struct symbol;
+struct mmap_event;
+
+typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+
+void map__init(struct map *self, enum map_type type,
+ u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct mmap_event *event, enum map_type,
+ char *cwd, int cwdlen);
+void map__delete(struct map *self);
+struct map *map__clone(struct map *self);
+int map__overlap(struct map *l, struct map *r);
+size_t map__fprintf(struct map *self, FILE *fp);
+
+struct perf_session;
+
+int map__load(struct map *self, struct perf_session *session,
+ symbol_filter_t filter);
+struct symbol *map__find_symbol(struct map *self, struct perf_session *session,
+ u64 addr, symbol_filter_t filter);
+struct symbol *map__find_symbol_by_name(struct map *self, const char *name,
+ struct perf_session *session,
+ symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
+
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index e5bc0fb016b2..05d0c5c2030c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -450,7 +450,8 @@ parse_single_tracepoint_event(char *sys_name,
/* sys + ':' + event + ':' + flags*/
#define MAX_EVOPT_LEN (MAX_EVENT_LENGTH * 2 + 2 + 128)
static enum event_result
-parse_subsystem_tracepoint_event(char *sys_name, char *flags)
+parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
+ char *flags)
{
char evt_path[MAXPATHLEN];
struct dirent *evt_ent;
@@ -474,6 +475,9 @@ parse_subsystem_tracepoint_event(char *sys_name, char *flags)
|| !strcmp(evt_ent->d_name, "filter"))
continue;
+ if (!strglobmatch(evt_ent->d_name, evt_exp))
+ continue;
+
len = snprintf(event_opt, MAX_EVOPT_LEN, "%s:%s%s%s", sys_name,
evt_ent->d_name, flags ? ":" : "",
flags ?: "");
@@ -522,9 +526,10 @@ static enum event_result parse_tracepoint_event(const char **strp,
if (evt_length >= MAX_EVENT_LENGTH)
return EVT_FAILED;
- if (!strcmp(evt_name, "*")) {
+ if (strpbrk(evt_name, "*?")) {
*strp = evt_name + evt_length;
- return parse_subsystem_tracepoint_event(sys_name, flags);
+ return parse_multiple_tracepoint_event(sys_name, evt_name,
+ flags);
} else
return parse_single_tracepoint_event(sys_name, evt_name,
evt_length, flags,
@@ -753,11 +758,11 @@ modifier:
return ret;
}
-static void store_event_type(const char *orgname)
+static int store_event_type(const char *orgname)
{
char filename[PATH_MAX], *c;
FILE *file;
- int id;
+ int id, n;
sprintf(filename, "%s/", debugfs_path);
strncat(filename, orgname, strlen(orgname));
@@ -769,11 +774,14 @@ static void store_event_type(const char *orgname)
file = fopen(filename, "r");
if (!file)
- return;
- if (fscanf(file, "%i", &id) < 1)
- die("cannot store event ID");
+ return 0;
+ n = fscanf(file, "%i", &id);
fclose(file);
- perf_header__push_event(id, orgname);
+ if (n < 1) {
+ pr_err("cannot store event ID\n");
+ return -EINVAL;
+ }
+ return perf_header__push_event(id, orgname);
}
int parse_events(const struct option *opt __used, const char *str, int unset __used)
@@ -782,7 +790,8 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
enum event_result ret;
if (strchr(str, ':'))
- store_event_type(str);
+ if (store_event_type(str) < 0)
+ return -1;
for (;;) {
if (nr_counters == MAX_COUNTERS)
@@ -835,11 +844,12 @@ int parse_filter(const struct option *opt __used, const char *str,
}
static const char * const event_type_descriptors[] = {
- "",
"Hardware event",
"Software event",
"Tracepoint event",
"Hardware cache event",
+ "Raw hardware event descriptor",
+ "Hardware breakpoint",
};
/*
@@ -872,7 +882,7 @@ static void print_tracepoint_events(void)
snprintf(evt_path, MAXPATHLEN, "%s:%s",
sys_dirent.d_name, evt_dirent.d_name);
printf(" %-42s [%s]\n", evt_path,
- event_type_descriptors[PERF_TYPE_TRACEPOINT+1]);
+ event_type_descriptors[PERF_TYPE_TRACEPOINT]);
}
closedir(evt_dir);
}
@@ -892,9 +902,7 @@ void print_events(void)
printf("List of pre-defined events (to be used in -e):\n");
for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
- type = syms->type + 1;
- if (type >= ARRAY_SIZE(event_type_descriptors))
- type = 0;
+ type = syms->type;
if (type != prev_type)
printf("\n");
@@ -919,17 +927,19 @@ void print_events(void)
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
printf(" %-42s [%s]\n",
event_cache_name(type, op, i),
- event_type_descriptors[4]);
+ event_type_descriptors[PERF_TYPE_HW_CACHE]);
}
}
}
printf("\n");
- printf(" %-42s [raw hardware event descriptor]\n",
- "rNNN");
+ printf(" %-42s [%s]\n",
+ "rNNN", event_type_descriptors[PERF_TYPE_RAW]);
printf("\n");
- printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+ printf(" %-42s [%s]\n",
+ "mem:<addr>[:access]",
+ event_type_descriptors[PERF_TYPE_BREAKPOINT]);
printf("\n");
print_tracepoint_events();
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 29465d440043..71b0dd590a37 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -37,6 +37,8 @@
#include "string.h"
#include "strlist.h"
#include "debug.h"
+#include "cache.h"
+#include "color.h"
#include "parse-events.h" /* For debugfs_path */
#include "probe-event.h"
@@ -62,6 +64,42 @@ static int e_snprintf(char *str, size_t size, const char *format, ...)
return ret;
}
+void parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ const char *ptr;
+ char *tmp;
+ /*
+ * <Syntax>
+ * SRC:SLN[+NUM|-ELN]
+ * FUNC[:SLN[+NUM|-ELN]]
+ */
+ ptr = strchr(arg, ':');
+ if (ptr) {
+ lr->start = (unsigned int)strtoul(ptr + 1, &tmp, 0);
+ if (*tmp == '+')
+ lr->end = lr->start + (unsigned int)strtoul(tmp + 1,
+ &tmp, 0);
+ else if (*tmp == '-')
+ lr->end = (unsigned int)strtoul(tmp + 1, &tmp, 0);
+ else
+ lr->end = 0;
+ pr_debug("Line range is %u to %u\n", lr->start, lr->end);
+ if (lr->end && lr->start > lr->end)
+ semantic_error("Start line must be smaller"
+ " than end line.");
+ if (*tmp != '\0')
+ semantic_error("Tailing with invalid character '%d'.",
+ *tmp);
+ tmp = strndup(arg, (ptr - arg));
+ } else
+ tmp = strdup(arg);
+
+ if (strchr(tmp, '.'))
+ lr->file = tmp;
+ else
+ lr->function = tmp;
+}
+
/* Check the name is good for event/group */
static bool check_event_name(const char *name)
{
@@ -368,7 +406,7 @@ static int open_kprobe_events(int flags, int mode)
if (ret < 0) {
if (errno == ENOENT)
die("kprobe_events file does not exist -"
- " please rebuild with CONFIG_KPROBE_TRACER.");
+ " please rebuild with CONFIG_KPROBE_EVENT.");
else
die("Could not open kprobe_events file: %s",
strerror(errno));
@@ -455,6 +493,8 @@ void show_perf_probe_events(void)
struct strlist *rawlist;
struct str_node *ent;
+ setup_pager();
+
fd = open_kprobe_events(O_RDONLY, 0);
rawlist = get_trace_kprobe_event_rawlist(fd);
close(fd);
@@ -675,3 +715,66 @@ void del_trace_kprobe_events(struct strlist *dellist)
close(fd);
}
+#define LINEBUF_SIZE 256
+
+static void show_one_line(FILE *fp, unsigned int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE];
+ const char *color = PERF_COLOR_BLUE;
+
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%7u %s", l, buf);
+ else
+ color_fprintf(stdout, color, " %s", buf);
+ }
+
+ while (strlen(buf) == LINEBUF_SIZE - 1 &&
+ buf[LINEBUF_SIZE - 2] != '\n') {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (!skip) {
+ if (show_num)
+ fprintf(stdout, "%s", buf);
+ else
+ color_fprintf(stdout, color, "%s", buf);
+ }
+ }
+ return;
+error:
+ if (feof(fp))
+ die("Source file is shorter than expected.");
+ else
+ die("File read error: %s", strerror(errno));
+}
+
+void show_line_range(struct line_range *lr)
+{
+ unsigned int l = 1;
+ struct line_node *ln;
+ FILE *fp;
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s:%d>\n", lr->function,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->file, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL)
+ die("Failed to open %s: %s", lr->path, strerror(errno));
+ /* Skip to starting line number */
+ while (l < lr->start)
+ show_one_line(fp, l++, true, false);
+
+ list_for_each_entry(ln, &lr->line_list, list) {
+ while (ln->line > l)
+ show_one_line(fp, (l++) - lr->offset, false, false);
+ show_one_line(fp, (l++) - lr->offset, false, true);
+ }
+ fclose(fp);
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 7f1d499118c0..711287d4baea 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -5,6 +5,7 @@
#include "probe-finder.h"
#include "strlist.h"
+extern void parse_line_range_desc(const char *arg, struct line_range *lr);
extern void parse_perf_probe_event(const char *str, struct probe_point *pp,
bool *need_dwarf);
extern int synthesize_perf_probe_point(struct probe_point *pp);
@@ -15,6 +16,7 @@ extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes,
bool force_add);
extern void del_trace_kprobe_events(struct strlist *dellist);
extern void show_perf_probe_events(void);
+extern void show_line_range(struct line_range *lr);
/* Maximum index number of event-name postfix */
#define MAX_EVENT_INDEX 1024
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4b852c0d16a5..1b2124d12f68 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -140,6 +140,31 @@ static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname)
return found;
}
+static int cu_get_filename(Dwarf_Die cu_die, Dwarf_Unsigned fno, char **buf)
+{
+ Dwarf_Signed cnt, i;
+ char **srcs;
+ int ret = 0;
+
+ if (!buf || !fno)
+ return -EINVAL;
+
+ ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error);
+ if (ret == DW_DLV_OK) {
+ if ((Dwarf_Unsigned)cnt > fno - 1) {
+ *buf = strdup(srcs[fno - 1]);
+ ret = 0;
+ pr_debug("found filename: %s\n", *buf);
+ } else
+ ret = -ENOENT;
+ for (i = 0; i < cnt; i++)
+ dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING);
+ dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST);
+ } else
+ ret = -EINVAL;
+ return ret;
+}
+
/* Compare diename and tname */
static int die_compare_name(Dwarf_Die dw_die, const char *tname)
{
@@ -402,11 +427,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf)
} else if (op == DW_OP_regx) {
regn = loc->lr_number;
} else
- die("Dwarf_OP %d is not supported.\n", op);
+ die("Dwarf_OP %d is not supported.", op);
regs = get_arch_regstr(regn);
if (!regs)
- die("%lld exceeds max register number.\n", regn);
+ die("%lld exceeds max register number.", regn);
if (deref)
ret = snprintf(pf->buf, pf->len,
@@ -438,7 +463,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf)
return ;
error:
die("Failed to find the location of %s at this address.\n"
- " Perhaps, it has been optimized out.\n", pf->var);
+ " Perhaps, it has been optimized out.", pf->var);
}
static int variable_callback(struct die_link *dlink, void *data)
@@ -476,7 +501,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf)
/* Search child die for local variables and parameters. */
ret = search_die_from_children(sp_die, variable_callback, pf);
if (!ret)
- die("Failed to find '%s' in this function.\n", pf->var);
+ die("Failed to find '%s' in this function.", pf->var);
}
/* Get a frame base on the address */
@@ -567,7 +592,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data)
}
/* Find probe point from its line number */
-static void find_by_line(struct probe_finder *pf)
+static void find_probe_point_by_line(struct probe_finder *pf)
{
Dwarf_Signed cnt, i, clm;
Dwarf_Line *lines;
@@ -602,7 +627,7 @@ static void find_by_line(struct probe_finder *pf)
ret = search_die_from_children(pf->cu_die,
probeaddr_callback, pf);
if (ret == 0)
- die("Probe point is not found in subprograms.\n");
+ die("Probe point is not found in subprograms.");
/* Continuing, because target line might be inlined. */
}
dwarf_srclines_dealloc(__dw_debug, lines, cnt);
@@ -626,7 +651,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
pf->fno = die_get_decl_file(dlink->die);
pf->lno = die_get_decl_line(dlink->die)
+ pp->line;
- find_by_line(pf);
+ find_probe_point_by_line(pf);
return 1;
}
if (die_inlined_subprogram(dlink->die)) {
@@ -661,7 +686,7 @@ static int probefunc_callback(struct die_link *dlink, void *data)
!die_inlined_subprogram(lk->die))
goto found;
}
- die("Failed to find real subprogram.\n");
+ die("Failed to find real subprogram.");
found:
/* Get offset from subprogram */
ret = die_within_subprogram(lk->die, pf->addr, &offs);
@@ -673,7 +698,7 @@ found:
return 0;
}
-static void find_by_func(struct probe_finder *pf)
+static void find_probe_point_by_func(struct probe_finder *pf)
{
search_die_from_children(pf->cu_die, probefunc_callback, pf);
}
@@ -714,10 +739,10 @@ int find_probepoint(int fd, struct probe_point *pp)
if (ret == DW_DLV_NO_ENTRY)
pf.cu_base = 0;
if (pp->function)
- find_by_func(&pf);
+ find_probe_point_by_func(&pf);
else {
pf.lno = pp->line;
- find_by_line(&pf);
+ find_probe_point_by_line(&pf);
}
}
dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE);
@@ -728,3 +753,159 @@ int find_probepoint(int fd, struct probe_point *pp)
return pp->found;
}
+
+static void line_range_add_line(struct line_range *lr, unsigned int line)
+{
+ struct line_node *ln;
+ struct list_head *p;
+
+ /* Reverse search, because new line will be the last one */
+ list_for_each_entry_reverse(ln, &lr->line_list, list) {
+ if (ln->line < line) {
+ p = &ln->list;
+ goto found;
+ } else if (ln->line == line) /* Already exist */
+ return ;
+ }
+ /* List is empty, or the smallest entry */
+ p = &lr->line_list;
+found:
+ pr_debug("Debug: add a line %u\n", line);
+ ln = zalloc(sizeof(struct line_node));
+ DIE_IF(ln == NULL);
+ ln->line = line;
+ INIT_LIST_HEAD(&ln->list);
+ list_add(&ln->list, p);
+}
+
+/* Find line range from its line number */
+static void find_line_range_by_line(struct line_finder *lf)
+{
+ Dwarf_Signed cnt, i;
+ Dwarf_Line *lines;
+ Dwarf_Unsigned lineno = 0;
+ Dwarf_Unsigned fno;
+ Dwarf_Addr addr;
+ int ret;
+
+ ret = dwarf_srclines(lf->cu_die, &lines, &cnt, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ for (i = 0; i < cnt; i++) {
+ ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (fno != lf->fno)
+ continue;
+
+ ret = dwarf_lineno(lines[i], &lineno, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->lno_s > lineno || lf->lno_e < lineno)
+ continue;
+
+ /* Filter line in the function address range */
+ if (lf->addr_s && lf->addr_e) {
+ ret = dwarf_lineaddr(lines[i], &addr, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ if (lf->addr_s > addr || lf->addr_e <= addr)
+ continue;
+ }
+ line_range_add_line(lf->lr, (unsigned int)lineno);
+ }
+ dwarf_srclines_dealloc(__dw_debug, lines, cnt);
+ if (!list_empty(&lf->lr->line_list))
+ lf->found = 1;
+}
+
+/* Search function from function name */
+static int linefunc_callback(struct die_link *dlink, void *data)
+{
+ struct line_finder *lf = (struct line_finder *)data;
+ struct line_range *lr = lf->lr;
+ Dwarf_Half tag;
+ int ret;
+
+ ret = dwarf_tag(dlink->die, &tag, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (tag == DW_TAG_subprogram &&
+ die_compare_name(dlink->die, lr->function) == 0) {
+ /* Get the address range of this function */
+ ret = dwarf_highpc(dlink->die, &lf->addr_e, &__dw_error);
+ if (ret == DW_DLV_OK)
+ ret = dwarf_lowpc(dlink->die, &lf->addr_s, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY) {
+ lf->addr_s = 0;
+ lf->addr_e = 0;
+ }
+
+ lf->fno = die_get_decl_file(dlink->die);
+ lr->offset = die_get_decl_line(dlink->die);;
+ lf->lno_s = lr->offset + lr->start;
+ if (!lr->end)
+ lf->lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf->lno_e = lr->offset + lr->end;
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ find_line_range_by_line(lf);
+ /* If we find a target function, this should be end. */
+ lf->found = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static void find_line_range_by_func(struct line_finder *lf)
+{
+ search_die_from_children(lf->cu_die, linefunc_callback, lf);
+}
+
+int find_line_range(int fd, struct line_range *lr)
+{
+ Dwarf_Half addr_size = 0;
+ Dwarf_Unsigned next_cuh = 0;
+ int ret;
+ struct line_finder lf = {.lr = lr};
+
+ ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error);
+ if (ret != DW_DLV_OK)
+ return -ENOENT;
+
+ while (!lf.found) {
+ /* Search CU (Compilation Unit) */
+ ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL,
+ &addr_size, &next_cuh, &__dw_error);
+ DIE_IF(ret == DW_DLV_ERROR);
+ if (ret == DW_DLV_NO_ENTRY)
+ break;
+
+ /* Get the DIE(Debugging Information Entry) of this CU */
+ ret = dwarf_siblingof(__dw_debug, 0, &lf.cu_die, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fno = cu_find_fileno(lf.cu_die, lr->file);
+
+ if (!lr->file || lf.fno) {
+ if (lr->function)
+ find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ if (!lr->end)
+ lf.lno_e = (Dwarf_Unsigned)-1;
+ else
+ lf.lno_e = lr->end;
+ find_line_range_by_line(&lf);
+ }
+ /* Get the real file path */
+ if (lf.found)
+ cu_get_filename(lf.cu_die, lf.fno, &lr->path);
+ }
+ dwarf_dealloc(__dw_debug, lf.cu_die, DW_DLA_DIE);
+ }
+ ret = dwarf_finish(__dw_debug, &__dw_error);
+ DIE_IF(ret != DW_DLV_OK);
+ return lf.found;
+}
+
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index a4086aaddb73..972b386116f1 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -1,6 +1,8 @@
#ifndef _PROBE_FINDER_H
#define _PROBE_FINDER_H
+#include "util.h"
+
#define MAX_PATH_LEN 256
#define MAX_PROBE_BUFFER 1024
#define MAX_PROBES 128
@@ -32,8 +34,26 @@ struct probe_point {
char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/
};
+/* Line number container */
+struct line_node {
+ struct list_head list;
+ unsigned int line;
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ unsigned int start; /* Start line number */
+ unsigned int end; /* End line number */
+ unsigned int offset; /* Start line offset */
+ char *path; /* Real path name */
+ struct list_head line_list; /* Visible lines */
+};
+
#ifndef NO_LIBDWARF
extern int find_probepoint(int fd, struct probe_point *pp);
+extern int find_line_range(int fd, struct line_range *lr);
/* Workaround for undefined _MIPS_SZLONG bug in libdwarf.h: */
#ifndef _MIPS_SZLONG
@@ -60,6 +80,19 @@ struct probe_finder {
char *buf; /* Current output buffer */
int len; /* Length of output buffer */
};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ Dwarf_Unsigned fno; /* File number */
+ Dwarf_Unsigned lno_s; /* Start line number */
+ Dwarf_Unsigned lno_e; /* End line number */
+ Dwarf_Addr addr_s; /* Start address */
+ Dwarf_Addr addr_e; /* End address */
+ Dwarf_Die cu_die; /* Current CU */
+ int found;
+};
+
#endif /* NO_LIBDWARF */
#endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ce3a6c8abe76..1951e330377c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
+#include <byteswap.h>
#include <unistd.h>
#include <sys/types.h>
@@ -66,13 +67,13 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
self->mmap_window = 32;
self->cwd = NULL;
self->cwdlen = 0;
+ self->unknown_events = 0;
map_groups__init(&self->kmaps);
- if (perf_session__create_kernel_maps(self) < 0)
- goto out_delete;
-
if (mode == O_RDONLY && perf_session__open(self, force) < 0)
goto out_delete;
+
+ self->sample_type = perf_header__sample_type(&self->header);
out:
return self;
out_free:
@@ -148,3 +149,389 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
return syms;
}
+
+static int process_event_stub(event_t *event __used,
+ struct perf_session *session __used)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
+{
+ if (handler->sample == NULL)
+ handler->sample = process_event_stub;
+ if (handler->mmap == NULL)
+ handler->mmap = process_event_stub;
+ if (handler->comm == NULL)
+ handler->comm = process_event_stub;
+ if (handler->fork == NULL)
+ handler->fork = process_event_stub;
+ if (handler->exit == NULL)
+ handler->exit = process_event_stub;
+ if (handler->lost == NULL)
+ handler->lost = process_event_stub;
+ if (handler->read == NULL)
+ handler->read = process_event_stub;
+ if (handler->throttle == NULL)
+ handler->throttle = process_event_stub;
+ if (handler->unthrottle == NULL)
+ handler->unthrottle = process_event_stub;
+}
+
+static const char *event__name[] = {
+ [0] = "TOTAL",
+ [PERF_RECORD_MMAP] = "MMAP",
+ [PERF_RECORD_LOST] = "LOST",
+ [PERF_RECORD_COMM] = "COMM",
+ [PERF_RECORD_EXIT] = "EXIT",
+ [PERF_RECORD_THROTTLE] = "THROTTLE",
+ [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+ [PERF_RECORD_FORK] = "FORK",
+ [PERF_RECORD_READ] = "READ",
+ [PERF_RECORD_SAMPLE] = "SAMPLE",
+};
+
+unsigned long event__total[PERF_RECORD_MAX];
+
+void event__print_totals(void)
+{
+ int i;
+ for (i = 0; i < PERF_RECORD_MAX; ++i)
+ pr_info("%10s events: %10ld\n",
+ event__name[i], event__total[i]);
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+ u64 *m = src;
+
+ while (byte_size > 0) {
+ *m = bswap_64(*m);
+ byte_size -= sizeof(u64);
+ ++m;
+ }
+}
+
+static void event__all64_swap(event_t *self)
+{
+ struct perf_event_header *hdr = &self->header;
+ mem_bswap_64(hdr + 1, self->header.size - sizeof(*hdr));
+}
+
+static void event__comm_swap(event_t *self)
+{
+ self->comm.pid = bswap_32(self->comm.pid);
+ self->comm.tid = bswap_32(self->comm.tid);
+}
+
+static void event__mmap_swap(event_t *self)
+{
+ self->mmap.pid = bswap_32(self->mmap.pid);
+ self->mmap.tid = bswap_32(self->mmap.tid);
+ self->mmap.start = bswap_64(self->mmap.start);
+ self->mmap.len = bswap_64(self->mmap.len);
+ self->mmap.pgoff = bswap_64(self->mmap.pgoff);
+}
+
+static void event__task_swap(event_t *self)
+{
+ self->fork.pid = bswap_32(self->fork.pid);
+ self->fork.tid = bswap_32(self->fork.tid);
+ self->fork.ppid = bswap_32(self->fork.ppid);
+ self->fork.ptid = bswap_32(self->fork.ptid);
+ self->fork.time = bswap_64(self->fork.time);
+}
+
+static void event__read_swap(event_t *self)
+{
+ self->read.pid = bswap_32(self->read.pid);
+ self->read.tid = bswap_32(self->read.tid);
+ self->read.value = bswap_64(self->read.value);
+ self->read.time_enabled = bswap_64(self->read.time_enabled);
+ self->read.time_running = bswap_64(self->read.time_running);
+ self->read.id = bswap_64(self->read.id);
+}
+
+typedef void (*event__swap_op)(event_t *self);
+
+static event__swap_op event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = event__mmap_swap,
+ [PERF_RECORD_COMM] = event__comm_swap,
+ [PERF_RECORD_FORK] = event__task_swap,
+ [PERF_RECORD_EXIT] = event__task_swap,
+ [PERF_RECORD_LOST] = event__all64_swap,
+ [PERF_RECORD_READ] = event__read_swap,
+ [PERF_RECORD_SAMPLE] = event__all64_swap,
+ [PERF_RECORD_MAX] = NULL,
+};
+
+static int perf_session__process_event(struct perf_session *self,
+ event_t *event,
+ struct perf_event_ops *ops,
+ u64 offset, u64 head)
+{
+ trace_event(event);
+
+ if (event->header.type < PERF_RECORD_MAX) {
+ dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
+ offset + head, event->header.size,
+ event__name[event->header.type]);
+ ++event__total[0];
+ ++event__total[event->header.type];
+ }
+
+ if (self->header.needs_swap && event__swap_ops[event->header.type])
+ event__swap_ops[event->header.type](event);
+
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ return ops->sample(event, self);
+ case PERF_RECORD_MMAP:
+ return ops->mmap(event, self);
+ case PERF_RECORD_COMM:
+ return ops->comm(event, self);
+ case PERF_RECORD_FORK:
+ return ops->fork(event, self);
+ case PERF_RECORD_EXIT:
+ return ops->exit(event, self);
+ case PERF_RECORD_LOST:
+ return ops->lost(event, self);
+ case PERF_RECORD_READ:
+ return ops->read(event, self);
+ case PERF_RECORD_THROTTLE:
+ return ops->throttle(event, self);
+ case PERF_RECORD_UNTHROTTLE:
+ return ops->unthrottle(event, self);
+ default:
+ self->unknown_events++;
+ return -1;
+ }
+}
+
+void perf_event_header__bswap(struct perf_event_header *self)
+{
+ self->type = bswap_32(self->type);
+ self->misc = bswap_16(self->misc);
+ self->size = bswap_16(self->size);
+}
+
+int perf_header__read_build_ids(struct perf_header *self,
+ int input, u64 offset, u64 size)
+{
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size;
+ int err = -1;
+
+ while (offset < limit) {
+ struct dso *dso;
+ ssize_t len;
+ struct list_head *head = &dsos__user;
+
+ if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+ goto out;
+
+ if (self->needs_swap)
+ perf_event_header__bswap(&bev.header);
+
+ len = bev.header.size - sizeof(bev);
+ if (read(input, filename, len) != len)
+ goto out;
+
+ if (bev.header.misc & PERF_RECORD_MISC_KERNEL)
+ head = &dsos__kernel;
+
+ dso = __dsos__findnew(head, filename);
+ if (dso != NULL) {
+ dso__set_build_id(dso, &bev.build_id);
+ if (head == &dsos__kernel && filename[0] == '[')
+ dso->kernel = 1;
+ }
+
+ offset += bev.header.size;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+static struct thread *perf_session__register_idle_thread(struct perf_session *self)
+{
+ struct thread *thread = perf_session__findnew(self, 0);
+
+ if (thread == NULL || thread__set_comm(thread, "swapper")) {
+ pr_err("problem inserting idle task.\n");
+ thread = NULL;
+ }
+
+ return thread;
+}
+
+int perf_session__process_events(struct perf_session *self,
+ struct perf_event_ops *ops)
+{
+ int err, mmap_prot, mmap_flags;
+ u64 head, shift;
+ u64 offset = 0;
+ size_t page_size;
+ event_t *event;
+ uint32_t size;
+ char *buf;
+
+ if (perf_session__register_idle_thread(self) == NULL)
+ return -ENOMEM;
+
+ perf_event_ops__fill_defaults(ops);
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ head = self->header.data_offset;
+
+ if (!symbol_conf.full_paths) {
+ char bf[PATH_MAX];
+
+ if (getcwd(bf, sizeof(bf)) == NULL) {
+ err = -errno;
+out_getcwd_err:
+ pr_err("failed to get the current directory\n");
+ goto out_err;
+ }
+ self->cwd = strdup(bf);
+ if (self->cwd == NULL) {
+ err = -ENOMEM;
+ goto out_getcwd_err;
+ }
+ self->cwdlen = strlen(self->cwd);
+ }
+
+ shift = page_size * (head / page_size);
+ offset += shift;
+ head -= shift;
+
+ mmap_prot = PROT_READ;
+ mmap_flags = MAP_SHARED;
+
+ if (self->header.needs_swap) {
+ mmap_prot |= PROT_WRITE;
+ mmap_flags = MAP_PRIVATE;
+ }
+remap:
+ buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
+ mmap_flags, self->fd, offset);
+ if (buf == MAP_FAILED) {
+ pr_err("failed to mmap file\n");
+ err = -errno;
+ goto out_err;
+ }
+
+more:
+ event = (event_t *)(buf + head);
+
+ if (self->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+ size = event->header.size;
+ if (size == 0)
+ size = 8;
+
+ if (head + event->header.size >= page_size * self->mmap_window) {
+ int munmap_ret;
+
+ shift = page_size * (head / page_size);
+
+ munmap_ret = munmap(buf, page_size * self->mmap_window);
+ assert(munmap_ret == 0);
+
+ offset += shift;
+ head -= shift;
+ goto remap;
+ }
+
+ size = event->header.size;
+
+ dump_printf("\n%#Lx [%#x]: event: %d\n",
+ offset + head, event->header.size, event->header.type);
+
+ if (size == 0 ||
+ perf_session__process_event(self, event, ops, offset, head) < 0) {
+ dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
+ offset + head, event->header.size,
+ event->header.type);
+ /*
+ * assume we lost track of the stream, check alignment, and
+ * increment a single u64 in the hope to catch on again 'soon'.
+ */
+ if (unlikely(head & 7))
+ head &= ~7ULL;
+
+ size = 8;
+ }
+
+ head += size;
+
+ if (offset + head >= self->header.data_offset + self->header.data_size)
+ goto done;
+
+ if (offset + head < self->size)
+ goto more;
+done:
+ err = 0;
+out_err:
+ return err;
+}
+
+bool perf_session__has_traces(struct perf_session *self, const char *msg)
+{
+ if (!(self->sample_type & PERF_SAMPLE_RAW)) {
+ pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+ return false;
+ }
+
+ return true;
+}
+
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+ const char *symbol_name,
+ u64 addr)
+{
+ char *bracket;
+
+ self->ref_reloc_sym.name = strdup(symbol_name);
+ if (self->ref_reloc_sym.name == NULL)
+ return -ENOMEM;
+
+ bracket = strchr(self->ref_reloc_sym.name, ']');
+ if (bracket)
+ *bracket = '\0';
+
+ self->ref_reloc_sym.addr = addr;
+ return 0;
+}
+
+static u64 map__reloc_map_ip(struct map *map, u64 ip)
+{
+ return ip + (s64)map->pgoff;
+}
+
+static u64 map__reloc_unmap_ip(struct map *map, u64 ip)
+{
+ return ip - (s64)map->pgoff;
+}
+
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+ u64 unrelocated_addr)
+{
+ enum map_type type;
+ s64 reloc = unrelocated_addr - self->ref_reloc_sym.addr;
+
+ if (!reloc)
+ return;
+
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ struct map *map = self->vmlinux_maps[type];
+
+ map->map_ip = map__reloc_map_ip;
+ map->unmap_ip = map__reloc_unmap_ip;
+ map->pgoff = reloc;
+ }
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 32eaa1bada06..36d1a80c0b6c 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -18,10 +18,16 @@ struct perf_session {
struct map_groups kmaps;
struct rb_root threads;
struct thread *last_match;
+ struct map *vmlinux_maps[MAP__NR_TYPES];
struct events_stats events_stats;
unsigned long event_total[PERF_RECORD_MAX];
+ unsigned long unknown_events;
struct rb_root hists;
u64 sample_type;
+ struct {
+ const char *name;
+ u64 addr;
+ } ref_reloc_sym;
int fd;
int cwdlen;
char *cwd;
@@ -31,23 +37,22 @@ struct perf_session {
typedef int (*event_op)(event_t *self, struct perf_session *session);
struct perf_event_ops {
- event_op process_sample_event;
- event_op process_mmap_event;
- event_op process_comm_event;
- event_op process_fork_event;
- event_op process_exit_event;
- event_op process_lost_event;
- event_op process_read_event;
- event_op process_throttle_event;
- event_op process_unthrottle_event;
- int (*sample_type_check)(struct perf_session *session);
- unsigned long total_unknown;
- bool full_paths;
+ event_op sample,
+ mmap,
+ comm,
+ fork,
+ exit,
+ lost,
+ read,
+ throttle,
+ unthrottle;
};
struct perf_session *perf_session__new(const char *filename, int mode, bool force);
void perf_session__delete(struct perf_session *self);
+void perf_event_header__bswap(struct perf_event_header *self);
+
int perf_session__process_events(struct perf_session *self,
struct perf_event_ops *event_ops);
@@ -56,6 +61,17 @@ struct symbol **perf_session__resolve_callchain(struct perf_session *self,
struct ip_callchain *chain,
struct symbol **parent);
-int perf_header__read_build_ids(int input, u64 offset, u64 file_size);
+bool perf_session__has_traces(struct perf_session *self, const char *msg);
+
+int perf_header__read_build_ids(struct perf_header *self, int input,
+ u64 offset, u64 file_size);
+
+int perf_session__set_kallsyms_ref_reloc_sym(struct perf_session *self,
+ const char *symbol_name,
+ u64 addr);
+void perf_session__reloc_vmlinux_maps(struct perf_session *self,
+ u64 unrelocated_addr);
+
+void mem_bswap_64(void *src, int byte_size);
#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 5352d7dccc61..c397d4f6f748 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -227,16 +227,73 @@ fail:
return NULL;
}
-/* Glob expression pattern matching */
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+ bool complement = false, ret = true;
+
+ if (*pat == '!') {
+ complement = true;
+ pat++;
+ }
+ if (*pat++ == c) /* First character is special */
+ goto end;
+
+ while (*pat && *pat != ']') { /* Matching */
+ if (*pat == '-' && *(pat + 1) != ']') { /* Range */
+ if (*(pat - 1) <= c && c <= *(pat + 1))
+ goto end;
+ if (*(pat - 1) > *(pat + 1))
+ goto error;
+ pat += 2;
+ } else if (*pat++ == c)
+ goto end;
+ }
+ if (!*pat)
+ goto error;
+ ret = false;
+
+end:
+ while (*pat && *pat != ']') /* Searching closing */
+ pat++;
+ if (!*pat)
+ goto error;
+ *npat = pat + 1;
+ return complement ? !ret : ret;
+
+error:
+ return false;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if the @str matches @pat. @pat can includes wildcards
+ * ('*','?') and character classes ([CHARS], complementation and ranges are
+ * also supported). Also, this supports escape character ('\') to use special
+ * characters as normal character.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
bool strglobmatch(const char *str, const char *pat)
{
while (*str && *pat && *pat != '*') {
- if (*pat == '?') {
+ if (*pat == '?') { /* Matches any single character */
str++;
pat++;
- } else
- if (*str++ != *pat++)
+ continue;
+ } else if (*pat == '[') /* Character classes/Ranges */
+ if (__match_charclass(pat + 1, *str, &pat)) {
+ str++;
+ continue;
+ } else
return false;
+ else if (*pat == '\\') /* Escaped char match as normal char */
+ pat++;
+ if (*str++ != *pat++)
+ return false;
}
/* Check wild card */
if (*pat == '*') {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index ab92763edb03..a4e745934584 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -22,6 +22,7 @@
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
+ DSO__ORIG_BUILD_ID_CACHE,
DSO__ORIG_FEDORA,
DSO__ORIG_UBUNTU,
DSO__ORIG_BUILDID,
@@ -63,7 +64,7 @@ static void dso__set_sorted_by_name(struct dso *self, enum map_type type)
self->sorted_by_name |= (1 << type);
}
-static bool symbol_type__is_a(char symbol_type, enum map_type map_type)
+bool symbol_type__is_a(char symbol_type, enum map_type map_type)
{
switch (map_type) {
case MAP__FUNCTION:
@@ -160,7 +161,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
self->start, self->end, self->name);
}
-static void dso__set_long_name(struct dso *self, char *name)
+void dso__set_long_name(struct dso *self, char *name)
{
if (name == NULL)
return;
@@ -175,7 +176,7 @@ static void dso__set_basename(struct dso *self)
struct dso *dso__new(const char *name)
{
- struct dso *self = malloc(sizeof(*self) + strlen(name) + 1);
+ struct dso *self = zalloc(sizeof(*self) + strlen(name) + 1);
if (self != NULL) {
int i;
@@ -382,24 +383,20 @@ size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp)
return ret;
}
-/*
- * Loads the function entries in /proc/kallsyms into kernel_map->dso,
- * so that we can in the next step set the symbol ->end address and then
- * call kernel_maps__split_kallsyms.
- */
-static int dso__load_all_kallsyms(struct dso *self, struct map *map)
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start))
{
char *line = NULL;
size_t n;
- struct rb_root *root = &self->symbols[map->type];
- FILE *file = fopen("/proc/kallsyms", "r");
+ int err = 0;
+ FILE *file = fopen(filename, "r");
if (file == NULL)
goto out_failure;
while (!feof(file)) {
u64 start;
- struct symbol *sym;
int line_len, len;
char symbol_type;
char *symbol_name;
@@ -420,35 +417,63 @@ static int dso__load_all_kallsyms(struct dso *self, struct map *map)
continue;
symbol_type = toupper(line[len]);
- if (!symbol_type__is_a(symbol_type, map->type))
- continue;
-
symbol_name = line + len + 2;
- /*
- * Will fix up the end later, when we have all symbols sorted.
- */
- sym = symbol__new(start, 0, symbol_name);
- if (sym == NULL)
- goto out_delete_line;
- /*
- * We will pass the symbols to the filter later, in
- * map__split_kallsyms, when we have split the maps per module
- */
- symbols__insert(root, sym);
+ err = process_symbol(arg, symbol_name, symbol_type, start);
+ if (err)
+ break;
}
free(line);
fclose(file);
+ return err;
- return 0;
-
-out_delete_line:
- free(line);
out_failure:
return -1;
}
+struct process_kallsyms_args {
+ struct map *map;
+ struct dso *dso;
+};
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+ char type, u64 start)
+{
+ struct symbol *sym;
+ struct process_kallsyms_args *a = arg;
+ struct rb_root *root = &a->dso->symbols[a->map->type];
+
+ if (!symbol_type__is_a(type, a->map->type))
+ return 0;
+
+ /*
+ * Will fix up the end later, when we have all symbols sorted.
+ */
+ sym = symbol__new(start, 0, name);
+
+ if (sym == NULL)
+ return -ENOMEM;
+ /*
+ * We will pass the symbols to the filter later, in
+ * map__split_kallsyms, when we have split the maps per module
+ */
+ symbols__insert(root, sym);
+ return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call kernel_maps__split_kallsyms.
+ */
+static int dso__load_all_kallsyms(struct dso *self, const char *filename,
+ struct map *map)
+{
+ struct process_kallsyms_args args = { .map = map, .dso = self, };
+ return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
+}
+
/*
* Split the symbols into maps, making sure there are no overlaps, i.e. the
* kernel range is broken in several maps, named [kernel].N, as we don't have
@@ -477,13 +502,17 @@ static int dso__split_kallsyms(struct dso *self, struct map *map,
*module++ = '\0';
- if (strcmp(self->name, module)) {
+ if (strcmp(curr_map->dso->short_name, module)) {
curr_map = map_groups__find_by_name(&session->kmaps, map->type, module);
if (curr_map == NULL) {
pr_debug("/proc/{kallsyms,modules} "
- "inconsistency!\n");
+ "inconsistency while looking "
+ "for \"%s\" module!\n", module);
return -1;
}
+
+ if (curr_map->dso->loaded)
+ goto discard_symbol;
}
/*
* So that we look just like we get from .ko files,
@@ -529,10 +558,10 @@ discard_symbol: rb_erase(&pos->rb_node, root);
}
-static int dso__load_kallsyms(struct dso *self, struct map *map,
+static int dso__load_kallsyms(struct dso *self, const char *filename, struct map *map,
struct perf_session *session, symbol_filter_t filter)
{
- if (dso__load_all_kallsyms(self, map) < 0)
+ if (dso__load_all_kallsyms(self, filename, map) < 0)
return -1;
symbols__fixup_end(&self->symbols[map->type]);
@@ -933,11 +962,15 @@ static int dso__load_sym(struct dso *self, struct map *map,
elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
struct symbol *f;
- const char *elf_name;
+ const char *elf_name = elf_sym__name(&sym, symstrs);
char *demangled = NULL;
int is_label = elf_sym__is_label(&sym);
const char *section_name;
+ if (kernel && session->ref_reloc_sym.name != NULL &&
+ strcmp(elf_name, session->ref_reloc_sym.name) == 0)
+ perf_session__reloc_vmlinux_maps(session, sym.st_value);
+
if (!is_label && !elf_sym__is_a(&sym, map->type))
continue;
@@ -950,7 +983,6 @@ static int dso__load_sym(struct dso *self, struct map *map,
if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type))
continue;
- elf_name = elf_sym__name(&sym, symstrs);
section_name = elf_sec__name(&shdr, secstrs);
if (kernel || kmodule) {
@@ -1191,6 +1223,7 @@ char dso__symtab_origin(const struct dso *self)
static const char origin[] = {
[DSO__ORIG_KERNEL] = 'k',
[DSO__ORIG_JAVA_JIT] = 'j',
+ [DSO__ORIG_BUILD_ID_CACHE] = 'B',
[DSO__ORIG_FEDORA] = 'f',
[DSO__ORIG_UBUNTU] = 'u',
[DSO__ORIG_BUILDID] = 'b',
@@ -1209,6 +1242,7 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
int size = PATH_MAX;
char *name;
u8 build_id[BUILD_ID_SIZE];
+ char build_id_hex[BUILD_ID_SIZE * 2 + 1];
int ret = -1;
int fd;
@@ -1230,8 +1264,16 @@ int dso__load(struct dso *self, struct map *map, struct perf_session *session,
return ret;
}
- self->origin = DSO__ORIG_FEDORA - 1;
+ self->origin = DSO__ORIG_BUILD_ID_CACHE;
+ if (self->has_build_id) {
+ build_id__sprintf(self->build_id, sizeof(self->build_id),
+ build_id_hex);
+ snprintf(name, size, "%s/%s/.build-id/%.2s/%s",
+ getenv("HOME"), DEBUG_CACHE_DIR,
+ build_id_hex, build_id_hex + 2);
+ goto open_file;
+ }
more:
do {
self->origin++;
@@ -1247,8 +1289,6 @@ more:
case DSO__ORIG_BUILDID:
if (filename__read_build_id(self->long_name, build_id,
sizeof(build_id))) {
- char build_id_hex[BUILD_ID_SIZE * 2 + 1];
-
build_id__sprintf(build_id, sizeof(build_id),
build_id_hex);
snprintf(name, size,
@@ -1276,7 +1316,7 @@ compare_build_id:
if (!dso__build_id_equal(self, build_id))
goto more;
}
-
+open_file:
fd = open(name, O_RDONLY);
} while (fd < 0);
@@ -1309,13 +1349,33 @@ struct map *map_groups__find_by_name(struct map_groups *self,
for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) {
struct map *map = rb_entry(nd, struct map, rb_node);
- if (map->dso && strcmp(map->dso->name, name) == 0)
+ if (map->dso && strcmp(map->dso->short_name, name) == 0)
return map;
}
return NULL;
}
+static int dso__kernel_module_get_build_id(struct dso *self)
+{
+ char filename[PATH_MAX];
+ /*
+ * kernel module short names are of the form "[module]" and
+ * we need just "module" here.
+ */
+ const char *name = self->short_name + 1;
+
+ snprintf(filename, sizeof(filename),
+ "/sys/module/%.*s/notes/.note.gnu.build-id",
+ (int)strlen(name - 1), name);
+
+ if (sysfs__read_build_id(filename, self->build_id,
+ sizeof(self->build_id)) == 0)
+ self->has_build_id = true;
+
+ return 0;
+}
+
static int perf_session__set_modules_path_dir(struct perf_session *self, char *dirname)
{
struct dirent *dent;
@@ -1361,6 +1421,7 @@ static int perf_session__set_modules_path_dir(struct perf_session *self, char *d
if (long_name == NULL)
goto failure;
dso__set_long_name(map->dso, long_name);
+ dso__kernel_module_get_build_id(map->dso);
}
}
@@ -1403,6 +1464,24 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
return self;
}
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+ const char *filename)
+{
+ struct map *map;
+ struct dso *dso = __dsos__findnew(&dsos__kernel, filename);
+
+ if (dso == NULL)
+ return NULL;
+
+ map = map__new2(start, dso, MAP__FUNCTION);
+ if (map == NULL)
+ return NULL;
+
+ dso->origin = DSO__ORIG_KMODULE;
+ map_groups__insert(&self->kmaps, map);
+ return map;
+}
+
static int perf_session__create_module_maps(struct perf_session *self)
{
char *line = NULL;
@@ -1416,7 +1495,6 @@ static int perf_session__create_module_maps(struct perf_session *self)
while (!feof(file)) {
char name[PATH_MAX];
u64 start;
- struct dso *dso;
char *sep;
int line_len;
@@ -1442,26 +1520,10 @@ static int perf_session__create_module_maps(struct perf_session *self)
*sep = '\0';
snprintf(name, sizeof(name), "[%s]", line);
- dso = dso__new(name);
-
- if (dso == NULL)
- goto out_delete_line;
-
- map = map__new2(start, dso, MAP__FUNCTION);
- if (map == NULL) {
- dso__delete(dso);
+ map = perf_session__new_module_map(self, start, name);
+ if (map == NULL)
goto out_delete_line;
- }
-
- snprintf(name, sizeof(name),
- "/sys/module/%s/notes/.note.gnu.build-id", line);
- if (sysfs__read_build_id(name, dso->build_id,
- sizeof(dso->build_id)) == 0)
- dso->has_build_id = true;
-
- dso->origin = DSO__ORIG_KMODULE;
- map_groups__insert(&self->kmaps, map);
- dsos__add(&dsos__kernel, dso);
+ dso__kernel_module_get_build_id(map->dso);
}
free(line);
@@ -1520,7 +1582,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
struct perf_session *session, symbol_filter_t filter)
{
int err;
- bool is_kallsyms;
+ const char *kallsyms_filename = NULL;
+ char *kallsyms_allocated_filename = NULL;
if (vmlinux_path != NULL) {
int i;
@@ -1539,18 +1602,58 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map,
}
}
- is_kallsyms = self->long_name[0] == '[';
- if (is_kallsyms)
+ /*
+ * Say the kernel DSO was created when processing the build-id header table,
+ * we have a build-id, so check if it is the same as the running kernel,
+ * using it if it is.
+ */
+ if (self->has_build_id) {
+ u8 kallsyms_build_id[BUILD_ID_SIZE];
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id,
+ sizeof(kallsyms_build_id)) == 0) {
+ if (dso__build_id_equal(self, kallsyms_build_id)) {
+ kallsyms_filename = "/proc/kallsyms";
+ goto do_kallsyms;
+ }
+ }
+
+ build_id__sprintf(self->build_id, sizeof(self->build_id),
+ sbuild_id);
+
+ if (asprintf(&kallsyms_allocated_filename,
+ "%s/.debug/[kernel.kallsyms]/%s",
+ getenv("HOME"), sbuild_id) != -1) {
+ if (access(kallsyms_filename, F_OK)) {
+ kallsyms_filename = kallsyms_allocated_filename;
+ goto do_kallsyms;
+ }
+ free(kallsyms_allocated_filename);
+ kallsyms_allocated_filename = NULL;
+ }
+
+ goto do_vmlinux;
+ }
+
+ if (self->long_name[0] == '[') {
+ kallsyms_filename = "/proc/kallsyms";
goto do_kallsyms;
+ }
+do_vmlinux:
err = dso__load_vmlinux(self, map, session, self->long_name, filter);
if (err <= 0) {
+ if (self->has_build_id)
+ return -1;
+
pr_info("The file %s cannot be used, "
"trying to use /proc/kallsyms...", self->long_name);
do_kallsyms:
- err = dso__load_kallsyms(self, map, session, filter);
- if (err > 0 && !is_kallsyms)
+ err = dso__load_kallsyms(self, kallsyms_filename, map, session, filter);
+ if (err > 0 && kallsyms_filename == NULL)
dso__set_long_name(self, strdup("[kernel.kallsyms]"));
+ free(kallsyms_allocated_filename);
}
if (err > 0) {
@@ -1576,19 +1679,19 @@ static struct dso *dsos__find(struct list_head *head, const char *name)
struct dso *pos;
list_for_each_entry(pos, head, node)
- if (strcmp(pos->name, name) == 0)
+ if (strcmp(pos->long_name, name) == 0)
return pos;
return NULL;
}
-struct dso *dsos__findnew(const char *name)
+struct dso *__dsos__findnew(struct list_head *head, const char *name)
{
- struct dso *dso = dsos__find(&dsos__user, name);
+ struct dso *dso = dsos__find(head, name);
if (!dso) {
dso = dso__new(name);
if (dso != NULL) {
- dsos__add(&dsos__user, dso);
+ dsos__add(head, dso);
dso__set_basename(dso);
}
}
@@ -1613,25 +1716,28 @@ void dsos__fprintf(FILE *fp)
__dsos__fprintf(&dsos__user, fp);
}
-static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp)
+static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool with_hits)
{
struct dso *pos;
size_t ret = 0;
list_for_each_entry(pos, head, node) {
+ if (with_hits && !pos->hit)
+ continue;
ret += dso__fprintf_buildid(pos, fp);
ret += fprintf(fp, " %s\n", pos->long_name);
}
return ret;
}
-size_t dsos__fprintf_buildid(FILE *fp)
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits)
{
- return (__dsos__fprintf_buildid(&dsos__kernel, fp) +
- __dsos__fprintf_buildid(&dsos__user, fp));
+ return (__dsos__fprintf_buildid(&dsos__kernel, fp, with_hits) +
+ __dsos__fprintf_buildid(&dsos__user, fp, with_hits));
}
-static struct dso *dsos__create_kernel( const char *vmlinux)
+static struct dso *dsos__create_kernel(const char *vmlinux)
{
struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]");
@@ -1660,32 +1766,37 @@ out_delete_kernel_dso:
return NULL;
}
-static int map_groups__create_kernel_maps(struct map_groups *self, const char *vmlinux)
+int __map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ struct dso *kernel)
{
- struct map *functions, *variables;
- struct dso *kernel = dsos__create_kernel(vmlinux);
-
- if (kernel == NULL)
- return -1;
+ enum map_type type;
- functions = map__new2(0, kernel, MAP__FUNCTION);
- if (functions == NULL)
- return -1;
+ for (type = 0; type < MAP__NR_TYPES; ++type) {
+ vmlinux_maps[type] = map__new2(0, kernel, type);
+ if (vmlinux_maps[type] == NULL)
+ return -1;
- variables = map__new2(0, kernel, MAP__VARIABLE);
- if (variables == NULL) {
- map__delete(functions);
- return -1;
+ vmlinux_maps[type]->map_ip =
+ vmlinux_maps[type]->unmap_ip = identity__map_ip;
+ map_groups__insert(self, vmlinux_maps[type]);
}
- functions->map_ip = functions->unmap_ip =
- variables->map_ip = variables->unmap_ip = identity__map_ip;
- map_groups__insert(self, functions);
- map_groups__insert(self, variables);
-
return 0;
}
+static int map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ const char *vmlinux)
+{
+ struct dso *kernel = dsos__create_kernel(vmlinux);
+
+ if (kernel == NULL)
+ return -1;
+
+ return __map_groups__create_kernel_maps(self, vmlinux_maps, kernel);
+}
+
static void vmlinux_path__exit(void)
{
while (--vmlinux_path__nr_entries >= 0) {
@@ -1793,7 +1904,7 @@ out_free_comm_list:
int perf_session__create_kernel_maps(struct perf_session *self)
{
- if (map_groups__create_kernel_maps(&self->kmaps,
+ if (map_groups__create_kernel_maps(&self->kmaps, self->vmlinux_maps,
symbol_conf.vmlinux_name) < 0)
return -1;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8aded2356f79..525085fd0735 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -8,6 +8,8 @@
#include <linux/rbtree.h>
#include "event.h"
+#define DEBUG_CACHE_DIR ".debug"
+
#ifdef HAVE_CPLUS_DEMANGLE
extern char *cplus_demangle(const char *, int);
@@ -58,7 +60,8 @@ struct symbol_conf {
sort_by_name,
show_nr_samples,
use_callchain,
- exclude_other;
+ exclude_other,
+ full_paths;
const char *vmlinux_name,
*field_sep;
char *dso_list_str,
@@ -94,6 +97,7 @@ struct dso {
u8 slen_calculated:1;
u8 has_build_id:1;
u8 kernel:1;
+ u8 hit:1;
unsigned char origin;
u8 sorted_by_name;
u8 loaded;
@@ -112,17 +116,26 @@ bool dso__sorted_by_name(const struct dso *self, enum map_type type);
void dso__sort_by_name(struct dso *self, enum map_type type);
+extern struct list_head dsos__user, dsos__kernel;
+
+struct dso *__dsos__findnew(struct list_head *head, const char *name);
+
+static inline struct dso *dsos__findnew(const char *name)
+{
+ return __dsos__findnew(&dsos__user, name);
+}
+
struct perf_session;
-struct dso *dsos__findnew(const char *name);
int dso__load(struct dso *self, struct map *map, struct perf_session *session,
symbol_filter_t filter);
void dsos__fprintf(FILE *fp);
-size_t dsos__fprintf_buildid(FILE *fp);
+size_t dsos__fprintf_buildid(FILE *fp, bool with_hits);
size_t dso__fprintf_buildid(struct dso *self, FILE *fp);
size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp);
char dso__symtab_origin(const struct dso *self);
+void dso__set_long_name(struct dso *self, char *name);
void dso__set_build_id(struct dso *self, void *build_id);
struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr);
struct symbol *dso__find_symbol_by_name(struct dso *self, enum map_type type,
@@ -132,10 +145,16 @@ int filename__read_build_id(const char *filename, void *bf, size_t size);
int sysfs__read_build_id(const char *filename, void *bf, size_t size);
bool dsos__read_build_ids(void);
int build_id__sprintf(u8 *self, int len, char *bf);
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start));
int symbol__init(void);
+bool symbol_type__is_a(char symbol_type, enum map_type map_type);
+
int perf_session__create_kernel_maps(struct perf_session *self);
-extern struct list_head dsos__user, dsos__kernel;
+struct map *perf_session__new_module_map(struct perf_session *self, u64 start,
+ const char *filename);
extern struct dso *vdso;
#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index c206f72c8881..e35653c1817c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -48,6 +48,11 @@ static inline struct map *thread__find_map(struct thread *self,
return self ? map_groups__find(&self->mg, type, addr) : NULL;
}
+void thread__find_addr_map(struct thread *self,
+ struct perf_session *session, u8 cpumode,
+ enum map_type type, u64 addr,
+ struct addr_location *al);
+
void thread__find_addr_location(struct thread *self,
struct perf_session *session, u8 cpumode,
enum map_type type, u64 addr,
@@ -67,4 +72,8 @@ map_groups__find_function(struct map_groups *self, struct perf_session *session,
struct map *map_groups__find_by_name(struct map_groups *self,
enum map_type type, const char *name);
+
+int __map_groups__create_kernel_maps(struct map_groups *self,
+ struct map *vmlinux_maps[MAP__NR_TYPES],
+ struct dso *kernel);
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index cace35595530..5ea8973ad331 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -20,6 +20,7 @@
*/
#define _GNU_SOURCE
#include <dirent.h>
+#include <mntent.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -37,6 +38,7 @@
#include "../perf.h"
#include "trace-event.h"
+#include "debugfs.h"
#define VERSION "0.5"
@@ -101,32 +103,12 @@ void *malloc_or_die(unsigned int size)
static const char *find_debugfs(void)
{
- static char debugfs[MAX_PATH+1];
- static int debugfs_found;
- char type[100];
- FILE *fp;
-
- if (debugfs_found)
- return debugfs;
-
- if ((fp = fopen("/proc/mounts","r")) == NULL)
- die("Can't open /proc/mounts for read");
-
- while (fscanf(fp, "%*s %"
- STR(MAX_PATH)
- "s %99s %*s %*d %*d\n",
- debugfs, type) == 2) {
- if (strcmp(type, "debugfs") == 0)
- break;
- }
- fclose(fp);
-
- if (strcmp(type, "debugfs") != 0)
- die("debugfs not mounted, please mount");
+ const char *path = debugfs_mount(NULL);
- debugfs_found = 1;
+ if (!path)
+ die("Your kernel not support debugfs filesystem");
- return debugfs;
+ return path;
}
/*
@@ -271,6 +253,8 @@ static void read_header_files(void)
write_or_die("header_page", 12);
write_or_die(&size, 8);
check_size = copy_file_fd(fd);
+ close(fd);
+
if (size != check_size)
die("wrong size for '%s' size=%lld read=%lld",
path, size, check_size);
@@ -289,6 +273,7 @@ static void read_header_files(void)
if (size != check_size)
die("wrong size for '%s'", path);
put_tracing_file(path);
+ close(fd);
}
static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
@@ -317,7 +302,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
die("can't read directory '%s'", sys);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
!name_in_tp_list(dent->d_name, tps))
continue;
@@ -334,7 +320,8 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
rewinddir(dir);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
!name_in_tp_list(dent->d_name, tps))
continue;
@@ -353,6 +340,7 @@ static void copy_event_system(const char *sys, struct tracepoint_path *tps)
free(format);
}
+ closedir(dir);
}
static void read_ftrace_files(struct tracepoint_path *tps)
@@ -394,26 +382,21 @@ static void read_event_files(struct tracepoint_path *tps)
die("can't read directory '%s'", path);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
continue;
- sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2);
- sprintf(sys, "%s/%s", path, dent->d_name);
- ret = stat(sys, &st);
- free(sys);
- if (ret < 0)
- continue;
- if (S_ISDIR(st.st_mode))
- count++;
+ count++;
}
write_or_die(&count, 4);
rewinddir(dir);
while ((dent = readdir(dir))) {
- if (strcmp(dent->d_name, ".") == 0 ||
+ if (dent->d_type != DT_DIR ||
+ strcmp(dent->d_name, ".") == 0 ||
strcmp(dent->d_name, "..") == 0 ||
strcmp(dent->d_name, "ftrace") == 0 ||
!system_in_tp_list(dent->d_name, tps))
@@ -422,14 +405,13 @@ static void read_event_files(struct tracepoint_path *tps)
sprintf(sys, "%s/%s", path, dent->d_name);
ret = stat(sys, &st);
if (ret >= 0) {
- if (S_ISDIR(st.st_mode)) {
- write_or_die(dent->d_name, strlen(dent->d_name) + 1);
- copy_event_system(sys, tps);
- }
+ write_or_die(dent->d_name, strlen(dent->d_name) + 1);
+ copy_event_system(sys, tps);
}
free(sys);
}
+ closedir(dir);
put_tracing_file(path);
}
@@ -533,7 +515,7 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
write_or_die(buf, 1);
/* save page_size */
- page_size = getpagesize();
+ page_size = sysconf(_SC_PAGESIZE);
write_or_die(&page_size, 4);
read_header_files();
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000000..f9b890fde681
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,94 @@
+#include "util.h"
+#include <sys/mman.h>
+
+int mkdir_p(char *path, mode_t mode)
+{
+ struct stat st;
+ int err;
+ char *d = path;
+
+ if (*d != '/')
+ return -1;
+
+ if (stat(path, &st) == 0)
+ return 0;
+
+ while (*++d == '/');
+
+ while ((d = strchr(d, '/'))) {
+ *d = '\0';
+ err = stat(path, &st) && mkdir(path, mode);
+ *d++ = '/';
+ if (err)
+ return -1;
+ while (*d == '/')
+ ++d;
+ }
+ return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
+
+static int slow_copyfile(const char *from, const char *to)
+{
+ int err = 0;
+ char *line = NULL;
+ size_t n;
+ FILE *from_fp = fopen(from, "r"), *to_fp;
+
+ if (from_fp == NULL)
+ goto out;
+
+ to_fp = fopen(to, "w");
+ if (to_fp == NULL)
+ goto out_fclose_from;
+
+ while (getline(&line, &n, from_fp) > 0)
+ if (fputs(line, to_fp) == EOF)
+ goto out_fclose_to;
+ err = 0;
+out_fclose_to:
+ fclose(to_fp);
+ free(line);
+out_fclose_from:
+ fclose(from_fp);
+out:
+ return err;
+}
+
+int copyfile(const char *from, const char *to)
+{
+ int fromfd, tofd;
+ struct stat st;
+ void *addr;
+ int err = -1;
+
+ if (stat(from, &st))
+ goto out;
+
+ if (st.st_size == 0) /* /proc? do it slowly... */
+ return slow_copyfile(from, to);
+
+ fromfd = open(from, O_RDONLY);
+ if (fromfd < 0)
+ goto out;
+
+ tofd = creat(to, 0755);
+ if (tofd < 0)
+ goto out_close_from;
+
+ addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
+ if (addr == MAP_FAILED)
+ goto out_close_to;
+
+ if (write(tofd, addr, st.st_size) == st.st_size)
+ err = 0;
+
+ munmap(addr, st.st_size);
+out_close_to:
+ close(tofd);
+ if (err)
+ unlink(to);
+out_close_from:
+ close(fromfd);
+out:
+ return err;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c673d8825883..0f5b2a6f1080 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -403,4 +403,7 @@ void git_qsort(void *base, size_t nmemb, size_t size,
#endif
#endif
+int mkdir_p(char *path, mode_t mode);
+int copyfile(const char *from, const char *to);
+
#endif
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index 1c15e39f99e3..cfa55d686e3b 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -169,6 +169,7 @@ static void perf_read_values__display_pretty(FILE *fp,
counterwidth[j], values->value[i][j]);
fprintf(fp, "\n");
}
+ free(counterwidth);
}
static void perf_read_values__display_raw(FILE *fp,