summaryrefslogtreecommitdiff
path: root/kernel/trace/ring_buffer.c
diff options
context:
space:
mode:
authorJames Morris <james.l.morris@oracle.com>2017-11-29 12:47:41 +1100
committerJames Morris <james.l.morris@oracle.com>2017-11-29 12:47:41 +1100
commitcf40a76e7d5874bb25f4404eecc58a2e033af885 (patch)
tree8fd81cbea03c87b3d41d7ae5b1d11eadd35d6ef5 /kernel/trace/ring_buffer.c
parentab5348c9c23cd253f5902980d2d8fe067dc24c82 (diff)
parent4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff)
Merge tag 'v4.15-rc1' into next-seccomp
Linux 4.15-rc1
Diffstat (limited to 'kernel/trace/ring_buffer.c')
-rw-r--r--kernel/trace/ring_buffer.c83
1 files changed, 27 insertions, 56 deletions
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 529cc50d7243..91874a95060d 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -13,7 +13,6 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kthread.h> /* for self test */
-#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
@@ -2055,7 +2054,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
}
event = __rb_page_index(tail_page, tail);
- kmemcheck_annotate_bitfield(event, bitfield);
/* account for padding bytes */
local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
@@ -2538,61 +2536,29 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The lock and unlock are done within a preempt disable section.
* The current_context per_cpu variable can only be modified
* by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. To pass this
- * information from the lock to the unlock without having to
- * access the 'in_interrupt()' functions again (which do show
- * a bit of overhead in something as critical as function tracing,
- * we use a bitmask trick.
+ * be modified more than once via an interrupt. There are four
+ * different contexts that we need to consider.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * Normal context.
+ * SoftIRQ context
+ * IRQ context
+ * NMI context
*
- * This works because this is the order of contexts that can
- * preempt other contexts. A SoftIRQ never preempts an IRQ
- * context.
- *
- * When the context is determined, the corresponding bit is
- * checked and set (if it was set, then a recursion of that context
- * happened).
- *
- * On unlock, we need to clear this bit. To do so, just subtract
- * 1 from the current_context and AND it to itself.
- *
- * (binary)
- * 101 - 1 = 100
- * 101 & 100 = 100 (clearing bit zero)
- *
- * 1010 - 1 = 1001
- * 1010 & 1001 = 1000 (clearing bit 1)
- *
- * The least significant bit can be cleared this way, and it
- * just so happens that it is the same bit corresponding to
- * the current context.
+ * If for some reason the ring buffer starts to recurse, we
+ * only allow that to happen at most 4 times (one for each
+ * context). If it happens 5 times, then we consider this a
+ * recusive loop and do not let it go further.
*/
static __always_inline int
trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
{
- unsigned int val = cpu_buffer->current_context;
- int bit;
-
- if (in_interrupt()) {
- if (in_nmi())
- bit = RB_CTX_NMI;
- else if (in_irq())
- bit = RB_CTX_IRQ;
- else
- bit = RB_CTX_SOFTIRQ;
- } else
- bit = RB_CTX_NORMAL;
-
- if (unlikely(val & (1 << bit)))
+ if (cpu_buffer->current_context >= 4)
return 1;
- val |= (1 << bit);
- cpu_buffer->current_context = val;
+ cpu_buffer->current_context++;
+ /* Interrupts must see this update */
+ barrier();
return 0;
}
@@ -2600,7 +2566,9 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
static __always_inline void
trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
{
- cpu_buffer->current_context &= cpu_buffer->current_context - 1;
+ /* Don't let the dec leak out */
+ barrier();
+ cpu_buffer->current_context--;
}
/**
@@ -2686,7 +2654,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
/* We reserved something on the buffer */
event = __rb_page_index(tail_page, tail);
- kmemcheck_annotate_bitfield(event, bitfield);
rb_update_event(cpu_buffer, event, info);
local_inc(&tail_page->entries);
@@ -2724,7 +2691,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
* if it happened, we have to fail the write.
*/
barrier();
- if (unlikely(ACCESS_ONCE(cpu_buffer->buffer) != buffer)) {
+ if (unlikely(READ_ONCE(cpu_buffer->buffer) != buffer)) {
local_dec(&cpu_buffer->committing);
local_dec(&cpu_buffer->commits);
return NULL;
@@ -4386,15 +4353,19 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
* the page that was allocated, with the read page of the buffer.
*
* Returns:
- * The page allocated, or NULL on error.
+ * The page allocated, or ERR_PTR
*/
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
{
- struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
+ struct ring_buffer_per_cpu *cpu_buffer;
struct buffer_data_page *bpage = NULL;
unsigned long flags;
struct page *page;
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return ERR_PTR(-ENODEV);
+
+ cpu_buffer = buffer->buffers[cpu];
local_irq_save(flags);
arch_spin_lock(&cpu_buffer->lock);
@@ -4412,7 +4383,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
page = alloc_pages_node(cpu_to_node(cpu),
GFP_KERNEL | __GFP_NORETRY, 0);
if (!page)
- return NULL;
+ return ERR_PTR(-ENOMEM);
bpage = page_address(page);
@@ -4467,8 +4438,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
*
* for example:
* rpage = ring_buffer_alloc_read_page(buffer, cpu);
- * if (!rpage)
- * return error;
+ * if (IS_ERR(rpage))
+ * return PTR_ERR(rpage);
* ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
* if (ret >= 0)
* process_page(rpage, ret);