summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/trace/kprobes.rst24
-rw-r--r--arch/arc/kernel/kprobes.c16
-rw-r--r--arch/arm/probes/kprobes/core.c23
-rw-r--r--arch/arm64/kernel/probes/kprobes.c17
-rw-r--r--arch/csky/kernel/probes/kprobes.c17
-rw-r--r--arch/ia64/kernel/kprobes.c16
-rw-r--r--arch/mips/kernel/kprobes.c3
-rw-r--r--arch/powerpc/kernel/kprobes.c17
-rw-r--r--arch/riscv/kernel/probes/kprobes.c17
-rw-r--r--arch/s390/kernel/kprobes.c17
-rw-r--r--arch/sh/kernel/kprobes.c17
-rw-r--r--arch/sparc/kernel/kprobes.c17
-rw-r--r--arch/x86/events/core.c28
-rw-r--r--arch/x86/events/intel/core.c15
-rw-r--r--arch/x86/events/intel/ds.c20
-rw-r--r--arch/x86/events/intel/uncore.c4
-rw-r--r--arch/x86/events/intel/uncore.h1
-rw-r--r--arch/x86/events/intel/uncore_snbep.c173
-rw-r--r--arch/x86/events/perf_event.h1
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/kernel/kprobes/core.c18
-rw-r--r--arch/x86/mm/fault.c4
-rw-r--r--arch/x86/mm/tlb.c10
-rw-r--r--include/linux/kprobes.h8
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/events/hw_breakpoint.c2
-rw-r--r--kernel/events/uprobes.c1
-rw-r--r--kernel/kprobes.c19
-rw-r--r--samples/kprobes/kprobe_example.c15
29 files changed, 257 insertions, 285 deletions
diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst
index b757b6dfd3d4..998149ce2fd9 100644
--- a/Documentation/trace/kprobes.rst
+++ b/Documentation/trace/kprobes.rst
@@ -362,14 +362,11 @@ register_kprobe
#include <linux/kprobes.h>
int register_kprobe(struct kprobe *kp);
-Sets a breakpoint at the address kp->addr. When the breakpoint is
-hit, Kprobes calls kp->pre_handler. After the probed instruction
-is single-stepped, Kprobe calls kp->post_handler. If a fault
-occurs during execution of kp->pre_handler or kp->post_handler,
-or during single-stepping of the probed instruction, Kprobes calls
-kp->fault_handler. Any or all handlers can be NULL. If kp->flags
-is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
-so, its handlers aren't hit until calling enable_kprobe(kp).
+Sets a breakpoint at the address kp->addr. When the breakpoint is hit, Kprobes
+calls kp->pre_handler. After the probed instruction is single-stepped, Kprobe
+calls kp->post_handler. Any or all handlers can be NULL. If kp->flags is set
+KPROBE_FLAG_DISABLED, that kp will be registered but disabled, so, its handlers
+aren't hit until calling enable_kprobe(kp).
.. note::
@@ -415,17 +412,6 @@ User's post-handler (kp->post_handler)::
p and regs are as described for the pre_handler. flags always seems
to be zero.
-User's fault-handler (kp->fault_handler)::
-
- #include <linux/kprobes.h>
- #include <linux/ptrace.h>
- int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
-
-p and regs are as described for the pre_handler. trapnr is the
-architecture-specific trap number associated with the fault (e.g.,
-on i386, 13 for a general protection fault or 14 for a page fault).
-Returns 1 if it successfully handled the exception.
-
register_kretprobe
------------------
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index cabef45f11df..5f0415fc7328 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -317,22 +317,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
* caused the fault.
*/
- /* We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
/*
* In case the user-specified fault handler returned zero,
* try to fix up.
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a9653117ca0d..27e0af78e88b 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -348,29 +348,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
reset_current_kprobe();
}
break;
-
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
- return 1;
- break;
-
- default:
- break;
}
return 0;
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d607c9912025..004b86eff9c2 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -277,23 +277,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 589f090f48b9..68b22b499aeb 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -295,23 +295,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index fc1ff8a4d7de..441ed04b1037 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -844,22 +844,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 54dfba8fa77c..75bff0f77319 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -403,9 +403,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
if (kcb->kprobe_status & KPROBE_HIT_SS) {
resume_execution(cur, regs, kcb);
regs->cp0_status |= kcb->kprobe_old_SR;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e8c2a6373157..c64a5feaebbe 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -502,23 +502,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 15cc65ac7ca6..247e33fa5bc7 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -279,23 +279,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index aae24dc75df6..74b0bd2c24d4 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -446,23 +446,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(p);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (p->fault_handler && p->fault_handler(p, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 756100b01e84..1c7f358ef0be 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -383,23 +383,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 217c21a6986a..4c05a4ee6a0e 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -346,23 +346,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8f71dd72ef95..1eb45139fcc6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1626,6 +1626,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto do_del;
+ __set_bit(event->hw.idx, cpuc->dirty);
+
/*
* Not a TXN, therefore cleanup properly.
*/
@@ -2474,6 +2476,31 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
+void perf_clear_dirty_counters(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int i;
+
+ /* Don't need to clear the assigned counter. */
+ for (i = 0; i < cpuc->n_events; i++)
+ __clear_bit(cpuc->assign[i], cpuc->dirty);
+
+ if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
+ return;
+
+ for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
+ /* Metrics and fake events don't have corresponding HW counters. */
+ if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
+ continue;
+ else if (i >= INTEL_PMC_IDX_FIXED)
+ wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+ else
+ wrmsrl(x86_pmu_event_addr(i), 0);
+ }
+
+ bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+}
+
static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
{
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
@@ -2497,7 +2524,6 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
{
-
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3a77f66730d0..fca7a6e2242f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -280,6 +280,8 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+ INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
EVENT_EXTRA_END
};
@@ -4030,8 +4032,10 @@ spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* The :ppp indicates the Precise Distribution (PDist) facility, which
* is only supported on the GP counter 0. If a :ppp event which is not
* available on the GP counter 0, error out.
+ * Exception: Instruction PDIR is only available on the fixed counter 0.
*/
- if (event->attr.precise_ip == 3) {
+ if ((event->attr.precise_ip == 3) &&
+ !constraint_match(&fixed0_constraint, event->hw.config)) {
if (c->idxmsk64 & BIT_ULL(0))
return &counter0_constraint;
@@ -6163,8 +6167,13 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
pmu->name = "cpu_core";
pmu->cpu_type = hybrid_big;
- pmu->num_counters = x86_pmu.num_counters + 2;
- pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+ if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+ pmu->num_counters = x86_pmu.num_counters + 2;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+ } else {
+ pmu->num_counters = x86_pmu.num_counters;
+ pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+ }
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 1ec8fd311f38..8647713276a7 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1187,6 +1187,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
+ u64 value = ds->pebs_event_reset[hwc->idx];
+ u32 base = MSR_RELOAD_PMC0;
+ unsigned int idx = hwc->idx;
if (!is_pebs_pt(event))
return;
@@ -1196,7 +1199,12 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
- wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
+ if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+ base = MSR_RELOAD_FIXED_CTR0;
+ idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+ value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
+ }
+ wrmsrl(base + idx, value);
}
void intel_pmu_pebs_enable(struct perf_event *event)
@@ -1204,6 +1212,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
+ unsigned int idx = hwc->idx;
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
@@ -1222,19 +1231,18 @@ void intel_pmu_pebs_enable(struct perf_event *event)
}
}
+ if (idx >= INTEL_PMC_IDX_FIXED)
+ idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+
/*
* Use auto-reload if possible to save a MSR write in the PMI.
* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
*/
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
- unsigned int idx = hwc->idx;
-
- if (idx >= INTEL_PMC_IDX_FIXED)
- idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
ds->pebs_event_reset[idx] =
(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
} else {
- ds->pebs_event_reset[hwc->idx] = 0;
+ ds->pebs_event_reset[idx] = 0;
}
intel_pmu_pebs_via_pt_enable(event);
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index df7b07d7fdcb..9bf4dbbc26e2 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -801,8 +801,6 @@ static void uncore_pmu_enable(struct pmu *pmu)
struct intel_uncore_box *box;
uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
- if (!uncore_pmu)
- return;
box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
if (!box)
@@ -818,8 +816,6 @@ static void uncore_pmu_disable(struct pmu *pmu)
struct intel_uncore_box *box;
uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
- if (!uncore_pmu)
- return;
box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
if (!box)
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 291791002997..187d7287039c 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -92,6 +92,7 @@ struct intel_uncore_type {
/*
* Optional callbacks for managing mapping of Uncore units to PMONs
*/
+ int (*get_topology)(struct intel_uncore_type *type);
int (*set_mapping)(struct intel_uncore_type *type);
void (*cleanup_mapping)(struct intel_uncore_type *type);
};
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 3a75a2c601c2..bb6eb1e5569c 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -348,6 +348,13 @@
#define SKX_M2M_PCI_PMON_CTR0 0x200
#define SKX_M2M_PCI_PMON_BOX_CTL 0x258
+/* Memory Map registers device ID */
+#define SNR_ICX_MESH2IIO_MMAP_DID 0x9a2
+#define SNR_ICX_SAD_CONTROL_CFG 0x3f4
+
+/* Getting I/O stack id in SAD_COTROL_CFG notation */
+#define SAD_CONTROL_STACK_ID(data) (((data) >> 4) & 0x7)
+
/* SNR Ubox */
#define SNR_U_MSR_PMON_CTR0 0x1f98
#define SNR_U_MSR_PMON_CTL0 0x1f91
@@ -3682,12 +3689,19 @@ static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
}
static umode_t
-skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr,
+ int die, int zero_bus_pmu)
{
struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
- /* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
- return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode;
+ return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode;
+}
+
+static umode_t
+skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+ /* Root bus 0x00 is valid only for pmu_idx = 0. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 0);
}
static ssize_t skx_iio_mapping_show(struct device *dev,
@@ -3772,7 +3786,8 @@ static const struct attribute_group *skx_iio_attr_update[] = {
NULL,
};
-static int skx_iio_set_mapping(struct intel_uncore_type *type)
+static int
+pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
{
char buf[64];
int ret;
@@ -3780,7 +3795,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
struct attribute **attrs = NULL;
struct dev_ext_attribute *eas = NULL;
- ret = skx_iio_get_topology(type);
+ ret = type->get_topology(type);
if (ret < 0)
goto clear_attr_update;
@@ -3807,7 +3822,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
eas[die].var = (void *)die;
attrs[die] = &eas[die].attr.attr;
}
- skx_iio_mapping_group.attrs = attrs;
+ ag->attrs = attrs;
return 0;
err:
@@ -3821,6 +3836,11 @@ clear_attr_update:
return ret;
}
+static int skx_iio_set_mapping(struct intel_uncore_type *type)
+{
+ return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+}
+
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
{
struct attribute **attr = skx_iio_mapping_group.attrs;
@@ -3851,6 +3871,7 @@ static struct intel_uncore_type skx_uncore_iio = {
.ops = &skx_uncore_iio_ops,
.format_group = &skx_uncore_iio_format_group,
.attr_update = skx_iio_attr_update,
+ .get_topology = skx_iio_get_topology,
.set_mapping = skx_iio_set_mapping,
.cleanup_mapping = skx_iio_cleanup_mapping,
};
@@ -4393,6 +4414,91 @@ static const struct attribute_group snr_uncore_iio_format_group = {
.attrs = snr_uncore_iio_formats_attr,
};
+static umode_t
+snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+ /* Root bus 0x00 is valid only for pmu_idx = 1. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 1);
+}
+
+static struct attribute_group snr_iio_mapping_group = {
+ .is_visible = snr_iio_mapping_visible,
+};
+
+static const struct attribute_group *snr_iio_attr_update[] = {
+ &snr_iio_mapping_group,
+ NULL,
+};
+
+static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping)
+{
+ u32 sad_cfg;
+ int die, stack_id, ret = -EPERM;
+ struct pci_dev *dev = NULL;
+
+ type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
+ GFP_KERNEL);
+ if (!type->topology)
+ return -ENOMEM;
+
+ while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) {
+ ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg);
+ if (ret) {
+ ret = pcibios_err_to_errno(ret);
+ break;
+ }
+
+ die = uncore_pcibus_to_dieid(dev->bus);
+ stack_id = SAD_CONTROL_STACK_ID(sad_cfg);
+ if (die < 0 || stack_id >= type->num_boxes) {
+ ret = -EPERM;
+ break;
+ }
+
+ /* Convert stack id from SAD_CONTROL to PMON notation. */
+ stack_id = sad_pmon_mapping[stack_id];
+
+ ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number;
+ type->topology[die].segment = pci_domain_nr(dev->bus);
+ }
+
+ if (ret) {
+ kfree(type->topology);
+ type->topology = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+enum {
+ SNR_QAT_PMON_ID,
+ SNR_CBDMA_DMI_PMON_ID,
+ SNR_NIS_PMON_ID,
+ SNR_DLB_PMON_ID,
+ SNR_PCIE_GEN3_PMON_ID
+};
+
+static u8 snr_sad_pmon_mapping[] = {
+ SNR_CBDMA_DMI_PMON_ID,
+ SNR_PCIE_GEN3_PMON_ID,
+ SNR_DLB_PMON_ID,
+ SNR_NIS_PMON_ID,
+ SNR_QAT_PMON_ID
+};
+
+static int snr_iio_get_topology(struct intel_uncore_type *type)
+{
+ return sad_cfg_iio_topology(type, snr_sad_pmon_mapping);
+}
+
+static int snr_iio_set_mapping(struct intel_uncore_type *type)
+{
+ return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+}
+
static struct intel_uncore_type snr_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4406,6 +4512,10 @@ static struct intel_uncore_type snr_uncore_iio = {
.msr_offset = SNR_IIO_MSR_OFFSET,
.ops = &ivbep_uncore_msr_ops,
.format_group = &snr_uncore_iio_format_group,
+ .attr_update = snr_iio_attr_update,
+ .get_topology = snr_iio_get_topology,
+ .set_mapping = snr_iio_set_mapping,
+ .cleanup_mapping = skx_iio_cleanup_mapping,
};
static struct intel_uncore_type snr_uncore_irp = {
@@ -4933,6 +5043,53 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
EVENT_CONSTRAINT_END
};
+static umode_t
+icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+ /* Root bus 0x00 is valid only for pmu_idx = 5. */
+ return pmu_iio_mapping_visible(kobj, attr, die, 5);
+}
+
+static struct attribute_group icx_iio_mapping_group = {
+ .is_visible = icx_iio_mapping_visible,
+};
+
+static const struct attribute_group *icx_iio_attr_update[] = {
+ &icx_iio_mapping_group,
+ NULL,
+};
+
+/*
+ * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+enum {
+ ICX_PCIE1_PMON_ID,
+ ICX_PCIE2_PMON_ID,
+ ICX_PCIE3_PMON_ID,
+ ICX_PCIE4_PMON_ID,
+ ICX_PCIE5_PMON_ID,
+ ICX_CBDMA_DMI_PMON_ID
+};
+
+static u8 icx_sad_pmon_mapping[] = {
+ ICX_CBDMA_DMI_PMON_ID,
+ ICX_PCIE1_PMON_ID,
+ ICX_PCIE2_PMON_ID,
+ ICX_PCIE3_PMON_ID,
+ ICX_PCIE4_PMON_ID,
+ ICX_PCIE5_PMON_ID,
+};
+
+static int icx_iio_get_topology(struct intel_uncore_type *type)
+{
+ return sad_cfg_iio_topology(type, icx_sad_pmon_mapping);
+}
+
+static int icx_iio_set_mapping(struct intel_uncore_type *type)
+{
+ return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+}
+
static struct intel_uncore_type icx_uncore_iio = {
.name = "iio",
.num_counters = 4,
@@ -4947,6 +5104,10 @@ static struct intel_uncore_type icx_uncore_iio = {
.constraints = icx_uncore_iio_constraints,
.ops = &skx_uncore_iio_ops,
.format_group = &snr_uncore_iio_format_group,
+ .attr_update = icx_iio_attr_update,
+ .get_topology = icx_iio_get_topology,
+ .set_mapping = icx_iio_set_mapping,
+ .cleanup_mapping = skx_iio_cleanup_mapping,
};
static struct intel_uncore_type icx_uncore_irp = {
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ad87cb36f7c8..2bf1c7ea2758 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -229,6 +229,7 @@ struct cpu_hw_events {
*/
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
int enabled;
int n_events; /* the # of events in the below arrays */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 544f41a179fb..8fc1b5003713 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -478,6 +478,7 @@ struct x86_pmu_lbr {
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
+extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d3d65545cb8b..1b3fe0edd329 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1102,24 +1102,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
- kcb->kprobe_status == KPROBE_HIT_SSDONE) {
- /*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
}
return 0;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6bda7f67d737..2d27932c9ac7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1186,7 +1186,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
return;
/* kprobes don't want to hook the spurious faults: */
- if (kprobe_page_fault(regs, X86_TRAP_PF))
+ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
return;
/*
@@ -1239,7 +1239,7 @@ void do_user_addr_fault(struct pt_regs *regs,
}
/* kprobes don't want to hook the spurious faults: */
- if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
+ if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
return;
/*
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 78804680e923..cfe6b1e85fa6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,6 +14,7 @@
#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
+#include <asm/perf_event.h>
#include "mm_internal.h"
@@ -404,9 +405,14 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm)
{
if (static_branch_unlikely(&rdpmc_always_available_key) ||
(!static_branch_unlikely(&rdpmc_never_available_key) &&
- atomic_read(&mm->context.perf_rdpmc_allowed)))
+ atomic_read(&mm->context.perf_rdpmc_allowed))) {
+ /*
+ * Clear the existing dirty counters to
+ * prevent the leak for an RDPMC task.
+ */
+ perf_clear_dirty_counters();
cr4_set_bits_irqsoff(X86_CR4_PCE);
- else
+ } else
cr4_clear_bits_irqsoff(X86_CR4_PCE);
}
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a..523ffc7bc3a8 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -54,8 +54,6 @@ struct kretprobe_instance;
typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *);
typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *,
unsigned long flags);
-typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *,
- int trapnr);
typedef int (*kretprobe_handler_t) (struct kretprobe_instance *,
struct pt_regs *);
@@ -83,12 +81,6 @@ struct kprobe {
/* Called after addr is executed, unless... */
kprobe_post_handler_t post_handler;
- /*
- * ... called if executing addr causes a fault (eg. page fault).
- * Return 1 if it handled fault, otherwise kernel will see it.
- */
- kprobe_fault_handler_t fault_handler;
-
/* Saved opcode (which has been replaced with breakpoint) */
kprobe_opcode_t opcode;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fe88d6eea3c2..ea0e24040691 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -132,6 +132,7 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
/**
* cpu_function_call - call a function on the cpu
+ * @cpu: target cpu to queue this function
* @func: the function to be called
* @info: the function call argument
*
@@ -3821,9 +3822,16 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
- struct pmu *pmu = ctx->pmu;
+ struct pmu *pmu;
cpuctx = __get_cpu_context(ctx);
+
+ /*
+ * HACK: for HETEROGENEOUS the task context might have switched to a
+ * different PMU, force (re)set the context,
+ */
+ pmu = ctx->pmu = cpuctx->ctx.pmu;
+
if (cpuctx->task_ctx == ctx) {
if (cpuctx->sched_cb_usage)
__perf_pmu_sched_task(cpuctx, true);
@@ -6669,10 +6677,10 @@ out:
return data->aux_size;
}
-long perf_pmu_snapshot_aux(struct perf_buffer *rb,
- struct perf_event *event,
- struct perf_output_handle *handle,
- unsigned long size)
+static long perf_pmu_snapshot_aux(struct perf_buffer *rb,
+ struct perf_event *event,
+ struct perf_output_handle *handle,
+ unsigned long size)
{
unsigned long flags;
long ret;
@@ -11919,6 +11927,7 @@ again:
* @pid: target pid
* @cpu: target cpu
* @group_fd: group leader event fd
+ * @flags: perf event open flags
*/
SYSCALL_DEFINE5(perf_event_open,
struct perf_event_attr __user *, attr_uptr,
@@ -12375,6 +12384,8 @@ err_fd:
* @attr: attributes of the counter to create
* @cpu: cpu in which the counter is bound
* @task: task to profile (NULL for percpu)
+ * @overflow_handler: callback to trigger when we hit the event
+ * @context: context data could be used in overflow_handler callback
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b48d7039a015..835973444a1e 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -451,6 +451,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
* register_user_hw_breakpoint - register a hardware breakpoint for user space
* @attr: breakpoint attributes
* @triggered: callback to trigger when we hit the breakpoint
+ * @context: context data could be used in the triggered callback
* @tsk: pointer to 'task_struct' of the process to which the address belongs
*/
struct perf_event *
@@ -550,6 +551,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
* register_wide_hw_breakpoint - register a wide breakpoint in the kernel
* @attr: breakpoint attributes
* @triggered: callback to trigger when we hit the breakpoint
+ * @context: context data could be used in the triggered callback
*
* @return a set of per_cpu pointers to perf events
*/
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6addc9780319..a481ef696143 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -453,6 +453,7 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
* that have fixed length instructions.
*
* uprobe_write_opcode - write the opcode at a given virtual address.
+ * @auprobe: arch specific probepoint information.
* @mm: the probed process address space.
* @vaddr: the virtual address to store the opcode.
* @opcode: opcode to be written at @vaddr.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a6..e41385afe79d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1183,23 +1183,6 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
}
NOKPROBE_SYMBOL(aggr_post_handler);
-static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
- int trapnr)
-{
- struct kprobe *cur = __this_cpu_read(kprobe_instance);
-
- /*
- * if we faulted "during" the execution of a user specified
- * probe handler, invoke just that probe's fault handler
- */
- if (cur && cur->fault_handler) {
- if (cur->fault_handler(cur, regs, trapnr))
- return 1;
- }
- return 0;
-}
-NOKPROBE_SYMBOL(aggr_fault_handler);
-
/* Walks the list and increments nmissed count for multiprobe case */
void kprobes_inc_nmissed_count(struct kprobe *p)
{
@@ -1330,7 +1313,6 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
ap->addr = p->addr;
ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
ap->pre_handler = aggr_pre_handler;
- ap->fault_handler = aggr_fault_handler;
/* We don't care the kprobe which has gone. */
if (p->post_handler && !kprobe_gone(p))
ap->post_handler = aggr_post_handler;
@@ -2014,7 +1996,6 @@ int register_kretprobe(struct kretprobe *rp)
rp->kp.pre_handler = pre_handler_kretprobe;
rp->kp.post_handler = NULL;
- rp->kp.fault_handler = NULL;
/* Pre-allocate memory for max kretprobe instances */
if (rp->maxactive <= 0) {
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index c495664c0a9b..4b2f31828951 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -94,26 +94,11 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
#endif
}
-/*
- * fault_handler: this is called if an exception is generated for any
- * instruction within the pre- or post-handler, or when Kprobes
- * single-steps the probed instruction.
- */
-static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
-{
- pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
- /* Return 0 because we don't handle the fault. */
- return 0;
-}
-/* NOKPROBE_SYMBOL() is also available */
-NOKPROBE_SYMBOL(handler_fault);
-
static int __init kprobe_init(void)
{
int ret;
kp.pre_handler = handler_pre;
kp.post_handler = handler_post;
- kp.fault_handler = handler_fault;
ret = register_kprobe(&kp);
if (ret < 0) {