From 375074cc736ab1d89a708c0a8d7baa4a70d5d476 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 24 Oct 2014 15:58:07 -0700
Subject: x86: Clean up cr4 manipulation

CR4 manipulation was split, seemingly at random, between direct
(write_cr4) and using a helper (set/clear_in_cr4).  Unfortunately,
the set_in_cr4 and clear_in_cr4 helpers also poke at the boot code,
which only a small subset of users actually wanted.

This patch replaces all cr4 access in functions that don't leave cr4
exactly the way they found it with new helpers cr4_set_bits,
cr4_clear_bits, and cr4_set_bits_and_update_boot.

Signed-off-by: Andy Lutomirski
Reviewed-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andrea Arcangeli
Cc: Vince Weaver
Cc: "hillf.zj"
Cc: Valdis Kletnieks
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Kees Cook
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/495a10bdc9e67016b8fd3945700d46cfd5c12c2f.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel/cpu/perf_event.c')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 143e5f5dc855..6b5acd5f4a34 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1328,7 +1329,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)

 	case CPU_STARTING:
 		if (x86_pmu.attr_rdpmc)
-			set_in_cr4(X86_CR4_PCE);
+			cr4_set_bits(X86_CR4_PCE);
 		if (x86_pmu.cpu_starting)
 			x86_pmu.cpu_starting(cpu);
 		break;
@@ -1834,9 +1835,9 @@ static void change_rdpmc(void *info)
 	bool enable = !!(unsigned long)info;

 	if (enable)
-		set_in_cr4(X86_CR4_PCE);
+		cr4_set_bits(X86_CR4_PCE);
 	else
-		clear_in_cr4(X86_CR4_PCE);
+		cr4_clear_bits(X86_CR4_PCE);
 }

 static ssize_t set_attr_rdpmc(struct device *cdev,
--
cgit v1.2.3
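The new helpers themselves are defined outside the file shown above, since this
cgit view is limited to arch/x86/kernel/cpu/perf_event.c.  As a rough sketch of
the intended semantics only, assuming the pre-existing read_cr4()/write_cr4()
accessors, the two non-boot-updating helpers could look like this (the in-tree
definitions may differ):

/*
 * Sketch only, not taken from this patch: set bits in CR4 without
 * touching any boot-time CR4 state; read_cr4()/write_cr4() are assumed.
 */
static inline void cr4_set_bits(unsigned long mask)
{
        unsigned long cr4 = read_cr4();

        if ((cr4 | mask) != cr4)
                write_cr4(cr4 | mask);
}

/* Sketch only: the clearing counterpart. */
static inline void cr4_clear_bits(unsigned long mask)
{
        unsigned long cr4 = read_cr4();

        if ((cr4 & ~mask) != cr4)
                write_cr4(cr4 & ~mask);
}

The third helper named in the changelog, cr4_set_bits_and_update_boot(),
presumably also records the bits for the boot code, which is exactly the side
effect that most callers of the old set_in_cr4() never wanted.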
From c1317ec2b906442930318d9d6e51425c5a69e9cb Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 24 Oct 2014 15:58:11 -0700
Subject: perf: Pass the event to arch_perf_update_userpage()

Signed-off-by: Andy Lutomirski
Signed-off-by: Peter Zijlstra (Intel)
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Kees Cook
Cc: Andrea Arcangeli
Cc: Vince Weaver
Cc: "hillf.zj"
Cc: Valdis Kletnieks
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/0fea9a7fac3c1eea86cb0a5954184e74f4213666.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c | 3 ++-
 kernel/events/core.c             | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel/cpu/perf_event.c')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6b5acd5f4a34..73e84a348de1 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1915,7 +1915,8 @@ static struct pmu pmu = {
 	.flush_branch_stack	= x86_pmu_flush_branch_stack,
 };

-void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void arch_perf_update_userpage(struct perf_event *event,
+			       struct perf_event_mmap_page *userpg, u64 now)
 {
 	struct cyc2ns_data *data;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index cc1487145d33..13209a90b751 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4101,7 +4101,8 @@ unlock:
 	rcu_read_unlock();
 }

-void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(
+	struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
 {
 }

@@ -4151,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event)
 	userpg->time_running = running +
 			atomic64_read(&event->child_total_time_running);

-	arch_perf_update_userpage(userpg, now);
+	arch_perf_update_userpage(event, userpg, now);

 	barrier();
 	++userpg->lock;
--
cgit v1.2.3
From 7911d3f7af14a614617e38245fedf98a724e46a9 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 24 Oct 2014 15:58:12 -0700
Subject: perf/x86: Only allow rdpmc if a perf_event is mapped

We currently allow any process to use rdpmc.  This significantly
weakens the protection offered by PR_TSC_DISABLED, and it could be
helpful to users attempting to exploit timing attacks.

Since we can't enable access to individual counters, use a very
coarse heuristic to limit access to rdpmc: allow access only when
a perf_event is mmapped.  This protects seccomp sandboxes.

There is plenty of room to further tighten these restrictions.  For
example, this allows rdpmc for any x86_pmu event, but it's only
useful for self-monitoring tasks.

As a side effect, cap_user_rdpmc will now be false for AMD uncore
events.  This isn't a real regression, since .event_idx is disabled
for these events anyway for the time being.  Whenever that gets
re-added, the cap_user_rdpmc code can be adjusted or refactored
accordingly.

Signed-off-by: Andy Lutomirski
Signed-off-by: Peter Zijlstra (Intel)
Cc: Arnaldo Carvalho de Melo
Cc: Kees Cook
Cc: Andrea Arcangeli
Cc: Vince Weaver
Cc: "hillf.zj"
Cc: Valdis Kletnieks
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/a2bdb3cf3a1d70c26980d7c6dddfbaa69f3182bf.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/mmu.h         |  2 ++
 arch/x86/include/asm/mmu_context.h | 16 +++++++++++
 arch/x86/kernel/cpu/perf_event.c   | 57 +++++++++++++++++++++++++-------------
 arch/x86/kernel/cpu/perf_event.h   |  2 ++
 4 files changed, 58 insertions(+), 19 deletions(-)

(limited to 'arch/x86/kernel/cpu/perf_event.c')

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 876e74e8eec7..09b9620a73b4 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {

 	struct mutex lock;
 	void __user *vdso;
+
+	atomic_t perf_rdpmc_allowed;	/* nonzero if rdpmc is allowed */
 } mm_context_t;

 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 52c18359f1dc..89c1fece224e 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -18,6 +18,18 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 }
 #endif	/* !CONFIG_PARAVIRT */

+#ifdef CONFIG_PERF_EVENTS
+static inline void load_mm_cr4(struct mm_struct *mm)
+{
+	if (atomic_read(&mm->context.perf_rdpmc_allowed))
+		cr4_set_bits(X86_CR4_PCE);
+	else
+		cr4_clear_bits(X86_CR4_PCE);
+}
+#else
+static inline void load_mm_cr4(struct mm_struct *mm) {}
+#endif
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -52,6 +64,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* Stop flush ipis for the previous mm */
 		cpumask_clear_cpu(cpu, mm_cpumask(prev));

+		/* Load per-mm CR4 state */
+		load_mm_cr4(next);
+
 		/*
 		 * Load the LDT, if the LDT is different.
 		 *
@@ -87,6 +102,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		 */
 		load_cr3(next->pgd);
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
+		load_mm_cr4(next);
 		load_LDT_nolock(&next->context);
 	}
 }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 73e84a348de1..bec5cff7dc80 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -1328,8 +1329,6 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
 		break;

 	case CPU_STARTING:
-		if (x86_pmu.attr_rdpmc)
-			cr4_set_bits(X86_CR4_PCE);
 		if (x86_pmu.cpu_starting)
 			x86_pmu.cpu_starting(cpu);
 		break;
@@ -1805,14 +1804,44 @@ static int x86_pmu_event_init(struct perf_event *event)
 			event->destroy(event);
 	}

+	if (ACCESS_ONCE(x86_pmu.attr_rdpmc))
+		event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
+
 	return err;
 }

+static void refresh_pce(void *ignored)
+{
+	if (current->mm)
+		load_mm_cr4(current->mm);
+}
+
+static void x86_pmu_event_mapped(struct perf_event *event)
+{
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1)
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
+static void x86_pmu_event_unmapped(struct perf_event *event)
+{
+	if (!current->mm)
+		return;
+
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
+		return;
+
+	if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed))
+		on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+}
+
 static int x86_pmu_event_idx(struct perf_event *event)
 {
 	int idx = event->hw.idx;

-	if (!x86_pmu.attr_rdpmc)
+	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return 0;

 	if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
@@ -1830,16 +1859,6 @@ static ssize_t get_attr_rdpmc(struct device *cdev,
 	return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
 }

-static void change_rdpmc(void *info)
-{
-	bool enable = !!(unsigned long)info;
-
-	if (enable)
-		cr4_set_bits(X86_CR4_PCE);
-	else
-		cr4_clear_bits(X86_CR4_PCE);
-}
-
 static ssize_t set_attr_rdpmc(struct device *cdev,
 			      struct device_attribute *attr,
 			      const char *buf, size_t count)
@@ -1854,11 +1873,7 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (x86_pmu.attr_rdpmc_broken)
 		return -ENOTSUPP;

-	if (!!val != !!x86_pmu.attr_rdpmc) {
-		x86_pmu.attr_rdpmc = !!val;
-		on_each_cpu(change_rdpmc, (void *)val, 1);
-	}
-
+	x86_pmu.attr_rdpmc = !!val;
 	return count;
 }

@@ -1901,6 +1916,9 @@ static struct pmu pmu = {

 	.event_init	= x86_pmu_event_init,

+	.event_mapped		= x86_pmu_event_mapped,
+	.event_unmapped		= x86_pmu_event_unmapped,
+
 	.add			= x86_pmu_add,
 	.del			= x86_pmu_del,
 	.start			= x86_pmu_start,
@@ -1922,7 +1940,8 @@ void arch_perf_update_userpage(struct perf_event *event,

 	userpg->cap_user_time = 0;
 	userpg->cap_user_time_zero = 0;
-	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
+	userpg->cap_user_rdpmc =
+		!!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
 	userpg->pmc_width = x86_pmu.cntval_bits;

 	if (!sched_clock_stable())
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 4e6cdb0ddc70..df525d2be1e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -71,6 +71,8 @@ struct event_constraint {
 #define PERF_X86_EVENT_COMMITTED	0x8 /* event passed commit_txn */
 #define PERF_X86_EVENT_PEBS_LD_HSW	0x10 /* haswell style datala, load */
 #define PERF_X86_EVENT_PEBS_NA_HSW	0x20 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_RDPMC_ALLOWED	0x40 /* grant rdpmc permission */
+

 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
--
cgit v1.2.3
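With the patch above applied, user-space RDPMC only works while a perf_event is
mmapped into the current mm, and self-monitoring code is expected to check
cap_user_rdpmc in the event's mmap control page.  The following user-space
sketch is hypothetical (it is not part of this series) and omits the pc->lock
seqcount retry loop and the pc->offset accumulation that a robust reader needs:

/* Hypothetical self-monitoring example; error handling kept terse. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

static uint64_t rdpmc(uint32_t counter)
{
        uint32_t lo, hi;

        /* RDPMC takes the counter index in ECX. */
        __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        struct perf_event_attr attr;
        struct perf_event_mmap_page *pc;
        long page = sysconf(_SC_PAGESIZE);
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;

        /* Count for the calling task on any CPU. */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;

        /* Mapping the event is what now enables CR4.PCE for this mm. */
        pc = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
        if (pc == MAP_FAILED)
                return 1;

        /* index == 0 means rdpmc is not usable for this event. */
        if (pc->cap_user_rdpmc && pc->index)
                printf("raw counter: %llu\n",
                       (unsigned long long)rdpmc(pc->index - 1));

        munmap(pc, page);
        close(fd);
        return 0;
}

Unmapping the page (or exiting) drops the per-mm count again, at which point
CR4.PCE is cleared and further RDPMC attempts fault.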
From a66734297f78707ce39d756b656bfae861d53f62 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Fri, 24 Oct 2014 15:58:13 -0700
Subject: perf/x86: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks

While perfmon2 is a sufficiently evil library (it pokes MSRs
directly) that breaking it is fair game, it's still useful, so we
might as well try to support it.  This allows users to write 2 to
/sys/devices/cpu/rdpmc to disable all rdpmc protection so that
hacks like perfmon2 can continue to work.

At some point, if perf_event becomes fast enough to replace
perfmon2, then this can go.

Signed-off-by: Andy Lutomirski
Signed-off-by: Peter Zijlstra (Intel)
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Kees Cook
Cc: Andrea Arcangeli
Cc: Vince Weaver
Cc: "hillf.zj"
Cc: Valdis Kletnieks
Cc: Linus Torvalds
Link: http://lkml.kernel.org/r/caac3c1c707dcca48ecbc35f4def21495856f479.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/mmu_context.h |  5 ++++-
 arch/x86/kernel/cpu/perf_event.c   | 21 ++++++++++++++++++++-
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel/cpu/perf_event.c')

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 89c1fece224e..883f6b933fa4 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -19,9 +19,12 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 #endif	/* !CONFIG_PARAVIRT */

 #ifdef CONFIG_PERF_EVENTS
+extern struct static_key rdpmc_always_available;
+
 static inline void load_mm_cr4(struct mm_struct *mm)
 {
-	if (atomic_read(&mm->context.perf_rdpmc_allowed))
+	if (static_key_true(&rdpmc_always_available) ||
+	    atomic_read(&mm->context.perf_rdpmc_allowed))
 		cr4_set_bits(X86_CR4_PCE);
 	else
 		cr4_clear_bits(X86_CR4_PCE);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bec5cff7dc80..b71a7f86d68a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -45,6 +45,8 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };

+struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+
 u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1870,10 +1872,27 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
 	if (ret)
 		return ret;

+	if (val > 2)
+		return -EINVAL;
+
 	if (x86_pmu.attr_rdpmc_broken)
 		return -ENOTSUPP;

-	x86_pmu.attr_rdpmc = !!val;
+	if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+		/*
+		 * Changing into or out of always available, aka
+		 * perf-event-bypassing mode.  This path is extremely slow,
+		 * but only root can trigger it, so it's okay.
+		 */
+		if (val == 2)
+			static_key_slow_inc(&rdpmc_always_available);
+		else
+			static_key_slow_dec(&rdpmc_always_available);
+		on_each_cpu(refresh_pce, NULL, 1);
+	}
+
+	x86_pmu.attr_rdpmc = val;
+
 	return count;
 }

--
cgit v1.2.3
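For completeness, the attribute now accepts three values: 0 leaves rdpmc
disabled for new events, 1 is the default mmap-gated mode introduced earlier in
this series, and 2 enables the always-available mode added here; anything
larger is rejected with -EINVAL.  A hypothetical root-only snippet (not from
the patch) that flips the mode:

/* Hypothetical helper, not part of the patch: requires root. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int set_rdpmc_mode(char mode)
{
        int fd = open("/sys/devices/cpu/rdpmc", O_WRONLY);

        if (fd < 0)
                return -1;
        if (write(fd, &mode, 1) != 1) {
                close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        /* '2': allow rdpmc for all tasks; '1' restores the default. */
        if (set_rdpmc_mode('2'))
                perror("/sys/devices/cpu/rdpmc");
        return 0;
}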