From d1898b733619bd46194bd25aa6452d238ff2dc4e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 1 Jun 2016 10:42:20 -0700 Subject: x86/fpu: Add tracepoints to dump FPU state at key points I've been carrying this patch around for a bit and it's helped me solve at least a couple FPU-related bugs. In addition to using it for debugging, I also drug it out because using AVX (and AVX2/AVX-512) can have serious power consequences for a modern core. It's very important to be able to figure out who is using it. It's also insanely useful to go out and see who is using a given feature, like MPX or Memory Protection Keys. If you, for instance, want to find all processes using protection keys, you can do: echo 'xfeatures & 0x200' > filter Since 0x200 is the protection keys feature bit. Note that this touches the KVM code. KVM did a CREATE_TRACE_POINTS and then included a bunch of random headers. If anyone one of those included other tracepoints, it would have defined the *OTHER* tracepoints. That's bogus, so move it to the right place. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160601174220.3CDFB90E@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/kernel/fpu/core.c') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97027545a72d..7d564742e499 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -12,6 +12,9 @@ #include +#define CREATE_TRACE_POINTS +#include + /* * Represents the initial FPU state. It's mostly (but not completely) zeroes, * depending on the FPU hardware format: @@ -192,6 +195,7 @@ void fpu__save(struct fpu *fpu) WARN_ON_FPU(fpu != ¤t->thread.fpu); preempt_disable(); + trace_x86_fpu_before_save(fpu); if (fpu->fpregs_active) { if (!copy_fpregs_to_fpstate(fpu)) { if (use_eager_fpu()) @@ -200,6 +204,7 @@ void fpu__save(struct fpu *fpu) fpregs_deactivate(fpu); } } + trace_x86_fpu_after_save(fpu); preempt_enable(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -275,6 +280,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } preempt_enable(); + trace_x86_fpu_copy_src(src_fpu); + trace_x86_fpu_copy_dst(dst_fpu); + return 0; } @@ -288,7 +296,9 @@ void fpu__activate_curr(struct fpu *fpu) if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; } @@ -314,7 +324,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } else { if (!fpu->fpstate_active) { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -347,7 +359,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu) fpu->last_cpu = -1; } else { fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); + trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ fpu->fpstate_active = 1; } @@ -432,9 +446,11 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); + trace_x86_fpu_before_restore(fpu); fpregs_activate(fpu); copy_kernel_to_fpregs(&fpu->state); fpu->counter++; + trace_x86_fpu_after_restore(fpu); kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); @@ -463,6 +479,8 @@ void fpu__drop(struct fpu *fpu) fpu->fpstate_active = 0; + trace_x86_fpu_dropped(fpu); + preempt_enable(); } -- cgit v1.2.3 From bf15a8cf8d14879b785c548728415d36ccb6a33b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 20 May 2016 10:47:06 -0700 Subject: x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size' User space uses standard format xsave area. fpstate in signal frame should have standard format size. To explicitly distinguish between xstate size in kernel space and the one in user space, we rename 'xstate_size' to 'fpu_kernel_xstate_size'. Cleanup only, no change in functionality. Signed-off-by: Fenghua Yu [ Rebased the patch and cleaned up the naming. ] Signed-off-by: Yu-cheng Yu Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Ravi V. Shankar Cc: Sai Praneeth Prakhya Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2ecbae347a5152d94be52adf7d0f3b7305d90d99.1463760376.git.yu-cheng.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/fpu/core.c | 7 ++++--- arch/x86/kernel/fpu/init.c | 20 +++++++++++--------- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 8 ++++---- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'arch/x86/kernel/fpu/core.c') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0a16a16284f5..965c5d212c31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -367,7 +367,7 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ -extern unsigned int xstate_size; +extern unsigned int fpu_kernel_xstate_size; extern unsigned int fpu_user_xstate_size; struct perf_event; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7d564742e499..c759bd01ec99 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,7 +227,7 @@ void fpstate_init(union fpregs_state *state) return; } - memset(state, 0, xstate_size); + memset(state, 0, fpu_kernel_xstate_size); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); @@ -252,7 +252,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * leak into the child task: */ if (use_eager_fpu()) - memset(&dst_fpu->state.xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* * Save current FPU registers directly into the child @@ -271,7 +271,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + memcpy(&src_fpu->state, &dst_fpu->state, + fpu_kernel_xstate_size); if (use_eager_fpu()) copy_kernel_to_fpregs(&src_fpu->state); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5b1928c0aad4..60f3839c5bfa 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void) * This is inherent to the XSAVE architecture which puts all state * components into a single, continuous memory block: */ -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); +unsigned int fpu_kernel_xstate_size; +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size); /* Get alignment of the TYPE. */ #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) @@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void) * Add back the dynamically-calculated register state * size. */ - task_size += xstate_size; + task_size += fpu_kernel_xstate_size; /* * We dynamically size 'struct fpu', so we require that @@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void) } /* - * Set up the user and kernel xstate_size based on the legacy FPU context size. + * Set up the user and kernel xstate sizes based on the legacy FPU context size. * * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. @@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void) on_boot_cpu = 0; /* - * Note that xstate_size might be overwriten later during + * Note that xstate sizes might be overwritten later during * fpu__init_system_xstate(). */ @@ -219,15 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct swregs_state); + fpu_kernel_xstate_size = sizeof(struct swregs_state); } else { if (boot_cpu_has(X86_FEATURE_FXSR)) - xstate_size = sizeof(struct fxregs_state); + fpu_kernel_xstate_size = + sizeof(struct fxregs_state); else - xstate_size = sizeof(struct fregs_state); + fpu_kernel_xstate_size = + sizeof(struct fregs_state); } - fpu_user_xstate_size = xstate_size; + fpu_user_xstate_size = fpu_kernel_xstate_size; /* * Quirk: we don't yet handle the XSAVES* instructions diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 0d29d4de4209..06d80f62c03f 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -263,7 +263,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; + int state_size = fpu_kernel_xstate_size; u64 xfeatures = 0; int fx_only = 0; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9c4da358ebb9..46abfafe61c8 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -537,7 +537,7 @@ static void do_extra_xstate_size_checks(void) */ paranoid_xstate_size += xfeature_size(i); } - XSTATE_WARN_ON(paranoid_xstate_size != xstate_size); + XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size); } @@ -616,7 +616,7 @@ static int init_xstate_size(void) * The size is OK, we are definitely going to use xsave, * make it known to the world that we need more space. */ - xstate_size = possible_xstate_size; + fpu_kernel_xstate_size = possible_xstate_size; do_extra_xstate_size_checks(); /* @@ -679,14 +679,14 @@ void __init fpu__init_system_xstate(void) return; } - update_regset_xstate_info(xstate_size, xfeatures_mask); + update_regset_xstate_info(fpu_kernel_xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", xfeatures_mask, - xstate_size, + fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); } -- cgit v1.2.3 From 35ac2d7ba787eb4b7418a5a6f5919c25e10a780a Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 11 Jul 2016 09:18:56 -0700 Subject: x86/fpu/xstate: Fix fpstate_init() for XRSTORS In XSAVES mode if fpstate_init() is used to initialize a task's extended state area, xsave.header.xcomp_bv[63] must be set. Otherwise, when the task is scheduled, a warning is triggered from copy_kernel_to_xregs(). One such test case is: setting an invalid extended state through PTRACE. When xstateregs_set() rejects the syscall and re-initializes the task's extended state area. This triggers the warning mentioned above. Signed-off-by: Yu-cheng Yu Signed-off-by: Fenghua Yu Reviewed-by: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V Shankar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468253937-40008-4-git-send-email-fenghua.yu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 6 ++++++ arch/x86/kernel/fpu/core.c | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'arch/x86/kernel/fpu/core.c') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 12dd648735b6..48df486b02f9 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -231,6 +231,12 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); +/* + * xstate_header.xcomp_bv[63] indicates that the extended_state_area + * is in compacted format. + */ +#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63) + /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c759bd01ec99..3fc03a09a93b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,13 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); + /* + * XRSTORS requires that this bit is set in xcomp_bv, or + * it will #GP. Make sure it is replaced after the memset(). + */ + if (static_cpu_has(X86_FEATURE_XSAVES)) + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else -- cgit v1.2.3