From d272f6670a0bcc91fa50c234088d21cd6ac30af4 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:46 +1100 Subject: powerpc: Explicitly disable math features when copying thread Currently when threads get scheduled off they always giveup the FPU, Altivec (VMX) and Vector (VSX) units if they were using them. When they are scheduled back on a fault is then taken to enable each facility and load registers. As a result explicitly disabling FPU/VMX/VSX has not been necessary. Future changes and optimisations remove this mandatory giveup and fault which could cause calls such as clone() and fork() to copy threads and run them later with FPU/VMX/VSX enabled but no registers loaded. This patch starts the process of having MSR_{FP,VEC,VSX} mean that a threads registers are hot while not having MSR_{FP,VEC,VSX} means that the registers must be loaded. This allows for a smarter return to userspace. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dccc87e8fee5..e0c3d2dc7ca3 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1307,6 +1307,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, f = ret_from_fork; } + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); sp -= STACK_FRAME_OVERHEAD; /* -- cgit v1.2.3 From 70fe3d980f5f14d8125869125ba9a0ea95e09c6b Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:47 +1100 Subject: powerpc: Restore FPU/VEC/VSX if previously used Currently the FPU, VEC and VSX facilities are lazily loaded. This is not a problem unless a process is using these facilities. Modern versions of GCC are very good at automatically vectorising code, new and modernised workloads make use of floating point and vector facilities, even the kernel makes use of vectorised memcpy. 
All this combined greatly increases the cost of a syscall since the kernel uses the facilities sometimes even in the syscall fast-path, making it increasingly common for a thread to take an *_unavailable exception soon after a syscall, not to mention potentially taking all three. The obvious overcompensation to this problem is to simply always load all the facilities on every exit to userspace. Loading up all FPU, VEC and VSX registers every time can be expensive and if a workload does avoid using them, it should not be forced to incur this penalty. An 8-bit counter is used to detect if the registers have been used in the past and the registers are always loaded until the value wraps back to zero. Several versions of the assembly in entry_64.S were tested: 1. Always calling C. 2. Performing a common case check and then calling C. 3. A complex check in asm. After some benchmarking it was determined that avoiding C in the common case is a performance benefit (option 2). The full check in asm (option 3) greatly complicated that codepath for a negligible performance gain and the trade-off was deemed not worth it. 
Signed-off-by: Cyril Bur [mpe: Move load_vec in the struct to fill an existing hole, reword change log] Signed-off-by: Michael Ellerman fixup --- arch/powerpc/include/asm/processor.h | 2 + arch/powerpc/kernel/asm-offsets.c | 2 + arch/powerpc/kernel/entry_64.S | 21 +++++++-- arch/powerpc/kernel/fpu.S | 4 ++ arch/powerpc/kernel/process.c | 88 +++++++++++++++++++++++++++++++----- arch/powerpc/kernel/vector.S | 4 ++ 6 files changed, 107 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index ac2330820b9a..8ab8a1a9610a 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -236,7 +236,9 @@ struct thread_struct { #endif struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */ unsigned long trap_nr; /* last trap # on this thread */ + u8 load_fp; #ifdef CONFIG_ALTIVEC + u8 load_vec; struct thread_vr_state vr_state; struct thread_vr_state *vr_save_area; unsigned long vrsave; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 07cebc3514f3..10d5eab19458 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -95,12 +95,14 @@ int main(void) DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state)); DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area)); DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr)); + DEFINE(THREAD_LOAD_FP, offsetof(struct thread_struct, load_fp)); #ifdef CONFIG_ALTIVEC DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state)); DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area)); DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr)); + DEFINE(THREAD_LOAD_VEC, offsetof(struct thread_struct, load_vec)); #endif /* 
CONFIG_ALTIVEC */ #ifdef CONFIG_VSX DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0d525ce3717f..038e0a1425e7 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -210,7 +210,20 @@ system_call: /* label this so stack traces look sane */ li r11,-MAX_ERRNO andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work - cmpld r3,r11 + + andi. r0,r8,MSR_FP + beq 2f +#ifdef CONFIG_ALTIVEC + andis. r0,r8,MSR_VEC@h + bne 3f +#endif +2: addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + ld r8,_MSR(r1) + ld r3,RESULT(r1) + li r11,-MAX_ERRNO + +3: cmpld r3,r11 ld r5,_CCR(r1) bge- syscall_error .Lsyscall_error_cont: @@ -602,8 +615,8 @@ _GLOBAL(ret_from_except_lite) /* Check current_thread_info()->flags */ andi. r0,r4,_TIF_USER_WORK_MASK -#ifdef CONFIG_PPC_BOOK3E bne 1f +#ifdef CONFIG_PPC_BOOK3E /* * Check to see if the dbcr0 register is set up to debug. * Use the internal debug mode bit to do this. @@ -618,7 +631,9 @@ _GLOBAL(ret_from_except_lite) mtspr SPRN_DBSR,r10 b restore #else - beq restore + addi r3,r1,STACK_FRAME_OVERHEAD + bl restore_math + b restore #endif 1: andi. 
r0,r4,_TIF_NEED_RESCHED beq 2f diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 2117eaca3d28..b06352474ad0 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -130,6 +130,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) or r12,r12,r4 std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_FP(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_FP(r5) addi r10,r5,THREAD_FPSTATE lfd fr0,FPSTATE_FPSCR(r10) MTFSF_L(fr0) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e0c3d2dc7ca3..55c1eb0465af 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -187,9 +187,22 @@ void enable_kernel_fp(void) } } EXPORT_SYMBOL(enable_kernel_fp); + +static int restore_fp(struct task_struct *tsk) { + if (tsk->thread.load_fp) { + load_fp_state(&current->thread.fp_state); + current->thread.load_fp++; + return 1; + } + return 0; +} +#else +static int restore_fp(struct task_struct *tsk) { return 0; } #endif /* CONFIG_PPC_FPU */ #ifdef CONFIG_ALTIVEC +#define loadvec(thr) ((thr).load_vec) + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -229,6 +242,21 @@ void flush_altivec_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_altivec_to_thread); + +static int restore_altivec(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_ALTIVEC) && tsk->thread.load_vec) { + load_vr_state(&tsk->thread.vr_state); + tsk->thread.used_vr = 1; + tsk->thread.load_vec++; + + return 1; + } + return 0; +} +#else +#define loadvec(thr) 0 +static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX @@ -275,6 +303,18 @@ void flush_vsx_to_thread(struct task_struct *tsk) } } EXPORT_SYMBOL_GPL(flush_vsx_to_thread); + +static int restore_vsx(struct task_struct *tsk) +{ + if (cpu_has_feature(CPU_FTR_VSX)) { + tsk->thread.used_vsr = 1; + return 1; + } + + return 0; +} +#else +static 
inline int restore_vsx(struct task_struct *tsk) { return 0; } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -374,6 +414,36 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); +void restore_math(struct pt_regs *regs) +{ + unsigned long msr; + + if (!current->thread.load_fp && !loadvec(current->thread)) + return; + + msr = regs->msr; + msr_check_and_set(msr_all_available); + + /* + * Only reload if the bit is not set in the user MSR, the bit BEING set + * indicates that the registers are hot + */ + if ((!(msr & MSR_FP)) && restore_fp(current)) + msr |= MSR_FP | current->thread.fpexc_mode; + + if ((!(msr & MSR_VEC)) && restore_altivec(current)) + msr |= MSR_VEC; + + if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) && + restore_vsx(current)) { + msr |= MSR_VSX; + } + + msr_check_and_clear(msr_all_available); + + regs->msr = msr; +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { @@ -832,17 +902,9 @@ void restore_tm_state(struct pt_regs *regs) msr_diff = current->thread.ckpt_regs.msr & ~regs->msr; msr_diff &= MSR_FP | MSR_VEC | MSR_VSX; - if (msr_diff & MSR_FP) { - msr_check_and_set(MSR_FP); - load_fp_state(&current->thread.fp_state); - msr_check_and_clear(MSR_FP); - regs->msr |= current->thread.fpexc_mode; - } - if (msr_diff & MSR_VEC) { - msr_check_and_set(MSR_VEC); - load_vr_state(&current->thread.vr_state); - msr_check_and_clear(MSR_VEC); - } + + restore_math(regs); + regs->msr |= msr_diff; } @@ -1006,6 +1068,10 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } + + if (current_thread_info()->task->thread.regs) + restore_math(current_thread_info()->task->thread.regs); + #endif /* CONFIG_PPC_BOOK3S_64 */ return last; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 162d0f714941..038cff8cf5f2 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -91,6 +91,10 @@ _GLOBAL(load_up_altivec) oris 
r12,r12,MSR_VEC@h std r12,_MSR(r1) #endif + /* Don't care if r4 overflows, this is desired behaviour */ + lbz r4,THREAD_LOAD_VEC(r5) + addi r4,r4,1 + stb r4,THREAD_LOAD_VEC(r5) addi r6,r5,THREAD_VRSTATE li r4,1 li r10,VRSTATE_VSCR -- cgit v1.2.3 From de2a20aa7237b45d3c14a2505804a8daa95a8f53 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:48 +1100 Subject: powerpc: Prepare for splitting giveup_{fpu, altivec, vsx} in two This prepares for the decoupling of saving {fpu,altivec,vsx} registers and marking {fpu,altivec,vsx} as being unused by a thread. Currently giveup_{fpu,altivec,vsx}() does both however optimisations to task switching can be made if these two operations are decoupled. save_all() will permit the saving of registers to thread structs and leave threads MSR with bits enabled. This patch introduces no functional change. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 8 ++++++++ arch/powerpc/include/asm/switch_to.h | 7 +++++++ arch/powerpc/kernel/process.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 11a81bd5dabd..52ed654d01ba 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -75,6 +75,14 @@ #define MSR_HV 0 #endif +/* + * To be used in shared book E/book S, this avoids needing to worry about + * book S/book E in shared code + */ +#ifndef MSR_SPE +#define MSR_SPE 0 +#endif + #define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */ #define MSR_VSX __MASK(MSR_VSX_LG) /* Enable VSX */ #define MSR_POW __MASK(MSR_POW_LG) /* Enable Power Management */ diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 5b268b6be74c..3690041c126a 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -34,6 +34,7 @@ static inline void 
disable_kernel_fp(void) msr_check_and_clear(MSR_FP); } #else +static inline void __giveup_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif @@ -46,6 +47,8 @@ static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } +#else +static inline void __giveup_altivec(struct task_struct *t) { } #endif #ifdef CONFIG_VSX @@ -57,6 +60,8 @@ static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } +#else +static inline void __giveup_vsx(struct task_struct *t) { } #endif #ifdef CONFIG_SPE @@ -68,6 +73,8 @@ static inline void disable_kernel_spe(void) { msr_check_and_clear(MSR_SPE); } +#else +static inline void __giveup_spe(struct task_struct *t) { } #endif static inline void clear_task_ebb(struct task_struct *t) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 55c1eb0465af..29da07fb3b4a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -444,12 +444,41 @@ void restore_math(struct pt_regs *regs) regs->msr = msr; } +void save_all(struct task_struct *tsk) +{ + unsigned long usermsr; + + if (!tsk->thread.regs) + return; + + usermsr = tsk->thread.regs->msr; + + if ((usermsr & msr_all_available) == 0) + return; + + msr_check_and_set(msr_all_available); + + if (usermsr & MSR_FP) + __giveup_fpu(tsk); + + if (usermsr & MSR_VEC) + __giveup_altivec(tsk); + + if (usermsr & MSR_VSX) + __giveup_vsx(tsk); + + if (usermsr & MSR_SPE) + __giveup_spe(tsk); + + msr_check_and_clear(msr_all_available); +} + void flush_all_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - giveup_all(tsk); + save_all(tsk); #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) -- cgit v1.2.3 From 8792468da5e12e77e76e1edf081acf0392abb331 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:49 +1100 Subject: powerpc: Add the ability to save FPU without giving it up This patch 
adds the ability to be able to save the FPU registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch optimises the thread copy path (as a result of a fork() or clone()) so that the parent thread can return to userspace with hot registers avoiding a possibly pointless reload of FPU register state. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 3 ++- arch/powerpc/kernel/fpu.S | 21 ++++----------------- arch/powerpc/kernel/process.c | 12 +++++++++++- 3 files changed, 17 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 3690041c126a..6a201e8dc947 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -28,13 +28,14 @@ extern void giveup_all(struct task_struct *); extern void enable_kernel_fp(void); extern void flush_fp_to_thread(struct task_struct *); extern void giveup_fpu(struct task_struct *); -extern void __giveup_fpu(struct task_struct *); +extern void save_fpu(struct task_struct *); static inline void disable_kernel_fp(void) { msr_check_and_clear(MSR_FP); } #else static inline void __giveup_fpu(struct task_struct *t) { } +static inline void save_fpu(struct task_struct *t) { } static inline void flush_fp_to_thread(struct task_struct *t) { } #endif diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index b06352474ad0..15da2b5df85e 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -143,33 +143,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) blr /* - * __giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. + * save_fpu(tsk) + * Save the floating-point registers in its thread_struct. * Enables the FPU for use in the kernel on return. 
*/ -_GLOBAL(__giveup_fpu) +_GLOBAL(save_fpu) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r6,THREAD_FPSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r6,0 bne 2f addi r6,r3,THREAD_FPSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32FPVSRS(0, R4, R6) +2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 29da07fb3b4a..a7e5061187e8 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -133,6 +133,16 @@ void __msr_check_and_clear(unsigned long bits) EXPORT_SYMBOL(__msr_check_and_clear); #ifdef CONFIG_PPC_FPU +void __giveup_fpu(struct task_struct *tsk) +{ + save_fpu(tsk); + tsk->thread.regs->msr &= ~MSR_FP; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_fpu(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -459,7 +469,7 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); if (usermsr & MSR_FP) - __giveup_fpu(tsk); + save_fpu(tsk); if (usermsr & MSR_VEC) __giveup_altivec(tsk); -- cgit v1.2.3 From 6f515d842e8e1b205e54f44b9013bf14870b97a7 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:50 +1100 Subject: powerpc: Add the ability to save Altivec without giving it up This patch adds the ability to be able to save the VEC registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU registers in the thread copy path to avoid a possibly pointless reload of VEC state. 
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 3 ++- arch/powerpc/kernel/process.c | 12 +++++++++++- arch/powerpc/kernel/vector.S | 24 ++++-------------------- 3 files changed, 17 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 6a201e8dc947..9028822bb73f 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -43,12 +43,13 @@ static inline void flush_fp_to_thread(struct task_struct *t) { } extern void enable_kernel_altivec(void); extern void flush_altivec_to_thread(struct task_struct *); extern void giveup_altivec(struct task_struct *); -extern void __giveup_altivec(struct task_struct *); +extern void save_altivec(struct task_struct *); static inline void disable_kernel_altivec(void) { msr_check_and_clear(MSR_VEC); } #else +static inline void save_altivec(struct task_struct *t) { } static inline void __giveup_altivec(struct task_struct *t) { } #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a7e5061187e8..14c09d25de98 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -213,6 +213,16 @@ static int restore_fp(struct task_struct *tsk) { return 0; } #ifdef CONFIG_ALTIVEC #define loadvec(thr) ((thr).load_vec) +static void __giveup_altivec(struct task_struct *tsk) +{ + save_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VEC; +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + tsk->thread.regs->msr &= ~MSR_VSX; +#endif +} + void giveup_altivec(struct task_struct *tsk) { check_if_tm_restore_required(tsk); @@ -472,7 +482,7 @@ void save_all(struct task_struct *tsk) save_fpu(tsk); if (usermsr & MSR_VEC) - __giveup_altivec(tsk); + save_altivec(tsk); if (usermsr & MSR_VSX) __giveup_vsx(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 
038cff8cf5f2..51b0c175ea8c 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -106,36 +106,20 @@ _GLOBAL(load_up_altivec) blr /* - * __giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. + * save_altivec(tsk) + * Save the vector registers to its thread_struct */ -_GLOBAL(__giveup_altivec) +_GLOBAL(save_altivec) addi r3,r3,THREAD /* want THREAD of task */ PPC_LL r7,THREAD_VRSAVEAREA(r3) PPC_LL r5,PT_REGS(r3) PPC_LCMPI 0,r7,0 bne 2f addi r7,r3,THREAD_VRSTATE -2: PPC_LCMPI 0,r5,0 - SAVE_32VRS(0,r4,r7) +2: SAVE_32VRS(0,r4,r7) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 - beq 1f - PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -#ifdef CONFIG_VSX -BEGIN_FTR_SECTION - lis r3,(MSR_VEC|MSR_VSX)@h -FTR_SECTION_ELSE - lis r3,MSR_VEC@h -ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX) -#else - lis r3,MSR_VEC@h -#endif - andc r4,r4,r3 /* disable FP for previous task */ - PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: blr #ifdef CONFIG_VSX -- cgit v1.2.3 From bf6a4d5b75d1ea87897fe68d0e45d35a2996c678 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Mon, 29 Feb 2016 17:53:51 +1100 Subject: powerpc: Add the ability to save VSX without giving it up This patch adds the ability to be able to save the VSX registers to the thread struct without giving up (disabling the facility) next time the process returns to userspace. This patch builds on a previous optimisation for the FPU and VEC registers in the thread copy path to avoid a possibly pointless reload of VSX state. 
Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 4 ---- arch/powerpc/kernel/ppc_ksyms.c | 4 ---- arch/powerpc/kernel/process.c | 42 +++++++++++++++++++++++++----------- arch/powerpc/kernel/vector.S | 17 --------------- 4 files changed, 30 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 9028822bb73f..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -56,14 +56,10 @@ static inline void __giveup_altivec(struct task_struct *t) { } #ifdef CONFIG_VSX extern void enable_kernel_vsx(void); extern void flush_vsx_to_thread(struct task_struct *); -extern void giveup_vsx(struct task_struct *); -extern void __giveup_vsx(struct task_struct *); static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -#else -static inline void __giveup_vsx(struct task_struct *t) { } #endif #ifdef CONFIG_SPE diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 41e1607e800c..ef7024dacff7 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -28,10 +28,6 @@ EXPORT_SYMBOL(load_vr_state); EXPORT_SYMBOL(store_vr_state); #endif -#ifdef CONFIG_VSX -EXPORT_SYMBOL_GPL(__giveup_vsx); -#endif - #ifdef CONFIG_EPAPR_PARAVIRT EXPORT_SYMBOL(epapr_hypercall_start); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 14c09d25de98..d7a9df51b974 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -280,19 +280,31 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; } #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -void giveup_vsx(struct task_struct *tsk) +static void __giveup_vsx(struct task_struct *tsk) { - check_if_tm_restore_required(tsk); - - msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); if 
(tsk->thread.regs->msr & MSR_FP) __giveup_fpu(tsk); if (tsk->thread.regs->msr & MSR_VEC) __giveup_altivec(tsk); + tsk->thread.regs->msr &= ~MSR_VSX; +} + +static void giveup_vsx(struct task_struct *tsk) +{ + check_if_tm_restore_required(tsk); + + msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); __giveup_vsx(tsk); msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -EXPORT_SYMBOL(giveup_vsx); + +static void save_vsx(struct task_struct *tsk) +{ + if (tsk->thread.regs->msr & MSR_FP) + save_fpu(tsk); + if (tsk->thread.regs->msr & MSR_VEC) + save_altivec(tsk); +} void enable_kernel_vsx(void) { @@ -335,6 +347,7 @@ static int restore_vsx(struct task_struct *tsk) } #else static inline int restore_vsx(struct task_struct *tsk) { return 0; } +static inline void save_vsx(struct task_struct *tsk) { } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -478,14 +491,19 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); - if (usermsr & MSR_FP) - save_fpu(tsk); - - if (usermsr & MSR_VEC) - save_altivec(tsk); + /* + * Saving the way the register space is in hardware, save_vsx boils + * down to a save_fpu() and save_altivec() + */ + if (usermsr & MSR_VSX) { + save_vsx(tsk); + } else { + if (usermsr & MSR_FP) + save_fpu(tsk); - if (usermsr & MSR_VSX) - __giveup_vsx(tsk); + if (usermsr & MSR_VEC) + save_altivec(tsk); + } if (usermsr & MSR_SPE) __giveup_spe(tsk); diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 51b0c175ea8c..1c2e7a343bf5 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -151,23 +151,6 @@ _GLOBAL(load_up_vsx) std r12,_MSR(r1) b fast_exception_return -/* - * __giveup_vsx(tsk) - * Disable VSX for the task given as the argument. - * Does NOT save vsx registers. 
- */ -_GLOBAL(__giveup_vsx) - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VSX@h - andc r4,r4,r3 /* disable VSX for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: - blr - #endif /* CONFIG_VSX */ -- cgit v1.2.3 From 01d7c2a2de47890934faba91a71d183795e4348d Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 8 Mar 2016 09:08:47 +1100 Subject: powerpc/process: Fix altivec SPR not being saved save_sprs() in process.c contains the following test: if (cpu_has_feature(cpu_has_feature(CPU_FTR_ALTIVEC))) t->vrsave = mfspr(SPRN_VRSAVE); The CPU feature with the mask 0x1 is CPU_FTR_COHERENT_ICACHE so the test is equivalent to: if (cpu_has_feature(CPU_FTR_ALTIVEC) && cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) On CPUs without support for both (i.e. G5) this results in vrsave not being saved between context switches. The vector register save/restore code doesn't use VRSAVE to determine which registers to save/restore, but the value of VRSAVE is used to determine if altivec is being used in several code paths. Fixes: 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()") Cc: stable@vger.kernel.org Signed-off-by: Oliver O'Halloran Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 612df305886b..b8500b4ac7fe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -983,7 +983,7 @@ void restore_tm_state(struct pt_regs *regs) static inline void save_sprs(struct thread_struct *t) { #ifdef CONFIG_ALTIVEC - if (cpu_has_feature(cpu_has_feature(CPU_FTR_ALTIVEC))) + if (cpu_has_feature(CPU_FTR_ALTIVEC)) t->vrsave = mfspr(SPRN_VRSAVE); #endif #ifdef CONFIG_PPC_BOOK3S_64 -- cgit v1.2.3