From 2e54dc3c7dadd4d012f132b1a2b3ab89d9a48cc2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 12 Jun 2015 10:57:40 +0200 Subject: s390/kernel: move EX_TABLE macros to linkage.h header file Move the EX_TABLE macro definitions from the processor.h to the linkage.h header file. It helps to reduce circular header file dependencies. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dedb6218544b..f4d9f741421d 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -336,25 +336,6 @@ extern void memcpy_absolute(void *, void *, size_t); memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \ } -/* - * Helper macro for exception table entries - */ -#define EX_TABLE(_fault, _target) \ - ".section __ex_table,\"a\"\n" \ - ".align 4\n" \ - ".long (" #_fault ") - .\n" \ - ".long (" #_target ") - .\n" \ - ".previous\n" - -#else /* __ASSEMBLY__ */ - -#define EX_TABLE(_fault, _target) \ - .section __ex_table,"a" ; \ - .align 4 ; \ - .long (_fault) - . ; \ - .long (_target) - . ; \ - .previous - #endif /* __ASSEMBLY__ */ #endif /* __ASM_S390_PROCESSOR_H */ -- cgit v1.2.3 From 904818e2f229f3d94ec95f6932a6358c81e73d78 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 11 Jun 2015 15:33:54 +0200 Subject: s390/kernel: introduce fpu-internal.h with fpu helper functions Introduce a new structure to manage FP and VX registers. Refactor the save and restore of floating point and vector registers with a set of helper functions in fpu-internal.h. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/fpu-internal.h | 191 +++++++++++++++++++++++++++++++++++ arch/s390/include/asm/processor.h | 4 +- arch/s390/include/asm/switch_to.h | 134 +----------------------- arch/s390/kernel/compat_signal.c | 43 ++------ arch/s390/kernel/nmi.c | 1 + arch/s390/kernel/process.c | 14 +-- arch/s390/kernel/ptrace.c | 131 ++++++++++++------------ arch/s390/kernel/signal.c | 42 ++------ arch/s390/kernel/traps.c | 24 ++--- 9 files changed, 299 insertions(+), 285 deletions(-) create mode 100644 arch/s390/include/asm/fpu-internal.h (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h new file mode 100644 index 000000000000..04b4cfc08fb5 --- /dev/null +++ b/arch/s390/include/asm/fpu-internal.h @@ -0,0 +1,191 @@ +/* + * General floating pointer and vector register helpers + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ + +#ifndef _ASM_S390_FPU_INTERNAL_H +#define _ASM_S390_FPU_INTERNAL_H + +#include +#include +#include +#include +#include + +struct fpu { + __u32 fpc; /* Floating-point control */ + __u32 pad; + freg_t fprs[__NUM_FPRS]; /* Floating-point register save area */ + __vector128 *vxrs; /* Vector register save area */ +}; + +#define is_vx_fpu(fpu) (!!(fpu)->vxrs) +#define is_vx_task(tsk) (!!(tsk)->thread.fpu.vxrs) + +static inline int test_fp_ctl(u32 fpc) +{ + u32 orig_fpc; + int rc; + + asm volatile( + " efpc %1\n" + " sfpc %2\n" + "0: sfpc %1\n" + " la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc), "=d" (orig_fpc) + : "d" (fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_ctl(u32 *fpc) +{ + asm volatile( + " stfpc %0\n" + : "+Q" (*fpc)); +} + +static inline int restore_fp_ctl(u32 *fpc) +{ + int rc; + + asm volatile( + " lfpc %1\n" + "0: la %0,0\n" + "1:\n" + : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); + return rc; +} + +static inline void save_fp_regs(freg_t *fprs) +{ + asm volatile("std 0,%0" : "=Q" (fprs[0])); + asm volatile("std 2,%0" : "=Q" (fprs[2])); + asm volatile("std 4,%0" : "=Q" (fprs[4])); + asm volatile("std 6,%0" : "=Q" (fprs[6])); + asm volatile("std 1,%0" : "=Q" (fprs[1])); + asm volatile("std 3,%0" : "=Q" (fprs[3])); + asm volatile("std 5,%0" : "=Q" (fprs[5])); + asm volatile("std 7,%0" : "=Q" (fprs[7])); + asm volatile("std 8,%0" : "=Q" (fprs[8])); + asm volatile("std 9,%0" : "=Q" (fprs[9])); + asm volatile("std 10,%0" : "=Q" (fprs[10])); + asm volatile("std 11,%0" : "=Q" (fprs[11])); + asm volatile("std 12,%0" : "=Q" (fprs[12])); + asm volatile("std 13,%0" : "=Q" (fprs[13])); + asm volatile("std 14,%0" : "=Q" (fprs[14])); + asm volatile("std 15,%0" : "=Q" (fprs[15])); +} + +static inline void restore_fp_regs(freg_t *fprs) +{ + asm volatile("ld 0,%0" : : "Q" (fprs[0])); + asm volatile("ld 2,%0" : : "Q" (fprs[2])); + asm volatile("ld 4,%0" : : "Q" (fprs[4])); + asm volatile("ld 6,%0" : : "Q" (fprs[6])); + asm volatile("ld 1,%0" : : "Q" (fprs[1])); + asm volatile("ld 3,%0" : : "Q" (fprs[3])); + asm volatile("ld 5,%0" : : "Q" (fprs[5])); + asm volatile("ld 7,%0" : : "Q" (fprs[7])); + asm volatile("ld 8,%0" : : "Q" (fprs[8])); + asm volatile("ld 9,%0" : : "Q" (fprs[9])); + asm volatile("ld 10,%0" : : "Q" (fprs[10])); + asm volatile("ld 11,%0" : : "Q" (fprs[11])); + asm volatile("ld 12,%0" : : "Q" (fprs[12])); + asm volatile("ld 13,%0" : : "Q" (fprs[13])); + asm volatile("ld 14,%0" : : "Q" (fprs[14])); + asm volatile("ld 15,%0" : : "Q" (fprs[15])); +} + +static inline void save_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(addrtype *) vxrs) : : "1"); +} + +static inline void save_vx_regs_safe(__vector128 *vxrs) +{ + unsigned long cr0, flags; + + flags = arch_local_irq_save(); + __ctl_store(cr0, 0, 0); + __ctl_set_bit(0, 17); + __ctl_set_bit(0, 18); + save_vx_regs(vxrs); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); +} + +static inline void restore_vx_regs(__vector128 *vxrs) +{ + typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; + + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : : "Q" (*(addrtype *) vxrs) : "1"); +} + +static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + fprs[i] = *(freg_t *)(vxrs + i); +} + +static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) +{ + int i; + + for (i = 0; i < __NUM_FPRS; i++) + *(freg_t *)(vxrs + i) = fprs[i]; +} + +static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + fpregs->pad = 0; + if (is_vx_fpu(fpu)) + convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); + else + memcpy((freg_t *)&fpregs->fprs, fpu->fprs, + sizeof(fpregs->fprs)); +} + +static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) +{ + if (is_vx_fpu(fpu)) + convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); + else + memcpy(fpu->fprs, (freg_t *)&fpregs->fprs, + sizeof(fpregs->fprs)); +} + +static inline void save_fpu_regs(struct fpu *fpu) +{ + save_fp_ctl(&fpu->fpc); + if (is_vx_fpu(fpu)) + save_vx_regs(fpu->vxrs); + else + save_fp_regs(fpu->fprs); +} + +static inline void restore_fpu_regs(struct fpu *fpu) +{ + restore_fp_ctl(&fpu->fpc); + if (is_vx_fpu(fpu)) + restore_vx_regs(fpu->vxrs); + else + restore_fp_regs(fpu->fprs); +} + +#endif /* _ASM_S390_FPU_INTERNAL_H */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index f4d9f741421d..19f51db7c5e6 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -28,6 +28,7 @@ #include #include #include +#include static inline void set_cpu_flag(int flag) { @@ -85,7 +86,7 @@ typedef struct { * Thread structure */ struct thread_struct { - s390_fp_regs fp_regs; + struct fpu fpu; /* FP and VX register save area */ unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ mm_segment_t mm_segment; @@ -101,7 +102,6 @@ struct thread_struct { struct runtime_instr_cb *ri_cb; int ri_signum; unsigned char trap_tdb[256]; /* Transaction abort diagnose block */ - __vector128 *vxrs; /* Vector register save area */ }; /* Flag to disable transactions. */ diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index b304031d9d11..caf4f23462b0 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,138 +8,12 @@ #define __ASM_SWITCH_TO_H #include +#include #include extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); -static inline int test_fp_ctl(u32 fpc) -{ - u32 orig_fpc; - int rc; - - asm volatile( - " efpc %1\n" - " sfpc %2\n" - "0: sfpc %1\n" - " la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc), "=d" (orig_fpc) - : "d" (fpc), "0" (-EINVAL)); - return rc; -} - -static inline void save_fp_ctl(u32 *fpc) -{ - asm volatile( - " stfpc %0\n" - : "+Q" (*fpc)); -} - -static inline int restore_fp_ctl(u32 *fpc) -{ - int rc; - - asm volatile( - " lfpc %1\n" - "0: la %0,0\n" - "1:\n" - : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); - return rc; -} - -static inline void save_fp_regs(freg_t *fprs) -{ - asm volatile("std 0,%0" : "=Q" (fprs[0])); - asm volatile("std 2,%0" : "=Q" (fprs[2])); - asm volatile("std 4,%0" : "=Q" (fprs[4])); - asm volatile("std 6,%0" : "=Q" (fprs[6])); - asm volatile("std 1,%0" : "=Q" (fprs[1])); - asm volatile("std 3,%0" : "=Q" (fprs[3])); - asm volatile("std 5,%0" : "=Q" (fprs[5])); - asm volatile("std 7,%0" : "=Q" (fprs[7])); - asm volatile("std 8,%0" : "=Q" (fprs[8])); - asm volatile("std 9,%0" : "=Q" (fprs[9])); - asm volatile("std 10,%0" : "=Q" (fprs[10])); - asm volatile("std 11,%0" : "=Q" (fprs[11])); - asm volatile("std 12,%0" : "=Q" (fprs[12])); - asm volatile("std 13,%0" : "=Q" (fprs[13])); - asm volatile("std 14,%0" : "=Q" (fprs[14])); - asm volatile("std 15,%0" : "=Q" (fprs[15])); -} - -static inline void restore_fp_regs(freg_t *fprs) -{ - asm volatile("ld 0,%0" : : "Q" (fprs[0])); - asm volatile("ld 2,%0" : : "Q" (fprs[2])); - asm volatile("ld 4,%0" : : "Q" (fprs[4])); - asm volatile("ld 6,%0" : : "Q" (fprs[6])); - asm volatile("ld 1,%0" : : "Q" (fprs[1])); - asm volatile("ld 3,%0" : : "Q" (fprs[3])); - asm volatile("ld 5,%0" : : "Q" (fprs[5])); - asm volatile("ld 7,%0" : : "Q" (fprs[7])); - asm volatile("ld 8,%0" : : "Q" (fprs[8])); - asm volatile("ld 9,%0" : : "Q" (fprs[9])); - asm volatile("ld 10,%0" : : "Q" (fprs[10])); - asm volatile("ld 11,%0" : : "Q" (fprs[11])); - asm volatile("ld 12,%0" : : "Q" (fprs[12])); - asm volatile("ld 13,%0" : : "Q" (fprs[13])); - asm volatile("ld 14,%0" : : "Q" (fprs[14])); - asm volatile("ld 15,%0" : : "Q" (fprs[15])); -} - -static inline void save_vx_regs(__vector128 *vxrs) -{ - typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; - - asm volatile( - " la 1,%0\n" - " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ - : "=Q" (*(addrtype *) vxrs) : : "1"); -} - -static inline void save_vx_regs_safe(__vector128 *vxrs) -{ - unsigned long cr0, flags; - - flags = arch_local_irq_save(); - __ctl_store(cr0, 0, 0); - __ctl_set_bit(0, 17); - __ctl_set_bit(0, 18); - save_vx_regs(vxrs); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} - -static inline void restore_vx_regs(__vector128 *vxrs) -{ - typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; - - asm volatile( - " la 1,%0\n" - " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(addrtype *) vxrs) : "1"); -} - -static inline void save_fp_vx_regs(struct task_struct *task) -{ - if (task->thread.vxrs) - save_vx_regs(task->thread.vxrs); - else - save_fp_regs(task->thread.fp_regs.fprs); -} - -static inline void restore_fp_vx_regs(struct task_struct *task) -{ - if (task->thread.vxrs) - restore_vx_regs(task->thread.vxrs); - else - restore_fp_regs(task->thread.fp_regs.fprs); -} - static inline void save_access_regs(unsigned int *acrs) { typedef struct { int _[NUM_ACRS]; } acrstype; @@ -156,15 +30,13 @@ static inline void restore_access_regs(unsigned int *acrs) #define switch_to(prev,next,last) do { \ if (prev->mm) { \ - save_fp_ctl(&prev->thread.fp_regs.fpc); \ - save_fp_vx_regs(prev); \ + save_fpu_regs(&prev->thread.fpu); \ save_access_regs(&prev->thread.acrs[0]); \ save_ri_cb(prev->thread.ri_cb); \ } \ if (next->mm) { \ update_cr_regs(next); \ - restore_fp_ctl(&next->thread.fp_regs.fpc); \ - restore_fp_vx_regs(next); \ + restore_fpu_regs(&next->thread.fpu); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ } \ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index b55d2063a23d..452995137a69 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -153,33 +153,15 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* Store registers needed to create the signal frame */ static void store_sigregs(void) { - int i; - save_access_regs(current->thread.acrs); - save_fp_ctl(¤t->thread.fp_regs.fpc); - if (current->thread.vxrs) { - save_vx_regs(current->thread.vxrs); - for (i = 0; i < __NUM_FPRS; i++) - current->thread.fp_regs.fprs[i] = - *(freg_t *)(current->thread.vxrs + i); - } else - save_fp_regs(current->thread.fp_regs.fprs); + save_fpu_regs(¤t->thread.fpu); } /* Load registers after signal return */ static void load_sigregs(void) { - int i; - restore_access_regs(current->thread.acrs); - restore_fp_ctl(¤t->thread.fp_regs.fpc); - if (current->thread.vxrs) { - for (i = 0; i < __NUM_FPRS; i++) - *(freg_t *)(current->thread.vxrs + i) = - current->thread.fp_regs.fprs[i]; - restore_vx_regs(current->thread.vxrs); - } else - restore_fp_regs(current->thread.fp_regs.fprs); + restore_fpu_regs(¤t->thread.fpu); } static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) @@ -196,8 +178,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, - sizeof(user_sregs.fpregs)); + fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) return -EFAULT; return 0; @@ -235,9 +216,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - - memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, - sizeof(current->thread.fp_regs)); + fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; @@ -258,13 +237,13 @@ static int save_sigregs_ext32(struct pt_regs *regs, return -EFAULT; /* Save vector registers to signal stack */ - if (current->thread.vxrs) { + if (is_vx_task(current)) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, - current->thread.vxrs + __NUM_VXRS_LOW, + current->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(sregs_ext->vxrs_high))) return -EFAULT; } @@ -286,15 +265,15 @@ static int restore_sigregs_ext32(struct pt_regs *regs, *(__u32 *)®s->gprs[i] = gprs_high[i]; /* Restore vector registers from signal stack */ - if (current->thread.vxrs) { + if (is_vx_task(current)) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, &sregs_ext->vxrs_high, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i]; } return 0; } @@ -472,7 +451,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, */ uc_flags = UC_GPRS_HIGH; if (MACHINE_HAS_VX) { - if (current->thread.vxrs) + if (is_vx_task(current)) uc_flags |= UC_VXRS; } else frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 56b550893593..e66141c6696a 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -21,6 +21,7 @@ #include #include #include +#include #include struct mcck_struct { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 8f587d871b9f..61795bc2fff4 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -81,8 +81,8 @@ void release_thread(struct task_struct *dead_task) void arch_release_task_struct(struct task_struct *tsk) { - if (tsk->thread.vxrs) - kfree(tsk->thread.vxrs); + if (is_vx_task(tsk)) + kfree(tsk->thread.fpu.vxrs); } int copy_thread(unsigned long clone_flags, unsigned long new_stackp, @@ -143,10 +143,10 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, frame->childregs.psw.mask &= ~PSW_MASK_RI; /* Save the fpu registers to new thread structure. */ - save_fp_ctl(&p->thread.fp_regs.fpc); - save_fp_regs(p->thread.fp_regs.fprs); - p->thread.fp_regs.pad = 0; - p->thread.vxrs = NULL; + save_fp_ctl(&p->thread.fpu.fpc); + save_fp_regs(p->thread.fpu.fprs); + p->thread.fpu.pad = 0; + p->thread.fpu.vxrs = NULL; /* Set a new TLS ? */ if (clone_flags & CLONE_SETTLS) { unsigned long tls = frame->childregs.gprs[6]; @@ -162,7 +162,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp, asmlinkage void execve_tail(void) { - current->thread.fp_regs.fpc = 0; + current->thread.fpu.fpc = 0; asm volatile("sfpc %0" : : "d" (0)); } diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index d363c9c322a1..52e2e1dd919d 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -59,7 +59,7 @@ void update_cr_regs(struct task_struct *task) if (MACHINE_HAS_VX) { /* Enable/disable of vector extension */ cr_new &= ~(1UL << 17); - if (task->thread.vxrs) + if (task->thread.fpu.vxrs) cr_new |= (1UL << 17); } if (cr_new != cr) @@ -242,21 +242,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) /* * floating point control reg. is in the thread structure */ - tmp = child->thread.fp_regs.fpc; + tmp = child->thread.fpu.fpc; tmp <<= BITS_PER_LONG - 32; } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* - * floating point regs. are either in child->thread.fp_regs - * or the child->thread.vxrs array + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; - if (child->thread.vxrs) + if (is_vx_task(child)) tmp = *(addr_t *) - ((addr_t) child->thread.vxrs + 2*offset); + ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(addr_t *) - ((addr_t) &child->thread.fp_regs.fprs + offset); + ((addr_t) &child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -387,20 +387,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) if ((unsigned int) data != 0 || test_fp_ctl(data >> (BITS_PER_LONG - 32))) return -EINVAL; - child->thread.fp_regs.fpc = data >> (BITS_PER_LONG - 32); + child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); } else if (addr < (addr_t) (&dummy->regs.fp_regs + 1)) { /* - * floating point regs. are either in child->thread.fp_regs - * or the child->thread.vxrs array + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy->regs.fp_regs.fprs; - if (child->thread.vxrs) + if (is_vx_task(child)) *(addr_t *)((addr_t) - child->thread.vxrs + 2*offset) = data; + child->thread.fpu.vxrs + 2*offset) = data; else *(addr_t *)((addr_t) - &child->thread.fp_regs.fprs + offset) = data; + &child->thread.fpu.fprs + offset) = data; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -621,20 +621,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) /* * floating point control reg. is in the thread structure */ - tmp = child->thread.fp_regs.fpc; + tmp = child->thread.fpu.fpc; } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* - * floating point regs. are either in child->thread.fp_regs - * or the child->thread.vxrs array + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; - if (child->thread.vxrs) + if (is_vx_task(child)) tmp = *(__u32 *) - ((addr_t) child->thread.vxrs + 2*offset); + ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(__u32 *) - ((addr_t) &child->thread.fp_regs.fprs + offset); + ((addr_t) &child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* @@ -746,20 +746,20 @@ static int __poke_user_compat(struct task_struct *child, */ if (test_fp_ctl(tmp)) return -EINVAL; - child->thread.fp_regs.fpc = data; + child->thread.fpu.fpc = data; } else if (addr < (addr_t) (&dummy32->regs.fp_regs + 1)) { /* - * floating point regs. are either in child->thread.fp_regs - * or the child->thread.vxrs array + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array */ offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs; - if (child->thread.vxrs) + if (is_vx_task(child)) *(__u32 *)((addr_t) - child->thread.vxrs + 2*offset) = tmp; + child->thread.fpu.vxrs + 2*offset) = tmp; else *(__u32 *)((addr_t) - &child->thread.fp_regs.fprs + offset) = tmp; + &child->thread.fpu.fprs + offset) = tmp; } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* @@ -952,18 +952,16 @@ static int s390_fpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - if (target == current) { - save_fp_ctl(&target->thread.fp_regs.fpc); - save_fp_regs(target->thread.fp_regs.fprs); - } else if (target->thread.vxrs) { - int i; + _s390_fp_regs fp_regs; + + if (target == current) + save_fpu_regs(&target->thread.fpu); + + fp_regs.fpc = target->thread.fpu.fpc; + fpregs_store(&fp_regs, &target->thread.fpu); - for (i = 0; i < __NUM_VXRS_LOW; i++) - target->thread.fp_regs.fprs[i] = - *(freg_t *)(target->thread.vxrs + i); - } return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fp_regs, 0, -1); + &fp_regs, 0, -1); } static int s390_fpregs_set(struct task_struct *target, @@ -972,41 +970,36 @@ static int s390_fpregs_set(struct task_struct *target, const void __user *ubuf) { int rc = 0; + freg_t fprs[__NUM_FPRS]; - if (target == current) { - save_fp_ctl(&target->thread.fp_regs.fpc); - save_fp_regs(target->thread.fp_regs.fprs); - } + if (target == current) + save_fpu_regs(&target->thread.fpu); /* If setting FPC, must validate it first. */ if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { - u32 ufpc[2] = { target->thread.fp_regs.fpc, 0 }; + u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, 0, offsetof(s390_fp_regs, fprs)); if (rc) return rc; if (ufpc[1] != 0 || test_fp_ctl(ufpc[0])) return -EINVAL; - target->thread.fp_regs.fpc = ufpc[0]; + target->thread.fpu.fpc = ufpc[0]; } if (rc == 0 && count > 0) rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.fp_regs.fprs, - offsetof(s390_fp_regs, fprs), -1); + fprs, offsetof(s390_fp_regs, fprs), -1); + if (rc) + return rc; - if (rc == 0) { - if (target == current) { - restore_fp_ctl(&target->thread.fp_regs.fpc); - restore_fp_regs(target->thread.fp_regs.fprs); - } else if (target->thread.vxrs) { - int i; - - for (i = 0; i < __NUM_VXRS_LOW; i++) - *(freg_t *)(target->thread.vxrs + i) = - target->thread.fp_regs.fprs[i]; - } - } + if (is_vx_task(target)) + convert_fp_to_vx(target->thread.fpu.vxrs, fprs); + else + memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); + + if (target == current) + restore_fpu_regs(&target->thread.fpu); return rc; } @@ -1069,11 +1062,11 @@ static int s390_vxrs_low_get(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (target->thread.vxrs) { + if (is_vx_task(target)) { if (target == current) - save_vx_regs(target->thread.vxrs); + save_fpu_regs(&target->thread.fpu); for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1); + vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); } else memset(vxrs, 0, sizeof(vxrs)); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); @@ -1089,19 +1082,19 @@ static int s390_vxrs_low_set(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (!target->thread.vxrs) { + if (!is_vx_task(target)) { rc = alloc_vector_registers(target); if (rc) return rc; } else if (target == current) - save_vx_regs(target->thread.vxrs); + save_fpu_regs(&target->thread.fpu); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) { for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i]; + *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i]; if (target == current) - restore_vx_regs(target->thread.vxrs); + restore_fpu_regs(&target->thread.fpu); } return rc; @@ -1116,10 +1109,10 @@ static int s390_vxrs_high_get(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (target->thread.vxrs) { + if (is_vx_task(target)) { if (target == current) - save_vx_regs(target->thread.vxrs); - memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW, + save_fpu_regs(&target->thread.fpu); + memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs)); } else memset(vxrs, 0, sizeof(vxrs)); @@ -1135,17 +1128,17 @@ static int s390_vxrs_high_set(struct task_struct *target, if (!MACHINE_HAS_VX) return -ENODEV; - if (!target->thread.vxrs) { + if (!is_vx_task(target)) { rc = alloc_vector_registers(target); if (rc) return rc; } else if (target == current) - save_vx_regs(target->thread.vxrs); + save_fpu_regs(&target->thread.fpu); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.vxrs + __NUM_VXRS_LOW, 0, -1); + target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1); if (rc == 0 && target == current) - restore_vx_regs(target->thread.vxrs); + restore_vx_regs(target->thread.fpu.vxrs); return rc; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 98520c8ae0ee..49c259cd5a33 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -105,32 +105,14 @@ struct rt_sigframe static void store_sigregs(void) { save_access_regs(current->thread.acrs); - save_fp_ctl(¤t->thread.fp_regs.fpc); - if (current->thread.vxrs) { - int i; - - save_vx_regs(current->thread.vxrs); - for (i = 0; i < __NUM_FPRS; i++) - current->thread.fp_regs.fprs[i] = - *(freg_t *)(current->thread.vxrs + i); - } else - save_fp_regs(current->thread.fp_regs.fprs); + save_fpu_regs(¤t->thread.fpu); } /* Load registers after signal return */ static void load_sigregs(void) { restore_access_regs(current->thread.acrs); - restore_fp_ctl(¤t->thread.fp_regs.fpc); - if (current->thread.vxrs) { - int i; - - for (i = 0; i < __NUM_FPRS; i++) - *(freg_t *)(current->thread.vxrs + i) = - current->thread.fp_regs.fprs[i]; - restore_vx_regs(current->thread.vxrs); - } else - restore_fp_regs(current->thread.fp_regs.fprs); + restore_fpu_regs(¤t->thread.fpu); } /* Returns non-zero on fault. */ @@ -146,8 +128,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - memcpy(&user_sregs.fpregs, ¤t->thread.fp_regs, - sizeof(user_sregs.fpregs)); + fpregs_store(&user_sregs.fpregs, ¤t->thread.fpu); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) return -EFAULT; return 0; @@ -185,8 +166,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - memcpy(¤t->thread.fp_regs, &user_sregs.fpregs, - sizeof(current->thread.fp_regs)); + fpregs_load(&user_sregs.fpregs, ¤t->thread.fpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; @@ -200,13 +180,13 @@ static int save_sigregs_ext(struct pt_regs *regs, int i; /* Save vector registers to signal stack */ - if (current->thread.vxrs) { + if (is_vx_task(current)) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1); + vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, - current->thread.vxrs + __NUM_VXRS_LOW, + current->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(sregs_ext->vxrs_high))) return -EFAULT; } @@ -220,15 +200,15 @@ static int restore_sigregs_ext(struct pt_regs *regs, int i; /* Restore vector registers from signal stack */ - if (current->thread.vxrs) { + if (is_vx_task(current)) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW, + __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, &sregs_ext->vxrs_high, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i]; + *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i]; } return 0; } @@ -400,7 +380,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, uc_flags = 0; if (MACHINE_HAS_VX) { frame_size += sizeof(_sigregs_ext); - if (current->thread.vxrs) + if (is_vx_task(current)) uc_flags |= UC_VXRS; } frame = get_sigframe(&ksig->ka, regs, frame_size); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 7bea81d8a363..97598d1876c7 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "entry.h" int show_unhandled_signals = 1; @@ -227,7 +227,6 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, int alloc_vector_registers(struct task_struct *tsk) { __vector128 *vxrs; - int i; /* Allocate vector register save area. */ vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS, @@ -236,11 +235,10 @@ int alloc_vector_registers(struct task_struct *tsk) return -ENOMEM; preempt_disable(); if (tsk == current) - save_fp_regs(tsk->thread.fp_regs.fprs); + save_fp_regs(tsk->thread.fpu.fprs); /* Copy the 16 floating point registers */ - for (i = 0; i < 16; i++) - *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i]; - tsk->thread.vxrs = vxrs; + convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs); + tsk->thread.fpu.vxrs = vxrs; if (tsk == current) { __ctl_set_bit(0, 17); restore_vx_regs(vxrs); @@ -259,8 +257,8 @@ void vector_exception(struct pt_regs *regs) } /* get vector interrupt code from fpc */ - asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc)); - vic = (current->thread.fp_regs.fpc & 0xf00) >> 8; + asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc)); + vic = (current->thread.fpu.fpc & 0xf00) >> 8; switch (vic) { case 1: /* invalid vector operation */ si_code = FPE_FLTINV; @@ -297,22 +295,22 @@ void data_exception(struct pt_regs *regs) location = get_trap_ip(regs); - asm volatile("stfpc %0" : "=Q" (current->thread.fp_regs.fpc)); + asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc)); /* Check for vector register enablement */ - if (MACHINE_HAS_VX && !current->thread.vxrs && - (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) { + if (MACHINE_HAS_VX && !is_vx_task(current) && + (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) { alloc_vector_registers(current); /* Vector data exception is suppressing, rewind psw. */ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); clear_pt_regs_flag(regs, PIF_PER_TRAP); return; } - if (current->thread.fp_regs.fpc & FPC_DXC_MASK) + if (current->thread.fpu.fpc & FPC_DXC_MASK) signal = SIGFPE; else signal = SIGILL; if (signal == SIGFPE) - do_fp_trap(regs, current->thread.fp_regs.fpc); + do_fp_trap(regs, current->thread.fpu.fpc); else if (signal) do_trap(regs, signal, ILL_ILLOPN, "data exception"); } -- cgit v1.2.3 From 9977e886cbbc758b4b601a160b5825ba573b5ca8 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 10 Jun 2015 12:53:42 +0200 Subject: s390/kernel: lazy restore fpu registers Improve the save and restore behavior of FPU register contents to use the vector extension within the kernel. The kernel does not use floating-point or vector registers and, therefore, saving and restoring the FPU register contents are performed for handling signals or switching processes only. To prepare for using vector instructions and vector registers within the kernel, enhance the save behavior and implement a lazy restore at return to user space from a system call or interrupt. To implement the lazy restore, the save_fpu_regs() sets a CPU information flag, CIF_FPU, to indicate that the FPU registers must be restored. Saving and setting CIF_FPU is performed in an atomic fashion to be interrupt-safe. When the kernel wants to use the vector extension or wants to change the FPU register state for a task during signal handling, the save_fpu_regs() must be called first. The CIF_FPU flag is also set at process switch. At return to user space, the FPU state is restored. In particular, the FPU state includes the floating-point or vector register contents, as well as, vector-enablement and floating-point control. The FPU state restore and clearing CIF_FPU is also performed in an atomic fashion. For KVM, the restore of the FPU register state is performed when restoring the general-purpose guest registers before the SIE instructions is started. Because the path towards the SIE instruction is interruptible, the CIF_FPU flag must be checked again right before going into SIE. If set, the guest registers must be reloaded again by re-entering the outer SIE loop. This is the same behavior as if the SIE critical section is interrupted. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ctl_reg.h | 2 + arch/s390/include/asm/fpu-internal.h | 110 ++----------- arch/s390/include/asm/kvm_host.h | 6 +- arch/s390/include/asm/processor.h | 2 + arch/s390/include/asm/switch_to.h | 2 +- arch/s390/kernel/asm-offsets.c | 5 + arch/s390/kernel/compat_signal.c | 3 +- arch/s390/kernel/entry.S | 311 ++++++++++++++++++++++++++++++++++- arch/s390/kernel/nmi.c | 8 +- arch/s390/kernel/process.c | 33 +++- arch/s390/kernel/ptrace.c | 53 ++---- arch/s390/kernel/s390_ksyms.c | 3 + arch/s390/kernel/signal.c | 3 +- arch/s390/kernel/traps.c | 12 +- arch/s390/kvm/kvm-s390.c | 132 ++++++++++----- 15 files changed, 482 insertions(+), 203 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index d7697ab802f6..17a373576868 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -46,6 +46,8 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit) __ctl_load(reg, cr, cr); } +void __ctl_set_vx(void); + void smp_ctl_set_bit(int cr, int bit); void smp_ctl_clear_bit(int cr, int bit); diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h index cc44c75fc4f7..237f8fcbe46b 100644 --- a/arch/s390/include/asm/fpu-internal.h +++ b/arch/s390/include/asm/fpu-internal.h @@ -28,9 +28,14 @@ struct fpu { }; }; +void save_fpu_regs(struct fpu *fpu); + #define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX)) #define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX)) +/* VX array structure for address operand constraints in inline assemblies */ +struct vx_array { __vector128 _[__NUM_VXRS]; }; + static inline int test_fp_ctl(u32 fpc) { u32 orig_fpc; @@ -48,76 +53,6 @@ static inline int test_fp_ctl(u32 fpc) return rc; } -static inline void save_fp_ctl(u32 *fpc) -{ - asm volatile( - " stfpc %0\n" - : "+Q" (*fpc)); -} - -static inline int restore_fp_ctl(u32 *fpc) -{ - int rc; - - asm volatile( - " lfpc %1\n" - "0: la %0,0\n" - "1:\n" - : "=d" (rc) : "Q" (*fpc), "0" (-EINVAL)); - return rc; -} - -static inline void save_fp_regs(freg_t *fprs) -{ - asm volatile("std 0,%0" : "=Q" (fprs[0])); - asm volatile("std 2,%0" : "=Q" (fprs[2])); - asm volatile("std 4,%0" : "=Q" (fprs[4])); - asm volatile("std 6,%0" : "=Q" (fprs[6])); - asm volatile("std 1,%0" : "=Q" (fprs[1])); - asm volatile("std 3,%0" : "=Q" (fprs[3])); - asm volatile("std 5,%0" : "=Q" (fprs[5])); - asm volatile("std 7,%0" : "=Q" (fprs[7])); - asm volatile("std 8,%0" : "=Q" (fprs[8])); - asm volatile("std 9,%0" : "=Q" (fprs[9])); - asm volatile("std 10,%0" : "=Q" (fprs[10])); - asm volatile("std 11,%0" : "=Q" (fprs[11])); - asm volatile("std 12,%0" : "=Q" (fprs[12])); - asm volatile("std 13,%0" : "=Q" (fprs[13])); - asm volatile("std 14,%0" : "=Q" (fprs[14])); - asm volatile("std 15,%0" : "=Q" (fprs[15])); -} - -static inline void restore_fp_regs(freg_t *fprs) -{ - asm volatile("ld 0,%0" : : "Q" (fprs[0])); - asm volatile("ld 2,%0" : : "Q" (fprs[2])); - asm volatile("ld 4,%0" : : "Q" (fprs[4])); - asm volatile("ld 6,%0" : : "Q" (fprs[6])); - asm volatile("ld 1,%0" : : "Q" (fprs[1])); - asm volatile("ld 3,%0" : : "Q" (fprs[3])); - asm volatile("ld 5,%0" : : "Q" (fprs[5])); - asm volatile("ld 7,%0" : : "Q" (fprs[7])); - asm volatile("ld 8,%0" : : "Q" (fprs[8])); - asm volatile("ld 9,%0" : : "Q" (fprs[9])); - asm volatile("ld 10,%0" : : "Q" (fprs[10])); - asm volatile("ld 11,%0" : : "Q" (fprs[11])); - asm volatile("ld 12,%0" : : "Q" (fprs[12])); - asm volatile("ld 13,%0" : : "Q" (fprs[13])); - asm volatile("ld 14,%0" : : "Q" (fprs[14])); - asm volatile("ld 15,%0" : : "Q" (fprs[15])); -} - -static inline void save_vx_regs(__vector128 *vxrs) -{ - typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; - - asm volatile( - " la 1,%0\n" - " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ - : "=Q" (*(addrtype *) vxrs) : : "1"); -} - static inline void save_vx_regs_safe(__vector128 *vxrs) { unsigned long cr0, flags; @@ -126,20 +61,13 @@ static inline void save_vx_regs_safe(__vector128 *vxrs) __ctl_store(cr0, 0, 0); __ctl_set_bit(0, 17); __ctl_set_bit(0, 18); - save_vx_regs(vxrs); - __ctl_load(cr0, 0, 0); - arch_local_irq_restore(flags); -} - -static inline void restore_vx_regs(__vector128 *vxrs) -{ - typedef struct { __vector128 _[__NUM_VXRS]; } addrtype; - asm volatile( " la 1,%0\n" - " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ - " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ - : : "Q" (*(addrtype *) vxrs) : "1"); + " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ + : "=Q" (*(struct vx_array *) vxrs) : : "1"); + __ctl_load(cr0, 0, 0); + arch_local_irq_restore(flags); } static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) @@ -177,24 +105,6 @@ static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) sizeof(fpregs->fprs)); } -static inline void save_fpu_regs(struct fpu *fpu) -{ - save_fp_ctl(&fpu->fpc); - if (is_vx_fpu(fpu)) - save_vx_regs(fpu->vxrs); - else - save_fp_regs(fpu->fprs); -} - -static inline void restore_fpu_regs(struct fpu *fpu) -{ - restore_fp_ctl(&fpu->fpc); - if (is_vx_fpu(fpu)) - restore_vx_regs(fpu->vxrs); - else - restore_fp_regs(fpu->fprs); -} - #endif #endif /* _ASM_S390_FPU_INTERNAL_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3024acbe1f9d..c4f4c52aaa23 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #define KVM_MAX_VCPUS 64 @@ -498,10 +499,9 @@ struct kvm_guestdbg_info_arch { struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; - s390_fp_regs host_fpregs; unsigned int host_acrs[NUM_ACRS]; - s390_fp_regs guest_fpregs; - struct kvm_s390_vregs *host_vregs; + struct fpu host_fpregs; + struct fpu guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 19f51db7c5e6..c417015c5304 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,10 +14,12 @@ #define CIF_MCCK_PENDING 0 /* machine check handling is pending */ #define CIF_ASCE 1 /* user asce needs fixup / uaccess */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ +#define CIF_FPU 3 /* restore vector registers */ #define _CIF_MCCK_PENDING (1<mm) { \ update_cr_regs(next); \ - restore_fpu_regs(&next->thread.fpu); \ + set_cpu_flag(CIF_FPU); \ restore_access_regs(&next->thread.acrs[0]); \ restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \ } \ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index a2da259d9327..6bc42c08be09 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -28,11 +28,16 @@ int main(void) DEFINE(__TASK_pid, offsetof(struct task_struct, pid)); BLANK(); DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp)); + DEFINE(__THREAD_fpu, offsetof(struct task_struct, thread.fpu)); DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause)); DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address)); DEFINE(__THREAD_per_paid, offsetof(struct thread_struct, per_event.paid)); DEFINE(__THREAD_trap_tdb, offsetof(struct thread_struct, trap_tdb)); BLANK(); + DEFINE(__FPU_fpc, offsetof(struct fpu, fpc)); + DEFINE(__FPU_flags, offsetof(struct fpu, flags)); + DEFINE(__FPU_regs, offsetof(struct fpu, regs)); + BLANK(); DEFINE(__TI_task, offsetof(struct thread_info, task)); DEFINE(__TI_flags, offsetof(struct thread_info, flags)); DEFINE(__TI_sysc_table, offsetof(struct thread_info, sys_call_table)); diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 452995137a69..0b46fd4aa31e 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -161,7 +161,6 @@ static void store_sigregs(void) static void load_sigregs(void) { restore_access_regs(current->thread.acrs); - restore_fpu_regs(¤t->thread.fpu); } static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) @@ -287,6 +286,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; set_current_blocked(&set); + save_fpu_regs(¤t->thread.fpu); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; if (restore_sigregs_ext32(regs, &frame->sregs_ext)) @@ -309,6 +309,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; + save_fpu_regs(¤t->thread.fpu); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 84062e7a77da..05ea485156ee 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -20,6 +20,8 @@ #include #include #include +#include +#include __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -46,10 +48,10 @@ _TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ _TIF_UPROBE) _TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \ _TIF_SYSCALL_TRACEPOINT) -_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE) +_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE | _CIF_FPU) _PIF_WORK = (_PIF_PER_TRAP) -#define BASED(name) name-system_call(%r13) +#define BASED(name) name-cleanup_critical(%r13) .macro TRACE_IRQS_ON #ifdef CONFIG_TRACE_IRQFLAGS @@ -280,6 +282,8 @@ ENTRY(system_call) jo .Lsysc_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo .Lsysc_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_FPU + jo .Lsysc_vxrs tm __LC_CPU_FLAGS+7,_CIF_ASCE jo .Lsysc_uaccess j .Lsysc_return # beware of critical section cleanup @@ -306,6 +310,13 @@ ENTRY(system_call) lctlg %c1,%c1,__LC_USER_ASCE # load primary asce j .Lsysc_return +# +# CIF_FPU is set, restore floating-point controls and floating-point registers. +# +.Lsysc_vxrs: + larl %r14,.Lsysc_return + jg load_fpu_regs + # # _TIF_SIGPENDING is set, call do_signal # @@ -405,7 +416,7 @@ ENTRY(pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call + larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW HANDLE_SIE_INTERCEPT %r14,1 tmhh %r8,0x0001 # test problem state bit @@ -483,7 +494,7 @@ ENTRY(io_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call + larl %r13,cleanup_critical lmg %r8,%r9,__LC_IO_OLD_PSW HANDLE_SIE_INTERCEPT %r14,2 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT @@ -587,6 +598,8 @@ ENTRY(io_int_handler) jo .Lio_sigpending tm __TI_flags+7(%r12),_TIF_NOTIFY_RESUME jo .Lio_notify_resume + tm __LC_CPU_FLAGS+7,_CIF_FPU + jo .Lio_vxrs tm __LC_CPU_FLAGS+7,_CIF_ASCE jo .Lio_uaccess j .Lio_return # beware of critical section cleanup @@ -608,6 +621,13 @@ ENTRY(io_int_handler) lctlg %c1,%c1,__LC_USER_ASCE # load primary asce j .Lio_return +# +# CIF_FPU is set, restore floating-point controls and floating-point registers. +# +.Lio_vxrs: + larl %r14,.Lio_return + jg load_fpu_regs + # # _TIF_NEED_RESCHED is set, call schedule # @@ -652,7 +672,7 @@ ENTRY(ext_int_handler) stmg %r8,%r15,__LC_SAVE_AREA_ASYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call + larl %r13,cleanup_critical lmg %r8,%r9,__LC_EXT_OLD_PSW HANDLE_SIE_INTERCEPT %r14,3 SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_STACK,STACK_SHIFT @@ -690,6 +710,121 @@ ENTRY(psw_idle) br %r14 .Lpsw_idle_end: +/* Store floating-point controls and floating-point or vector extension + * registers instead. A critical section cleanup assures that the registers + * are stored even if interrupted for some other work. The register %r2 + * designates a struct fpu to store register contents. If the specified + * structure does not contain a register save area, the register store is + * omitted (see also comments in arch_dup_task_struct()). + * + * The CIF_FPU flag is set in any case. The CIF_FPU triggers a lazy restore + * of the register contents at system call or io return. + */ +ENTRY(save_fpu_regs) + tm __LC_CPU_FLAGS+7,_CIF_FPU + bor %r14 + stfpc __FPU_fpc(%r2) +.Lsave_fpu_regs_fpc_end: + lg %r3,__FPU_regs(%r2) + ltgr %r3,%r3 + jz .Lsave_fpu_regs_done # no save area -> set CIF_FPU + tm __FPU_flags+3(%r2),FPU_USE_VX + jz .Lsave_fpu_regs_fp # no -> store FP regs +.Lsave_fpu_regs_vx_low: + VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) +.Lsave_fpu_regs_vx_high: + VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) + j .Lsave_fpu_regs_done # -> set CIF_FPU flag +.Lsave_fpu_regs_fp: + std 0,0(%r3) + std 1,8(%r3) + std 2,16(%r3) + std 3,24(%r3) + std 4,32(%r3) + std 5,40(%r3) + std 6,48(%r3) + std 7,56(%r3) + std 8,64(%r3) + std 9,72(%r3) + std 10,80(%r3) + std 11,88(%r3) + std 12,96(%r3) + std 13,104(%r3) + std 14,112(%r3) + std 15,120(%r3) +.Lsave_fpu_regs_done: + oi __LC_CPU_FLAGS+7,_CIF_FPU + br %r14 +.Lsave_fpu_regs_end: + +/* Load floating-point controls and floating-point or vector extension + * registers. A critical section cleanup assures that the register contents + * are loaded even if interrupted for some other work. Depending on the saved + * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared. + * + * There are special calling conventions to fit into sysc and io return work: + * %r12: __LC_THREAD_INFO + * %r15: + * The function requires: + * %r4 and __SF_EMPTY+32(%r15) + */ +load_fpu_regs: + tm __LC_CPU_FLAGS+7,_CIF_FPU + bnor %r14 + lg %r4,__TI_task(%r12) + la %r4,__THREAD_fpu(%r4) + lfpc __FPU_fpc(%r4) + stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 + tm __FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? + lg %r4,__FPU_regs(%r4) # %r4 <- reg save area + jz .Lload_fpu_regs_fp_ctl # -> no VX, load FP regs +.Lload_fpu_regs_vx_ctl: + tm __SF_EMPTY+32+5(%r15),2 # test VX control + jo .Lload_fpu_regs_vx + oi __SF_EMPTY+32+5(%r15),2 # set VX control + lctlg %c0,%c0,__SF_EMPTY+32(%r15) +.Lload_fpu_regs_vx: + VLM %v0,%v15,0,%r4 +.Lload_fpu_regs_vx_high: + VLM %v16,%v31,256,%r4 + j .Lload_fpu_regs_done +.Lload_fpu_regs_fp_ctl: + tm __SF_EMPTY+32+5(%r15),2 # test VX control + jz .Lload_fpu_regs_fp + ni __SF_EMPTY+32+5(%r15),253 # clear VX control + lctlg %c0,%c0,__SF_EMPTY+32(%r15) +.Lload_fpu_regs_fp: + ld 0,0(%r4) + ld 1,8(%r4) + ld 2,16(%r4) + ld 3,24(%r4) + ld 4,32(%r4) + ld 5,40(%r4) + ld 6,48(%r4) + ld 7,56(%r4) + ld 8,64(%r4) + ld 9,72(%r4) + ld 10,80(%r4) + ld 11,88(%r4) + ld 12,96(%r4) + ld 13,104(%r4) + ld 14,112(%r4) + ld 15,120(%r4) +.Lload_fpu_regs_done: + ni __LC_CPU_FLAGS+7,255-_CIF_FPU + br %r14 +.Lload_fpu_regs_end: + +/* Test and set the vector enablement control in CR0.46 */ +ENTRY(__ctl_set_vx) + stctg %c0,%c0,__SF_EMPTY(%r15) + tm __SF_EMPTY+5(%r15),2 + bor %r14 + oi __SF_EMPTY+5(%r15),2 + lctlg %c0,%c0,__SF_EMPTY(%r15) + br %r14 +.L__ctl_set_vx_end: + .L__critical_end: /* @@ -702,7 +837,7 @@ ENTRY(mcck_int_handler) lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs lg %r10,__LC_LAST_BREAK lg %r12,__LC_THREAD_INFO - larl %r13,system_call + larl %r13,cleanup_critical lmg %r8,%r9,__LC_MCK_OLD_PSW HANDLE_SIE_INTERCEPT %r14,4 tm __LC_MCCK_CODE,0x80 # system damage? @@ -831,6 +966,12 @@ stack_overflow: .quad .Lio_done .quad psw_idle .quad .Lpsw_idle_end + .quad save_fpu_regs + .quad .Lsave_fpu_regs_end + .quad load_fpu_regs + .quad .Lload_fpu_regs_end + .quad __ctl_set_vx + .quad .L__ctl_set_vx_end cleanup_critical: clg %r9,BASED(.Lcleanup_table) # system_call @@ -853,6 +994,18 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end jl .Lcleanup_idle + clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs + jl 0f + clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end + jl .Lcleanup_save_fpu_regs + clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs + jl 0f + clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end + jl .Lcleanup_load_fpu_regs + clg %r9,BASED(.Lcleanup_table+112) # __ctl_set_vx + jl 0f + clg %r9,BASED(.Lcleanup_table+120) # .L__ctl_set_vx_end + jl .Lcleanup___ctl_set_vx 0: br %r14 @@ -981,6 +1134,145 @@ cleanup_critical: .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw +.Lcleanup_save_fpu_regs: + tm __LC_CPU_FLAGS+7,_CIF_FPU + bor %r14 + clg %r9,BASED(.Lcleanup_save_fpu_regs_done) + jhe 5f + clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) + jhe 4f + clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) + jhe 3f + clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) + jhe 2f + clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) + jhe 1f +0: # Store floating-point controls + stfpc __FPU_fpc(%r2) +1: # Load register save area and check if VX is active + lg %r3,__FPU_regs(%r2) + ltgr %r3,%r3 + jz 5f # no save area -> set CIF_FPU + tm __FPU_flags+3(%r2),FPU_USE_VX + jz 4f # no VX -> store FP regs +2: # Store vector registers (V0-V15) + VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) +3: # Store vector registers (V16-V31) + VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) + j 5f # -> done, set CIF_FPU flag +4: # Store floating-point registers + std 0,0(%r3) + std 1,8(%r3) + std 2,16(%r3) + std 3,24(%r3) + std 4,32(%r3) + std 5,40(%r3) + std 6,48(%r3) + std 7,56(%r3) + std 8,64(%r3) + std 9,72(%r3) + std 10,80(%r3) + std 11,88(%r3) + std 12,96(%r3) + std 13,104(%r3) + std 14,112(%r3) + std 15,120(%r3) +5: # Set CIF_FPU flag + oi __LC_CPU_FLAGS+7,_CIF_FPU + lg %r9,48(%r11) # return from save_fpu_regs + br %r14 +.Lcleanup_save_fpu_fpc_end: + .quad .Lsave_fpu_regs_fpc_end +.Lcleanup_save_fpu_regs_vx_low: + .quad .Lsave_fpu_regs_vx_low +.Lcleanup_save_fpu_regs_vx_high: + .quad .Lsave_fpu_regs_vx_high +.Lcleanup_save_fpu_regs_fp: + .quad .Lsave_fpu_regs_fp +.Lcleanup_save_fpu_regs_done: + .quad .Lsave_fpu_regs_done + +.Lcleanup_load_fpu_regs: + tm __LC_CPU_FLAGS+7,_CIF_FPU + bnor %r14 + clg %r9,BASED(.Lcleanup_load_fpu_regs_done) + jhe 1f + clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) + jhe 2f + clg %r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl) + jhe 3f + clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) + jhe 4f + clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) + jhe 5f + clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl) + jhe 6f + lg %r4,__TI_task(%r12) + la %r4,__THREAD_fpu(%r4) + lfpc __FPU_fpc(%r4) + tm __FPU_flags+3(%r4),FPU_USE_VX # VX-enabled task ? + lg %r4,__FPU_regs(%r4) # %r4 <- reg save area + jz 3f # -> no VX, load FP regs +6: # Set VX-enablement control + stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 + tm __SF_EMPTY+32+5(%r15),2 # test VX control + jo 5f + oi __SF_EMPTY+32+5(%r15),2 # set VX control + lctlg %c0,%c0,__SF_EMPTY+32(%r15) +5: # Load V0 ..V15 registers + VLM %v0,%v15,0,%r4 +4: # Load V16..V31 registers + VLM %v16,%v31,256,%r4 + j 1f +3: # Clear VX-enablement control for FP + stctg %c0,%c0,__SF_EMPTY+32(%r15) # store CR0 + tm __SF_EMPTY+32+5(%r15),2 # test VX control + jz 2f + ni __SF_EMPTY+32+5(%r15),253 # clear VX control + lctlg %c0,%c0,__SF_EMPTY+32(%r15) +2: # Load floating-point registers + ld 0,0(%r4) + ld 1,8(%r4) + ld 2,16(%r4) + ld 3,24(%r4) + ld 4,32(%r4) + ld 5,40(%r4) + ld 6,48(%r4) + ld 7,56(%r4) + ld 8,64(%r4) + ld 9,72(%r4) + ld 10,80(%r4) + ld 11,88(%r4) + ld 12,96(%r4) + ld 13,104(%r4) + ld 14,112(%r4) + ld 15,120(%r4) +1: # Clear CIF_FPU bit + ni __LC_CPU_FLAGS+7,255-_CIF_FPU + lg %r9,48(%r11) # return from load_fpu_regs + br %r14 +.Lcleanup_load_fpu_regs_vx_ctl: + .quad .Lload_fpu_regs_vx_ctl +.Lcleanup_load_fpu_regs_vx: + .quad .Lload_fpu_regs_vx +.Lcleanup_load_fpu_regs_vx_high: + .quad .Lload_fpu_regs_vx_high +.Lcleanup_load_fpu_regs_fp_ctl: + .quad .Lload_fpu_regs_fp_ctl +.Lcleanup_load_fpu_regs_fp: + .quad .Lload_fpu_regs_fp +.Lcleanup_load_fpu_regs_done: + .quad .Lload_fpu_regs_done + +.Lcleanup___ctl_set_vx: + stctg %c0,%c0,__SF_EMPTY(%r15) + tm __SF_EMPTY+5(%r15),2 + bor %r14 + oi __SF_EMPTY+5(%r15),2 + lctlg %c0,%c0,__SF_EMPTY(%r15) + lg %r9,48(%r11) # return from __ctl_set_vx + br %r14 + /* * Integer constants */ @@ -1002,6 +1294,11 @@ ENTRY(sie64a) stg %r2,__SF_EMPTY(%r15) # save control block pointer stg %r3,__SF_EMPTY+8(%r15) # save guest register save area xc __SF_EMPTY+16(16,%r15),__SF_EMPTY+16(%r15) # host id & reason + tm __LC_CPU_FLAGS+7,_CIF_FPU # load guest fp/vx registers ? + jno .Lsie_load_guest_gprs + lg %r12,__LC_THREAD_INFO # load fp/vx regs save area + brasl %r14,load_fpu_regs # load guest fp/vx regs +.Lsie_load_guest_gprs: lmg %r0,%r13,0(%r3) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 @@ -1012,6 +1309,8 @@ ENTRY(sie64a) oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now tm __SIE_PROG20+3(%r14),3 # last exit... jnz .Lsie_done + tm __LC_CPU_FLAGS+7,_CIF_FPU + jo .Lsie_done # exit if fp/vx regs changed LPP __SF_EMPTY(%r15) # set guest id sie 0(%r14) .Lsie_done: diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index e66141c6696a..cbdd94c8ba18 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -165,8 +165,12 @@ static int notrace s390_revalidate_registers(struct mci *mci) cr0.val = S390_lowcore.cregs_save_area[0]; cr0.afp = cr0.vx = 1; __ctl_load(cr0.val, 0, 0); - restore_vx_regs((__vector128 *) - &S390_lowcore.vector_save_area); + asm volatile( + " la 1,%0\n" + " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ + " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ + : : "Q" (*(struct vx_array *) + &S390_lowcore.vector_save_area) : "1"); __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); } /* Revalidate access registers */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 56949c9cda97..9cf0063f920e 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -90,16 +90,28 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; /* Set up a new floating-point register save area */ + dst->thread.fpu.fpc = 0; + dst->thread.fpu.flags = 0; /* Always start with VX disabled */ dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL|__GFP_REPEAT); if (!dst->thread.fpu.fprs) return -ENOMEM; - /* Save the fpu registers to new thread structure. */ - save_fp_ctl(&dst->thread.fpu.fpc); - save_fp_regs(dst->thread.fpu.fprs); - dst->thread.fpu.flags = 0; /* Always start with VX disabled */ - + /* + * Save the floating-point or vector register state of the current + * task. The state is not saved for early kernel threads, for example, + * the init_task, which do not have an allocated save area. + * The CIF_FPU flag is set in any case to lazy clear or restore a saved + * state when switching to a different task or returning to user space. + */ + save_fpu_regs(¤t->thread.fpu); + dst->thread.fpu.fpc = current->thread.fpu.fpc; + if (is_vx_task(current)) + convert_vx_to_fp(dst->thread.fpu.fprs, + current->thread.fpu.vxrs); + else + memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs, + sizeof(freg_t) * __NUM_FPRS); return 0; } @@ -184,8 +196,15 @@ asmlinkage void execve_tail(void) */ int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs) { - save_fp_ctl(&fpregs->fpc); - save_fp_regs(fpregs->fprs); + save_fpu_regs(¤t->thread.fpu); + fpregs->fpc = current->thread.fpu.fpc; + fpregs->pad = 0; + if (is_vx_task(current)) + convert_vx_to_fp((freg_t *)&fpregs->fprs, + current->thread.fpu.vxrs); + else + memcpy(&fpregs->fprs, current->thread.fpu.fprs, + sizeof(fpregs->fprs)); return 1; } EXPORT_SYMBOL(dump_fpu); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 52e2e1dd919d..8c525880a3ff 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -45,39 +45,27 @@ void update_cr_regs(struct task_struct *task) struct per_regs old, new; /* Take care of the enable/disable of transactional execution. */ - if (MACHINE_HAS_TE || MACHINE_HAS_VX) { + if (MACHINE_HAS_TE) { unsigned long cr, cr_new; __ctl_store(cr, 0, 0); - cr_new = cr; - if (MACHINE_HAS_TE) { - /* Set or clear transaction execution TXC bit 8. */ - cr_new |= (1UL << 55); - if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new &= ~(1UL << 55); - } - if (MACHINE_HAS_VX) { - /* Enable/disable of vector extension */ - cr_new &= ~(1UL << 17); - if (task->thread.fpu.vxrs) - cr_new |= (1UL << 17); - } + /* Set or clear transaction execution TXC bit 8. */ + cr_new = cr | (1UL << 55); + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr_new &= ~(1UL << 55); if (cr_new != cr) __ctl_load(cr_new, 0, 0); - if (MACHINE_HAS_TE) { - /* Set/clear transaction execution TDC bits 62/63. */ - __ctl_store(cr, 2, 2); - cr_new = cr & ~3UL; - if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { - if (task->thread.per_flags & - PER_FLAG_TE_ABORT_RAND_TEND) - cr_new |= 1UL; - else - cr_new |= 2UL; - } - if (cr_new != cr) - __ctl_load(cr_new, 2, 2); + /* Set or clear transaction execution TDC bits 62 and 63. */ + __ctl_store(cr, 2, 2); + cr_new = cr & ~3UL; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr_new |= 1UL; + else + cr_new |= 2UL; } + if (cr_new != cr) + __ctl_load(cr_new, 2, 2); } /* Copy user specified PER registers */ new.control = thread->per_user.control; @@ -998,9 +986,6 @@ static int s390_fpregs_set(struct task_struct *target, else memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); - if (target == current) - restore_fpu_regs(&target->thread.fpu); - return rc; } @@ -1090,12 +1075,9 @@ static int s390_vxrs_low_set(struct task_struct *target, save_fpu_regs(&target->thread.fpu); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); - if (rc == 0) { + if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i]; - if (target == current) - restore_fpu_regs(&target->thread.fpu); - } return rc; } @@ -1137,9 +1119,6 @@ static int s390_vxrs_high_set(struct task_struct *target, rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1); - if (rc == 0 && target == current) - restore_vx_regs(target->thread.fpu.vxrs); - return rc; } diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c index 9f60467938d1..5090d3dad10b 100644 --- a/arch/s390/kernel/s390_ksyms.c +++ b/arch/s390/kernel/s390_ksyms.c @@ -1,5 +1,6 @@ #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -8,6 +9,8 @@ EXPORT_SYMBOL(_mcount); #if IS_ENABLED(CONFIG_KVM) EXPORT_SYMBOL(sie64a); EXPORT_SYMBOL(sie_exit); +EXPORT_SYMBOL(save_fpu_regs); +EXPORT_SYMBOL(__ctl_set_vx); #endif EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 49c259cd5a33..2f4c7e2638c9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -112,7 +112,6 @@ static void store_sigregs(void) static void load_sigregs(void) { restore_access_regs(current->thread.acrs); - restore_fpu_regs(¤t->thread.fpu); } /* Returns non-zero on fault. */ @@ -223,6 +222,7 @@ SYSCALL_DEFINE0(sigreturn) if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; set_current_blocked(&set); + save_fpu_regs(¤t->thread.fpu); if (restore_sigregs(regs, &frame->sregs)) goto badframe; if (restore_sigregs_ext(regs, &frame->sregs_ext)) @@ -246,6 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); if (restore_altstack(&frame->uc.uc_stack)) goto badframe; + save_fpu_regs(¤t->thread.fpu); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 7b09224c05a3..76f76932ccb9 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -151,7 +151,7 @@ DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, "transaction constraint exception") -static inline void do_fp_trap(struct pt_regs *regs, int fpc) +static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) { int si_code = 0; /* FPC[2] is Data Exception Code */ @@ -236,17 +236,13 @@ int alloc_vector_registers(struct task_struct *tsk) return -ENOMEM; preempt_disable(); if (tsk == current) - save_fp_regs(tsk->thread.fpu.fprs); + save_fpu_regs(&tsk->thread.fpu); /* Copy the 16 floating point registers */ convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs); fprs = tsk->thread.fpu.fprs; tsk->thread.fpu.vxrs = vxrs; tsk->thread.fpu.flags |= FPU_USE_VX; kfree(fprs); - if (tsk == current) { - __ctl_set_bit(0, 17); - restore_vx_regs(vxrs); - } preempt_enable(); return 0; } @@ -261,7 +257,7 @@ void vector_exception(struct pt_regs *regs) } /* get vector interrupt code from fpc */ - asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc)); + save_fpu_regs(¤t->thread.fpu); vic = (current->thread.fpu.fpc & 0xf00) >> 8; switch (vic) { case 1: /* invalid vector operation */ @@ -299,7 +295,7 @@ void data_exception(struct pt_regs *regs) location = get_trap_ip(regs); - asm volatile("stfpc %0" : "=Q" (current->thread.fpu.fpc)); + save_fpu_regs(¤t->thread.fpu); /* Check for vector register enablement */ if (MACHINE_HAS_VX && !is_vx_task(current) && (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) { diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fc7bc7118b23..c0cceaf4a92e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1198,27 +1198,54 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +/* + * Backs up the current FP/VX register save area on a particular + * destination. Used to switch between different register save + * areas. + */ +static inline void save_fpu_to(struct fpu *dst) +{ + dst->fpc = current->thread.fpu.fpc; + dst->flags = current->thread.fpu.flags; + dst->regs = current->thread.fpu.regs; +} + +/* + * Switches the FP/VX register save area from which to lazy + * restore register contents. + */ +static inline void load_fpu_from(struct fpu *from) +{ + current->thread.fpu.fpc = from->fpc; + current->thread.fpu.flags = from->flags; + current->thread.fpu.regs = from->regs; +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - __u32 fpc; + /* Save host register state */ + save_fpu_regs(¤t->thread.fpu); + save_fpu_to(&vcpu->arch.host_fpregs); - save_fp_ctl(&vcpu->arch.host_fpregs.fpc); - if (test_kvm_facility(vcpu->kvm, 129)) - save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); - else - save_fp_regs(vcpu->arch.host_fpregs.fprs); - save_access_regs(vcpu->arch.host_acrs); if (test_kvm_facility(vcpu->kvm, 129)) { - fpc = vcpu->run->s.regs.fpc; - restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); - } else { - fpc = vcpu->arch.guest_fpregs.fpc; - restore_fp_regs(vcpu->arch.guest_fpregs.fprs); - } - if (test_fp_ctl(fpc)) + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; + current->thread.fpu.flags = FPU_USE_VX; + /* + * Use the register save area in the SIE-control block + * for register restore and save in kvm_arch_vcpu_put() + */ + current->thread.fpu.vxrs = + (__vector128 *)&vcpu->run->s.regs.vrs; + /* Always enable the vector extension for KVM */ + __ctl_set_vx(); + } else + load_fpu_from(&vcpu->arch.guest_fpregs); + + if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ - fpc = 0; - restore_fp_ctl(&fpc); + current->thread.fpu.fpc = 0; + + save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); @@ -1228,19 +1255,22 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); - if (test_kvm_facility(vcpu->kvm, 129)) { - save_fp_ctl(&vcpu->run->s.regs.fpc); - save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); - } else { - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); - } - save_access_regs(vcpu->run->s.regs.acrs); - restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); + + save_fpu_regs(¤t->thread.fpu); + if (test_kvm_facility(vcpu->kvm, 129)) - restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + /* + * kvm_arch_vcpu_load() set up the register save area to + * the &vcpu->run->s.regs.vrs and, thus, the vector registers + * are already saved. Only the floating-point control must be + * copied. + */ + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; else - restore_fp_regs(vcpu->arch.host_fpregs.fprs); + save_fpu_to(&vcpu->arch.guest_fpregs); + load_fpu_from(&vcpu->arch.host_fpregs); + + save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); } @@ -1383,7 +1413,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; - vcpu->arch.host_vregs = &sie_page->vregs; vcpu->arch.sie_block->icpua = id; if (!kvm_is_ucontrol(kvm)) { @@ -1405,6 +1434,19 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + /* + * Allocate a save area for floating-point registers. If the vector + * extension is available, register contents are saved in the SIE + * control block. The allocated save area is still required in + * particular places, for example, in kvm_s390_vcpu_store_status(). + */ + vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, + GFP_KERNEL); + if (!vcpu->arch.guest_fpregs.fprs) { + rc = -ENOMEM; + goto out_free_sie_block; + } + rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; @@ -1627,16 +1669,16 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { if (test_fp_ctl(fpu->fpc)) return -EINVAL; - memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); vcpu->arch.guest_fpregs.fpc = fpu->fpc; - restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_fpu_regs(¤t->thread.fpu); + load_fpu_from(&vcpu->arch.guest_fpregs); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); fpu->fpc = vcpu->arch.guest_fpregs.fpc; return 0; } @@ -2199,8 +2241,21 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * copying in vcpu load/put. Lets update our copies before we save * it into the save area */ - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_fpu_regs(¤t->thread.fpu); + if (test_kvm_facility(vcpu->kvm, 129)) { + /* + * If the vector extension is available, the vector registers + * which overlaps with floating-point registers are saved in + * the SIE-control block. Hence, extract the floating-point + * registers and the FPC value and store them in the + * guest_fpregs structure. + */ + WARN_ON(!is_vx_task(current)); /* XXX remove later */ + vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; + convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, + current->thread.fpu.vxrs); + } else + save_fpu_to(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); @@ -2227,10 +2282,13 @@ int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr) /* * The guest VXRS are in the host VXRs due to the lazy - * copying in vcpu load/put. Let's update our copies before we save - * it into the save area. + * copying in vcpu load/put. We can simply call save_fpu_regs() + * to save the current register state because we are in the + * middle of a load/put cycle. + * + * Let's update our copies before we save it into the save area. */ - save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + save_fpu_regs(¤t->thread.fpu); return kvm_s390_store_adtl_status_unloaded(vcpu, addr); } -- cgit v1.2.3 From 22362a0e23182d230527a5add690b4027860d7d3 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 8 Jul 2015 10:20:04 +0200 Subject: s390/sclp: convert early sclp console code to C The 31-bit assembler code for the early sclp console is error prone as git commit fde24b54d976cc123506695c17db01438a11b673 "s390/sclp: clear upper register halves in _sclp_print_early" has shown. Convert the assembler code to C. Signed-off-by: Martin Schwidefsky --- arch/s390/Makefile | 2 + arch/s390/include/asm/processor.h | 11 ++ arch/s390/include/asm/sclp.h | 2 +- arch/s390/kernel/Makefile | 11 ++ arch/s390/kernel/head.S | 1 + arch/s390/kernel/sclp.S | 355 -------------------------------------- arch/s390/kernel/sclp.c | 160 +++++++++++++++++ 7 files changed, 186 insertions(+), 356 deletions(-) delete mode 100644 arch/s390/kernel/sclp.S create mode 100644 arch/s390/kernel/sclp.c (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 667b1bca5681..e8d4423e4f85 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -33,6 +33,8 @@ mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 mflags-$(CONFIG_MARCH_Z13) := -march=z13 +export CC_FLAGS_MARCH := $(mflags-y) + aflags-y += $(mflags-y) cflags-y += $(mflags-y) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c417015c5304..085fb0d3c54e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -232,6 +232,17 @@ static inline void __load_psw_mask (unsigned long mask) : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc"); } +/* + * Extract current PSW mask + */ +static inline unsigned long __extract_psw(void) +{ + unsigned int reg1, reg2; + + asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2)); + return (((unsigned long) reg1) << 32) | ((unsigned long) reg2); +} + /* * Rewind PSW instruction address by specified number of bytes. */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index f6ff06077631..821dde5f425d 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -79,6 +79,6 @@ int sclp_pci_configure(u32 fid); int sclp_pci_deconfigure(u32 fid); int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode); void sclp_early_detect(void); -long _sclp_print_early(const char *); +int _sclp_print_early(const char *); #endif /* _ASM_S390_SCLP_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index ffb87617a36c..b756c6348ac6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -28,6 +28,17 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' CFLAGS_sysinfo.o += -w +# +# Use -march=z900 for sclp.c to be able to print an error message if +# the kernel is started on a machine which is too old +# +CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE) +ifneq ($(CC_FLAGS_MARCH),-march=z900) +CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH) +CFLAGS_sclp.o += -march=z900 +endif +GCOV_PROFILE_sclp.o := n + obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 59b7c6470567..63c77fdb619e 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -370,6 +370,7 @@ ENTRY(startup_kdump) xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 xc 0xe00(256),0xe00 + lctlg %c0,%c15,0x200(%r0) # initialize control registers stck __LC_LAST_UPDATE_CLOCK spt 6f-.LPG0(%r13) mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S deleted file mode 100644 index ada0c07fe1a8..000000000000 --- a/arch/s390/kernel/sclp.S +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Mini SCLP driver. - * - * Copyright IBM Corp. 2004, 2009 - * - * Author(s): Peter Oberparleiter , - * Heiko Carstens , - * - */ - -#include -#include - -LC_EXT_NEW_PSW = 0x58 # addr of ext int handler -LC_EXT_NEW_PSW_64 = 0x1b0 # addr of ext int handler 64 bit -LC_EXT_INT_PARAM = 0x80 # addr of ext int parameter -LC_EXT_INT_CODE = 0x86 # addr of ext int code -LC_AR_MODE_ID = 0xa3 - -# -# Subroutine which waits synchronously until either an external interruption -# or a timeout occurs. -# -# Parameters: -# R2 = 0 for no timeout, non-zero for timeout in (approximated) seconds -# -# Returns: -# R2 = 0 on interrupt, 2 on timeout -# R3 = external interruption parameter if R2=0 -# - -_sclp_wait_int: - stm %r6,%r15,24(%r15) # save registers - basr %r13,0 # get base register -.LbaseS1: - ahi %r15,-96 # create stack frame - la %r8,LC_EXT_NEW_PSW # register int handler - la %r9,.LextpswS1-.LbaseS1(%r13) - tm LC_AR_MODE_ID,1 - jno .Lesa1 - la %r8,LC_EXT_NEW_PSW_64 # register int handler 64 bit - la %r9,.LextpswS1_64-.LbaseS1(%r13) -.Lesa1: - mvc .LoldpswS1-.LbaseS1(16,%r13),0(%r8) - mvc 0(16,%r8),0(%r9) - epsw %r6,%r7 # set current addressing mode - nill %r6,0x1 # in new psw (31 or 64 bit mode) - nilh %r7,0x8000 - stm %r6,%r7,0(%r8) - lhi %r6,0x0200 # cr mask for ext int (cr0.54) - ltr %r2,%r2 - jz .LsetctS1 - ahi %r6,0x0800 # cr mask for clock int (cr0.52) - stck .LtimeS1-.LbaseS1(%r13) # initiate timeout - al %r2,.LtimeS1-.LbaseS1(%r13) - st %r2,.LtimeS1-.LbaseS1(%r13) - sckc .LtimeS1-.LbaseS1(%r13) - -.LsetctS1: - stctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # enable required interrupts - l %r0,.LctlS1-.LbaseS1(%r13) - lhi %r1,~(0x200 | 0x800) # clear old values - nr %r1,%r0 - or %r1,%r6 # set new value - st %r1,.LctlS1-.LbaseS1(%r13) - lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) - st %r0,.LctlS1-.LbaseS1(%r13) - lhi %r2,2 # return code for timeout -.LloopS1: - lpsw .LwaitpswS1-.LbaseS1(%r13) # wait until interrupt -.LwaitS1: - lh %r7,LC_EXT_INT_CODE - chi %r7,EXT_IRQ_CLK_COMP # timeout? - je .LtimeoutS1 - chi %r7,EXT_IRQ_SERVICE_SIG # service int? - jne .LloopS1 - sr %r2,%r2 - l %r3,LC_EXT_INT_PARAM -.LtimeoutS1: - lctl %c0,%c0,.LctlS1-.LbaseS1(%r13) # restore interrupt setting - # restore old handler - mvc 0(16,%r8),.LoldpswS1-.LbaseS1(%r13) - lm %r6,%r15,120(%r15) # restore registers - br %r14 # return to caller - - .align 8 -.LoldpswS1: - .long 0, 0, 0, 0 # old ext int PSW -.LextpswS1: - .long 0x00080000, 0x80000000+.LwaitS1 # PSW to handle ext int -.LextpswS1_64: - .quad 0, .LwaitS1 # PSW to handle ext int, 64 bit -.LwaitpswS1: - .long 0x010a0000, 0x00000000+.LloopS1 # PSW to wait for ext int -.LtimeS1: - .quad 0 # current time -.LctlS1: - .long 0 # CT0 contents - -# -# Subroutine to synchronously issue a service call. -# -# Parameters: -# R2 = command word -# R3 = sccb address -# -# Returns: -# R2 = 0 on success, 1 on failure -# R3 = sccb response code if R2 = 0 -# - -_sclp_servc: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - lr %r6,%r2 # save command word - lr %r7,%r3 # save sccb address -.LretryS2: - lhi %r2,1 # error return code - .insn rre,0xb2200000,%r6,%r7 # servc - brc 1,.LendS2 # exit if not operational - brc 8,.LnotbusyS2 # go on if not busy - sr %r2,%r2 # wait until no longer busy - bras %r14,_sclp_wait_int - j .LretryS2 # retry -.LnotbusyS2: - sr %r2,%r2 # wait until result - bras %r14,_sclp_wait_int - sr %r2,%r2 - lh %r3,6(%r7) -.LendS2: - lm %r6,%r15,120(%r15) # restore registers - br %r14 - -# -# Subroutine to set up the SCLP interface. -# -# Parameters: -# R2 = 0 to activate, non-zero to deactivate -# -# Returns: -# R2 = 0 on success, non-zero on failure -# - -_sclp_setup: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - basr %r13,0 # get base register -.LbaseS3: - l %r6,.LsccbS0-.LbaseS3(%r13) # prepare init mask sccb - mvc 0(.LinitendS3-.LinitsccbS3,%r6),.LinitsccbS3-.LbaseS3(%r13) - ltr %r2,%r2 # initialization? - jz .LdoinitS3 # go ahead - # clear masks - xc .LinitmaskS3-.LinitsccbS3(8,%r6),.LinitmaskS3-.LinitsccbS3(%r6) -.LdoinitS3: - l %r2,.LwritemaskS3-.LbaseS3(%r13)# get command word - lr %r3,%r6 # get sccb address - bras %r14,_sclp_servc # issue service call - ltr %r2,%r2 # servc successful? - jnz .LerrorS3 - chi %r3,0x20 # write mask successful? - jne .LerrorS3 - # check masks - la %r2,.LinitmaskS3-.LinitsccbS3(%r6) - l %r1,0(%r2) # receive mask ok? - n %r1,12(%r2) - cl %r1,0(%r2) - jne .LerrorS3 - l %r1,4(%r2) # send mask ok? - n %r1,8(%r2) - cl %r1,4(%r2) - sr %r2,%r2 - je .LendS3 -.LerrorS3: - lhi %r2,1 # error return code -.LendS3: - lm %r6,%r15,120(%r15) # restore registers - br %r14 -.LwritemaskS3: - .long 0x00780005 # SCLP command for write mask -.LinitsccbS3: - .word .LinitendS3-.LinitsccbS3 - .byte 0,0,0,0 - .word 0 - .word 0 - .word 4 -.LinitmaskS3: - .long 0x80000000 - .long 0x40000000 - .long 0 - .long 0 -.LinitendS3: - -# -# Subroutine which prints a given text to the SCLP console. -# -# Parameters: -# R2 = address of nil-terminated ASCII text -# -# Returns: -# R2 = 0 on success, 1 on failure -# - -_sclp_print: - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - basr %r13,0 # get base register -.LbaseS4: - l %r8,.LsccbS0-.LbaseS4(%r13) # prepare write data sccb - mvc 0(.LmtoS4-.LwritesccbS4,%r8),.LwritesccbS4-.LbaseS4(%r13) - la %r7,.LmtoS4-.LwritesccbS4(%r8) # current mto addr - sr %r0,%r0 - l %r10,.Lascebc-.LbaseS4(%r13) # address of translation table -.LinitmtoS4: - # initialize mto - mvc 0(.LmtoendS4-.LmtoS4,%r7),.LmtoS4-.LbaseS4(%r13) - lhi %r6,.LmtoendS4-.LmtoS4 # current mto length -.LloopS4: - ic %r0,0(%r2) # get character - ahi %r2,1 - ltr %r0,%r0 # end of string? - jz .LfinalizemtoS4 - chi %r0,0x0a # end of line (NL)? - jz .LfinalizemtoS4 - stc %r0,0(%r6,%r7) # copy to mto - la %r11,0(%r6,%r7) - tr 0(1,%r11),0(%r10) # translate to EBCDIC - ahi %r6,1 - j .LloopS4 -.LfinalizemtoS4: - sth %r6,0(%r7) # update mto length - lh %r9,.LmdbS4-.LwritesccbS4(%r8) # update mdb length - ar %r9,%r6 - sth %r9,.LmdbS4-.LwritesccbS4(%r8) - lh %r9,.LevbufS4-.LwritesccbS4(%r8)# update evbuf length - ar %r9,%r6 - sth %r9,.LevbufS4-.LwritesccbS4(%r8) - lh %r9,0(%r8) # update sccb length - ar %r9,%r6 - sth %r9,0(%r8) - ar %r7,%r6 # update current mto address - ltr %r0,%r0 # more characters? - jnz .LinitmtoS4 - l %r2,.LwritedataS4-.LbaseS4(%r13)# write data - lr %r3,%r8 - bras %r14,_sclp_servc - ltr %r2,%r2 # servc successful? - jnz .LendS4 - chi %r3,0x20 # write data successful? - je .LendS4 - lhi %r2,1 # error return code -.LendS4: - lm %r6,%r15,120(%r15) # restore registers - br %r14 - -# -# Function which prints a given text to the SCLP console. -# -# Parameters: -# R2 = address of nil-terminated ASCII text -# -# Returns: -# R2 = 0 on success, 1 on failure -# - -ENTRY(_sclp_print_early) - stm %r6,%r15,24(%r15) # save registers - ahi %r15,-96 # create stack frame - tm LC_AR_MODE_ID,1 - jno .Lesa2 - ahi %r15,-80 - stmh %r6,%r15,96(%r15) # store upper register halves - basr %r13,0 - lmh %r0,%r15,.Lzeroes-.(%r13) # clear upper register halves -.Lesa2: - lr %r10,%r2 # save string pointer - lhi %r2,0 - bras %r14,_sclp_setup # enable console - ltr %r2,%r2 - jnz .LendS5 - lr %r2,%r10 - bras %r14,_sclp_print # print string - ltr %r2,%r2 - jnz .LendS5 - lhi %r2,1 - bras %r14,_sclp_setup # disable console -.LendS5: - tm LC_AR_MODE_ID,1 - jno .Lesa3 - lgfr %r2,%r2 # sign extend return value - lmh %r6,%r15,96(%r15) # restore upper register halves - ahi %r15,80 -.Lesa3: - lm %r6,%r15,120(%r15) # restore registers - br %r14 -.Lzeroes: - .fill 64,4,0 - -.LwritedataS4: - .long 0x00760005 # SCLP command for write data -.LwritesccbS4: - # sccb - .word .LmtoS4-.LwritesccbS4 - .byte 0 - .byte 0,0,0 - .word 0 - - # evbuf -.LevbufS4: - .word .LmtoS4-.LevbufS4 - .byte 0x02 - .byte 0 - .word 0 - -.LmdbS4: - # mdb - .word .LmtoS4-.LmdbS4 - .word 1 - .long 0xd4c4c240 - .long 1 - - # go -.LgoS4: - .word .LmtoS4-.LgoS4 - .word 1 - .long 0 - .byte 0,0,0,0,0,0,0,0 - .byte 0,0,0 - .byte 0 - .byte 0,0,0,0,0,0,0 - .byte 0 - .word 0 - .byte 0,0,0,0,0,0,0,0,0,0 - .byte 0,0,0,0,0,0,0,0 - .byte 0,0,0,0,0,0,0,0 - -.LmtoS4: - .word .LmtoendS4-.LmtoS4 - .word 4 - .word 0x1000 - .byte 0 - .byte 0,0,0 -.LmtoendS4: - - # Global constants -.LsccbS0: - .long _sclp_work_area -.Lascebc: - .long _ascebc - -.section .data,"aw",@progbits - .balign 4096 -_sclp_work_area: - .fill 4096 -.previous diff --git a/arch/s390/kernel/sclp.c b/arch/s390/kernel/sclp.c new file mode 100644 index 000000000000..fa0bdff1d413 --- /dev/null +++ b/arch/s390/kernel/sclp.c @@ -0,0 +1,160 @@ +/* + * Copyright IBM Corp. 2015 + * Author(s): Martin Schwidefsky + */ +#include +#include +#include +#include +#include +#include + +static char _sclp_work_area[4096] __aligned(PAGE_SIZE); + +static void _sclp_wait_int(void) +{ + unsigned long cr0, cr0_new, psw_mask, addr; + psw_t psw_ext_save, psw_wait; + + __ctl_store(cr0, 0, 0); + cr0_new = cr0 | 0x200; + __ctl_load(cr0_new, 0, 0); + + psw_ext_save = S390_lowcore.external_new_psw; + psw_mask = __extract_psw() & (PSW_MASK_EA | PSW_MASK_BA); + S390_lowcore.external_new_psw.mask = psw_mask; + psw_wait.mask = psw_mask | PSW_MASK_EXT | PSW_MASK_WAIT; + S390_lowcore.ext_int_code = 0; + + do { + asm volatile( + " larl %[addr],0f\n" + " stg %[addr],%[psw_wait_addr]\n" + " stg %[addr],%[psw_ext_addr]\n" + " lpswe %[psw_wait]\n" + "0:\n" + : [addr] "=&d" (addr), + [psw_wait_addr] "=Q" (psw_wait.addr), + [psw_ext_addr] "=Q" (S390_lowcore.external_new_psw.addr) + : [psw_wait] "Q" (psw_wait) + : "cc", "memory"); + } while (S390_lowcore.ext_int_code != EXT_IRQ_SERVICE_SIG); + + __ctl_load(cr0, 0, 0); + S390_lowcore.external_new_psw = psw_ext_save; +} + +static int _sclp_servc(unsigned int cmd, char *sccb) +{ + unsigned int cc; + + do { + asm volatile( + " .insn rre,0xb2200000,%1,%2\n" + " ipm %0\n" + : "=d" (cc) : "d" (cmd), "a" (sccb) + : "cc", "memory"); + cc >>= 28; + if (cc == 3) + return -EINVAL; + _sclp_wait_int(); + } while (cc != 0); + return (*(unsigned short *)(sccb + 6) == 0x20) ? 0 : -EIO; +} + +static int _sclp_setup(int disable) +{ + static unsigned char init_sccb[] = { + 0x00, 0x1c, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x04, + 0x80, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + }; + unsigned int *masks; + int rc; + + memcpy(_sclp_work_area, init_sccb, 28); + masks = (unsigned int *)(_sclp_work_area + 12); + if (disable) + memset(masks, 0, 16); + /* SCLP write mask */ + rc = _sclp_servc(0x00780005, _sclp_work_area); + if (rc) + return rc; + if ((masks[0] & masks[3]) != masks[0] || + (masks[1] & masks[2]) != masks[1]) + return -EIO; + return 0; +} + +static int _sclp_print(const char *str) +{ + static unsigned char write_head[] = { + /* sccb header */ + 0x00, 0x52, /* 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 2 */ + /* evbuf */ + 0x00, 0x4a, /* 8 */ + 0x02, 0x00, 0x00, 0x00, /* 10 */ + /* mdb */ + 0x00, 0x44, /* 14 */ + 0x00, 0x01, /* 16 */ + 0xd4, 0xc4, 0xc2, 0x40, /* 18 */ + 0x00, 0x00, 0x00, 0x01, /* 22 */ + /* go */ + 0x00, 0x38, /* 26 */ + 0x00, 0x01, /* 28 */ + 0x00, 0x00, 0x00, 0x00, /* 30 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 34 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 42 */ + 0x00, 0x00, 0x00, 0x00, /* 50 */ + 0x00, 0x00, /* 54 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 56 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 64 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 72 */ + 0x00, 0x00, /* 80 */ + }; + static unsigned char write_mto[] = { + /* mto */ + 0x00, 0x0a, /* 0 */ + 0x00, 0x04, /* 2 */ + 0x10, 0x00, /* 4 */ + 0x00, 0x00, 0x00, 0x00 /* 6 */ + }; + unsigned char *ptr, ch; + unsigned int count; + + memcpy(_sclp_work_area, write_head, sizeof(write_head)); + ptr = _sclp_work_area + sizeof(write_head); + do { + memcpy(ptr, write_mto, sizeof(write_mto)); + for (count = sizeof(write_mto); (ch = *str++) != 0; count++) { + if (ch == 0x0a) + break; + ptr[count] = _ascebc[ch]; + } + /* Update length fields in mto, mdb, evbuf and sccb */ + *(unsigned short *) ptr = count; + *(unsigned short *)(_sclp_work_area + 14) += count; + *(unsigned short *)(_sclp_work_area + 8) += count; + *(unsigned short *)(_sclp_work_area + 0) += count; + ptr += count; + } while (ch != 0); + + /* SCLP write data */ + return _sclp_servc(0x00760005, _sclp_work_area); +} + +int _sclp_print_early(const char *str) +{ + int rc; + + rc = _sclp_setup(0); + if (rc) + return rc; + rc = _sclp_print(str); + if (rc) + return rc; + return _sclp_setup(1); +} -- cgit v1.2.3