From fd2527f20915d041e838b6e4a08122dbc73c7abc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:02 +0100 Subject: s390/fpu: move, rename, and merge header files Move, rename, and merge the fpu and vx header files. This way fpu header files have a consistent naming scheme (fpu*.h). Also get rid of the fpu subdirectory and move header files to asm directory, so that all fpu and vx header files can be found at the same location. Merge internal.h header file into other header files, since the internal helpers are used at many locations and are therefore not really internal. Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c0b6e74d899a..a422a2cf9d05 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -33,13 +33,12 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include typedef long (*sys_call_ptr_t)(struct pt_regs *regs); -- cgit v1.2.3 From 419abc4d3828813b58d047da146f519eedaa395b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:09 +0100 Subject: s390/fpu: convert FPU CIF flag to regular TIF flag The FPU state, as represented by the CIF_FPU flag, reflects the FPU state of a task, not the CPU it is running on. Therefore convert the flag to a regular TIF flag. This removes the magic in switch_to() where a save_fpu_regs() call for the currently (previous) running task sets the per-cpu CIF_FPU flag, which is required to restore FPU register contents of the next task, when it returns to user space. 
Signed-off-by: Heiko Carstens --- arch/s390/include/asm/entry-common.h | 2 +- arch/s390/include/asm/fpu.h | 2 +- arch/s390/include/asm/processor.h | 2 -- arch/s390/include/asm/thread_info.h | 2 ++ arch/s390/kernel/entry.S | 2 +- arch/s390/kernel/fpu.c | 6 +++--- arch/s390/kernel/process.c | 7 +------ arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/kvm/vsie.c | 2 +- 9 files changed, 11 insertions(+), 16 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index a1dbab19c0bd..e479d39e1445 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -41,7 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - if (test_cpu_flag(CIF_FPU)) + if (test_thread_flag(TIF_FPU)) __load_fpu_regs(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index be85c28cdcde..3f076172a283 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -148,7 +148,7 @@ static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) { preempt_disable(); state->mask = S390_lowcore.fpu_flags; - if (!test_cpu_flag(CIF_FPU)) { + if (!test_thread_flag(TIF_FPU)) { /* Save user space FPU state and register contents */ save_fpu_regs(); } else if (state->mask & flags) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index a422a2cf9d05..f25617cbc49e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -15,13 +15,11 @@ #include #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ -#define CIF_FPU 3 /* restore FPU registers */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ #define _CIF_NOHZ_DELAY 
BIT(CIF_NOHZ_DELAY) -#define _CIF_FPU BIT(CIF_FPU) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) #define _CIF_DEDICATED_CPU BIT(CIF_DEDICATED_CPU) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index a674c7d25da5..e3f70b94a79b 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -69,6 +69,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 5 /* pending live patching update */ #define TIF_PGSTE 6 /* New mm's will use 4K page tables */ #define TIF_NOTIFY_SIGNAL 7 /* signal notifications exist */ +#define TIF_FPU 8 /* restore FPU registers on exit to usermode */ #define TIF_ISOLATE_BP_GUEST 9 /* Run KVM guests with isolated BP */ #define TIF_PER_TRAP 10 /* Need to handle PER trap on exit to usermode */ @@ -92,6 +93,7 @@ void arch_setup_new_exec(void); #define _TIF_UPROBE BIT(TIF_UPROBE) #define _TIF_GUARDED_STORAGE BIT(TIF_GUARDED_STORAGE) #define _TIF_PATCH_PENDING BIT(TIF_PATCH_PENDING) +#define _TIF_FPU BIT(TIF_FPU) #define _TIF_ISOLATE_BP_GUEST BIT(TIF_ISOLATE_BP_GUEST) #define _TIF_PER_TRAP BIT(TIF_PER_TRAP) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 01c3b2d2821d..00f2e1741501 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -220,7 +220,7 @@ SYM_FUNC_START(__sie64a) oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now tm __SIE_PROG20+3(%r14),3 # last exit... 
jnz .Lsie_skip - TSTMSK __LC_CPU_FLAGS,_CIF_FPU + TSTMSK __SF_SIE_FLAGS(%r15),_TIF_FPU jo .Lsie_skip # exit if fp/vx regs changed lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 0a31408a46f3..12d6e9d97104 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -117,7 +117,7 @@ void __load_fpu_regs(void) load_vx_regs(regs); else load_fp_regs(regs); - clear_cpu_flag(CIF_FPU); + clear_thread_flag(TIF_FPU); } void load_fpu_regs(void) @@ -136,7 +136,7 @@ void save_fpu_regs(void) local_irq_save(flags); - if (test_cpu_flag(CIF_FPU)) + if (test_thread_flag(TIF_FPU)) goto out; state = &current->thread.fpu; @@ -147,7 +147,7 @@ void save_fpu_regs(void) save_vx_regs(regs); else save_fp_regs(regs); - set_cpu_flag(CIF_FPU); + set_thread_flag(TIF_FPU); out: local_irq_restore(flags); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b0578ea230e7..f4c355f080f2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -88,7 +88,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { /* * Save the floating-point or vector register state of the current - * task and set the CIF_FPU flag to lazy restore the FPU register + * task and set the TIF_FPU flag to lazy restore the FPU register * state when returning to user space. */ save_fpu_regs(); @@ -196,11 +196,6 @@ void execve_tail(void) struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) { - /* - * save_fpu_regs() sets the CIF_FPU flag, which enforces - * a restore of the floating point / vector registers as - * soon as the next task returns to user space. 
- */ save_fpu_regs(); save_access_regs(&prev->thread.acrs[0]); save_ri_cb(prev->thread.ri_cb); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8f4414539756..0cee7192a1c2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4829,7 +4829,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs)); } - if (test_cpu_flag(CIF_FPU)) + if (test_thread_flag(TIF_FPU)) load_fpu_regs(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 457d92c2949a..b8a242360ed0 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1149,7 +1149,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) */ vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; barrier(); - if (test_cpu_flag(CIF_FPU)) + if (test_thread_flag(TIF_FPU)) load_fpu_regs(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) rc = sie64a(scb_s, vcpu->run->s.regs.gprs); -- cgit v1.2.3 From 87c5c70036813d26e6e7e4393747a4fdc63cf193 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:10 +0100 Subject: s390/fpu: rename save_fpu_regs() to save_user_fpu_regs(), etc Rename save_fpu_regs(), load_fpu_regs(), and struct thread_struct's fpu member to save_user_fpu_regs(), load_user_fpu_regs(), and ufpu. This way the function and variable names reflect for which context they are supposed to be used. This large and trivial conversion is a prerequisite for making the kernel fpu usage preemptible. 
Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/include/asm/entry-common.h | 2 +- arch/s390/include/asm/fpu.h | 8 ++-- arch/s390/include/asm/processor.h | 4 +- arch/s390/kernel/compat_signal.c | 18 ++++----- arch/s390/kernel/fpu.c | 20 ++++----- arch/s390/kernel/perf_regs.c | 6 +-- arch/s390/kernel/process.c | 8 ++-- arch/s390/kernel/ptrace.c | 78 ++++++++++++++++++------------------ arch/s390/kernel/signal.c | 18 ++++----- arch/s390/kernel/traps.c | 10 ++--- arch/s390/kvm/interrupt.c | 4 +- arch/s390/kvm/kvm-s390.c | 26 ++++++------ arch/s390/kvm/vsie.c | 2 +- 13 files changed, 102 insertions(+), 102 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index e479d39e1445..659e07d7f31a 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -42,7 +42,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { if (test_thread_flag(TIF_FPU)) - __load_fpu_regs(); + __load_user_fpu_regs(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) debug_user_asce(1); diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index 3f076172a283..5d3533569925 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -57,9 +57,9 @@ static inline bool cpu_has_vx(void) return likely(test_facility(129)); } -void save_fpu_regs(void); -void load_fpu_regs(void); -void __load_fpu_regs(void); +void save_user_fpu_regs(void); +void load_user_fpu_regs(void); +void __load_user_fpu_regs(void); enum { KERNEL_FPC_BIT = 0, @@ -150,7 +150,7 @@ static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags) state->mask = S390_lowcore.fpu_flags; if (!test_thread_flag(TIF_FPU)) { /* Save user space FPU state and register contents */ - save_fpu_regs(); + save_user_fpu_regs(); } else if (state->mask & flags) { /* Save FPU/vector 
register in-use by the kernel */ __kernel_fpu_begin(state, flags); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index f25617cbc49e..2e716bf34bf8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -181,7 +181,7 @@ struct thread_struct { struct gs_cb *gs_cb; /* Current guarded storage cb */ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */ - struct fpu fpu; /* FP and VX register save area */ + struct fpu ufpu; /* User FP and VX register save area */ }; /* Flag to disable transactions. */ @@ -200,7 +200,7 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ - .fpu.regs = (void *) init_task.thread.fpu.fprs, \ + .ufpu.regs = (void *)init_task.thread.ufpu.fprs, \ .last_break = 1, \ } diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 6cd9bf925c82..1942e2a9f8db 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -56,7 +56,7 @@ typedef struct static void store_sigregs(void) { save_access_regs(current->thread.acrs); - save_fpu_regs(); + save_user_fpu_regs(); } /* Load registers after signal return */ @@ -79,7 +79,7 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu); + fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.ufpu); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) return -EFAULT; return 0; @@ -113,7 +113,7 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; memcpy(&current->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - 
fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, &current->thread.fpu); + fpregs_load((_s390_fp_regs *)&user_sregs.fpregs, &current->thread.ufpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; @@ -136,11 +136,11 @@ static int save_sigregs_ext32(struct pt_regs *regs, /* Save vector registers to signal stack */ if (cpu_has_vx()) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = current->thread.fpu.vxrs[i].low; + vxrs[i] = current->thread.ufpu.vxrs[i].low; if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, - current->thread.fpu.vxrs + __NUM_VXRS_LOW, + current->thread.ufpu.vxrs + __NUM_VXRS_LOW, sizeof(sregs_ext->vxrs_high))) return -EFAULT; } @@ -165,12 +165,12 @@ static int restore_sigregs_ext32(struct pt_regs *regs, if (cpu_has_vx()) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, + __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW, &sregs_ext->vxrs_high, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - current->thread.fpu.vxrs[i].low = vxrs[i]; + current->thread.ufpu.vxrs[i].low = vxrs[i]; } return 0; } @@ -184,7 +184,7 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask)) goto badframe; set_current_blocked(&set); - save_fpu_regs(); + save_user_fpu_regs(); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; if (restore_sigregs_ext32(regs, &frame->sregs_ext)) @@ -207,7 +207,7 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; - save_fpu_regs(); + save_user_fpu_regs(); if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 12d6e9d97104..62c9b2809057 
100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -107,10 +107,10 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) } EXPORT_SYMBOL(__kernel_fpu_end); -void __load_fpu_regs(void) +void __load_user_fpu_regs(void) { - struct fpu *state = &current->thread.fpu; - void *regs = current->thread.fpu.regs; + struct fpu *state = &current->thread.ufpu; + void *regs = current->thread.ufpu.regs; fpu_lfpc_safe(&state->fpc); if (likely(cpu_has_vx())) @@ -120,15 +120,15 @@ void __load_fpu_regs(void) clear_thread_flag(TIF_FPU); } -void load_fpu_regs(void) +void load_user_fpu_regs(void) { raw_local_irq_disable(); - __load_fpu_regs(); + __load_user_fpu_regs(); raw_local_irq_enable(); } -EXPORT_SYMBOL(load_fpu_regs); +EXPORT_SYMBOL(load_user_fpu_regs); -void save_fpu_regs(void) +void save_user_fpu_regs(void) { unsigned long flags; struct fpu *state; @@ -139,8 +139,8 @@ void save_fpu_regs(void) if (test_thread_flag(TIF_FPU)) goto out; - state = &current->thread.fpu; - regs = current->thread.fpu.regs; + state = &current->thread.ufpu; + regs = current->thread.ufpu.regs; fpu_stfpc(&state->fpc); if (likely(cpu_has_vx())) @@ -151,4 +151,4 @@ void save_fpu_regs(void) out: local_irq_restore(flags); } -EXPORT_SYMBOL(save_fpu_regs); +EXPORT_SYMBOL(save_user_fpu_regs); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index c8e8fb728ddb..511349b8bc5c 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -20,9 +20,9 @@ u64 perf_reg_value(struct pt_regs *regs, int idx) idx -= PERF_REG_S390_FP0; if (cpu_has_vx()) - fp = *(freg_t *)(current->thread.fpu.vxrs + idx); + fp = *(freg_t *)(current->thread.ufpu.vxrs + idx); else - fp = current->thread.fpu.fprs[idx]; + fp = current->thread.ufpu.fprs[idx]; return fp.ui; } @@ -64,6 +64,6 @@ void perf_get_regs_user(struct perf_regs *regs_user, */ regs_user->regs = task_pt_regs(current); if (user_mode(regs_user->regs)) - save_fpu_regs(); + save_user_fpu_regs(); regs_user->abi = perf_reg_abi(current); } diff 
--git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index f4c355f080f2..62146daf9051 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -91,10 +91,10 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) * task and set the TIF_FPU flag to lazy restore the FPU register * state when returning to user space. */ - save_fpu_regs(); + save_user_fpu_regs(); *dst = *src; - dst->thread.fpu.regs = dst->thread.fpu.fprs; + dst->thread.ufpu.regs = dst->thread.ufpu.fprs; /* * Don't transfer over the runtime instrumentation or the guarded @@ -190,13 +190,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) void execve_tail(void) { - current->thread.fpu.fpc = 0; + current->thread.ufpu.fpc = 0; fpu_sfpc(0); } struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) { - save_fpu_regs(); + save_user_fpu_regs(); save_access_regs(&prev->thread.acrs[0]); save_ri_cb(prev->thread.ri_cb); save_gs_cb(prev->thread.gs_cb); diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2eafd6dcd592..f1ca79073173 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -247,21 +247,21 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) /* * floating point control reg. is in the thread structure */ - tmp = child->thread.fpu.fpc; + tmp = child->thread.ufpu.fpc; tmp <<= BITS_PER_LONG - 32; } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* - * floating point regs. are either in child->thread.fpu - * or the child->thread.fpu.vxrs array + * floating point regs. 
are either in child->thread.ufpu + * or the child->thread.ufpu.vxrs array */ offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (cpu_has_vx()) tmp = *(addr_t *) - ((addr_t) child->thread.fpu.vxrs + 2*offset); + ((addr_t)child->thread.ufpu.vxrs + 2 * offset); else tmp = *(addr_t *) - ((addr_t) child->thread.fpu.fprs + offset); + ((addr_t)child->thread.ufpu.fprs + offset); } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* @@ -396,20 +396,20 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) */ if ((unsigned int)data != 0) return -EINVAL; - child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); + child->thread.ufpu.fpc = data >> (BITS_PER_LONG - 32); } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* - * floating point regs. are either in child->thread.fpu - * or the child->thread.fpu.vxrs array + * floating point regs. are either in child->thread.ufpu + * or the child->thread.ufpu.vxrs array */ offset = addr - offsetof(struct user, regs.fp_regs.fprs); if (cpu_has_vx()) *(addr_t *)((addr_t) - child->thread.fpu.vxrs + 2*offset) = data; + child->thread.ufpu.vxrs + 2 * offset) = data; else *(addr_t *)((addr_t) - child->thread.fpu.fprs + offset) = data; + child->thread.ufpu.fprs + offset) = data; } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { /* @@ -623,20 +623,20 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) /* * floating point control reg. is in the thread structure */ - tmp = child->thread.fpu.fpc; + tmp = child->thread.ufpu.fpc; } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* - * floating point regs. are either in child->thread.fpu - * or the child->thread.fpu.vxrs array + * floating point regs. 
are either in child->thread.ufpu + * or the child->thread.ufpu.vxrs array */ offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (cpu_has_vx()) tmp = *(__u32 *) - ((addr_t) child->thread.fpu.vxrs + 2*offset); + ((addr_t)child->thread.ufpu.vxrs + 2 * offset); else tmp = *(__u32 *) - ((addr_t) child->thread.fpu.fprs + offset); + ((addr_t)child->thread.ufpu.fprs + offset); } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* @@ -749,20 +749,20 @@ static int __poke_user_compat(struct task_struct *child, /* * floating point control reg. is in the thread structure */ - child->thread.fpu.fpc = data; + child->thread.ufpu.fpc = data; } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { /* - * floating point regs. are either in child->thread.fpu - * or the child->thread.fpu.vxrs array + * floating point regs. are either in child->thread.ufpu + * or the child->thread.ufpu.vxrs array */ offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); if (cpu_has_vx()) *(__u32 *)((addr_t) - child->thread.fpu.vxrs + 2*offset) = tmp; + child->thread.ufpu.vxrs + 2 * offset) = tmp; else *(__u32 *)((addr_t) - child->thread.fpu.fprs + offset) = tmp; + child->thread.ufpu.fprs + offset) = tmp; } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { /* @@ -894,10 +894,10 @@ static int s390_fpregs_get(struct task_struct *target, _s390_fp_regs fp_regs; if (target == current) - save_fpu_regs(); + save_user_fpu_regs(); - fp_regs.fpc = target->thread.fpu.fpc; - fpregs_store(&fp_regs, &target->thread.fpu); + fp_regs.fpc = target->thread.ufpu.fpc; + fpregs_store(&fp_regs, &target->thread.ufpu); return membuf_write(&to, &fp_regs, sizeof(fp_regs)); } @@ -911,22 +911,22 @@ static int s390_fpregs_set(struct task_struct *target, freg_t fprs[__NUM_FPRS]; if (target == current) - save_fpu_regs(); + save_user_fpu_regs(); if (cpu_has_vx()) - 
convert_vx_to_fp(fprs, target->thread.fpu.vxrs); + convert_vx_to_fp(fprs, target->thread.ufpu.vxrs); else - memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); + memcpy(&fprs, target->thread.ufpu.fprs, sizeof(fprs)); if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { - u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; + u32 ufpc[2] = { target->thread.ufpu.fpc, 0 }; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, 0, offsetof(s390_fp_regs, fprs)); if (rc) return rc; if (ufpc[1] != 0) return -EINVAL; - target->thread.fpu.fpc = ufpc[0]; + target->thread.ufpu.fpc = ufpc[0]; } if (rc == 0 && count > 0) @@ -936,9 +936,9 @@ static int s390_fpregs_set(struct task_struct *target, return rc; if (cpu_has_vx()) - convert_fp_to_vx(target->thread.fpu.vxrs, fprs); + convert_fp_to_vx(target->thread.ufpu.vxrs, fprs); else - memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); + memcpy(target->thread.ufpu.fprs, &fprs, sizeof(fprs)); return rc; } @@ -989,9 +989,9 @@ static int s390_vxrs_low_get(struct task_struct *target, if (!cpu_has_vx()) return -ENODEV; if (target == current) - save_fpu_regs(); + save_user_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = target->thread.fpu.vxrs[i].low; + vxrs[i] = target->thread.ufpu.vxrs[i].low; return membuf_write(&to, vxrs, sizeof(vxrs)); } @@ -1006,15 +1006,15 @@ static int s390_vxrs_low_set(struct task_struct *target, if (!cpu_has_vx()) return -ENODEV; if (target == current) - save_fpu_regs(); + save_user_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = target->thread.fpu.vxrs[i].low; + vxrs[i] = target->thread.ufpu.vxrs[i].low; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) - target->thread.fpu.vxrs[i].low = vxrs[i]; + target->thread.ufpu.vxrs[i].low = vxrs[i]; return rc; } @@ -1026,8 +1026,8 @@ static int s390_vxrs_high_get(struct task_struct *target, if (!cpu_has_vx()) return -ENODEV; if (target == current) - save_fpu_regs(); - return 
membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW, + save_user_fpu_regs(); + return membuf_write(&to, target->thread.ufpu.vxrs + __NUM_VXRS_LOW, __NUM_VXRS_HIGH * sizeof(__vector128)); } @@ -1041,10 +1041,10 @@ static int s390_vxrs_high_set(struct task_struct *target, if (!cpu_has_vx()) return -ENODEV; if (target == current) - save_fpu_regs(); + save_user_fpu_regs(); rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1); + target->thread.ufpu.vxrs + __NUM_VXRS_LOW, 0, -1); return rc; } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 1517aa70678b..6c2cb345402f 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -109,7 +109,7 @@ struct rt_sigframe static void store_sigregs(void) { save_access_regs(current->thread.acrs); - save_fpu_regs(); + save_user_fpu_regs(); } /* Load registers after signal return */ @@ -131,7 +131,7 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs)); memcpy(&user_sregs.regs.acrs, current->thread.acrs, sizeof(user_sregs.regs.acrs)); - fpregs_store(&user_sregs.fpregs, &current->thread.fpu); + fpregs_store(&user_sregs.fpregs, &current->thread.ufpu); if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) return -EFAULT; return 0; @@ -165,7 +165,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) memcpy(&current->thread.acrs, &user_sregs.regs.acrs, sizeof(current->thread.acrs)); - fpregs_load(&user_sregs.fpregs, &current->thread.fpu); + fpregs_load(&user_sregs.fpregs, &current->thread.ufpu); clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ return 0; @@ -181,11 +181,11 @@ static int save_sigregs_ext(struct pt_regs *regs, /* Save vector registers to signal stack */ if (cpu_has_vx()) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = current->thread.fpu.vxrs[i].low; + vxrs[i] = current->thread.ufpu.vxrs[i].low; if 
(__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, - current->thread.fpu.vxrs + __NUM_VXRS_LOW, + current->thread.ufpu.vxrs + __NUM_VXRS_LOW, sizeof(sregs_ext->vxrs_high))) return -EFAULT; } @@ -202,12 +202,12 @@ static int restore_sigregs_ext(struct pt_regs *regs, if (cpu_has_vx()) { if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, sizeof(sregs_ext->vxrs_low)) || - __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, + __copy_from_user(current->thread.ufpu.vxrs + __NUM_VXRS_LOW, &sregs_ext->vxrs_high, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - current->thread.fpu.vxrs[i].low = vxrs[i]; + current->thread.ufpu.vxrs[i].low = vxrs[i]; } return 0; } @@ -222,7 +222,7 @@ SYSCALL_DEFINE0(sigreturn) if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) goto badframe; set_current_blocked(&set); - save_fpu_regs(); + save_user_fpu_regs(); if (restore_sigregs(regs, &frame->sregs)) goto badframe; if (restore_sigregs_ext(regs, &frame->sregs_ext)) @@ -246,7 +246,7 @@ SYSCALL_DEFINE0(rt_sigreturn) set_current_blocked(&set); if (restore_altstack(&frame->uc.uc_stack)) goto badframe; - save_fpu_regs(); + save_user_fpu_regs(); if (restore_sigregs(regs, &frame->uc.uc_mcontext)) goto badframe; if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 08f8aee96d8f..52578b5cecbd 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -201,8 +201,8 @@ static void vector_exception(struct pt_regs *regs) } /* get vector interrupt code from fpc */ - save_fpu_regs(); - vic = (current->thread.fpu.fpc & 0xf00) >> 8; + save_user_fpu_regs(); + vic = (current->thread.ufpu.fpc & 0xf00) >> 8; switch (vic) { case 1: /* invalid vector operation */ si_code = FPE_FLTINV; @@ -227,9 +227,9 @@ static void vector_exception(struct pt_regs *regs) static void data_exception(struct pt_regs *regs) { 
- save_fpu_regs(); - if (current->thread.fpu.fpc & FPC_DXC_MASK) - do_fp_trap(regs, current->thread.fpu.fpc); + save_user_fpu_regs(); + if (current->thread.ufpu.fpc & FPC_DXC_MASK) + do_fp_trap(regs, current->thread.ufpu.fpc); else do_trap(regs, SIGILL, ILL_ILLOPN, "data exception"); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6e435bf0c27e..9315203c2786 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -584,7 +584,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, mci.val = mchk->mcic; /* take care of lazy register loading */ - save_fpu_regs(); + save_user_fpu_regs(); save_access_regs(vcpu->run->s.regs.acrs); if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) save_gs_cb(current->thread.gs_cb); @@ -648,7 +648,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, } rc |= write_guest_lc(vcpu, __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); - rc |= put_guest_lc(vcpu, current->thread.fpu.fpc, + rc |= put_guest_lc(vcpu, current->thread.ufpu.fpc, (u32 __user *) __LC_FP_CREG_SAVE_AREA); rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->todpr, (u32 __user *) __LC_TOD_PROGREG_SAVE_AREA); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0cee7192a1c2..3ce4029cabc2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4830,7 +4830,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) sizeof(sie_page->pv_grregs)); } if (test_thread_flag(TIF_FPU)) - load_fpu_regs(); + load_user_fpu_regs(); exit_reason = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs); if (kvm_s390_pv_cpu_is_protected(vcpu)) { @@ -4952,14 +4952,14 @@ static void sync_regs(struct kvm_vcpu *vcpu) save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); /* save host (userspace) fprs/vrs */ - save_fpu_regs(); - vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; - vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; + save_user_fpu_regs(); + vcpu->arch.host_fpregs.fpc = 
current->thread.ufpu.fpc; + vcpu->arch.host_fpregs.regs = current->thread.ufpu.regs; if (cpu_has_vx()) - current->thread.fpu.regs = vcpu->run->s.regs.vrs; + current->thread.ufpu.regs = vcpu->run->s.regs.vrs; else - current->thread.fpu.regs = vcpu->run->s.regs.fprs; - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; + current->thread.ufpu.regs = vcpu->run->s.regs.fprs; + current->thread.ufpu.fpc = vcpu->run->s.regs.fpc; /* Sync fmt2 only data */ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) { @@ -5022,11 +5022,11 @@ static void store_regs(struct kvm_vcpu *vcpu) save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); /* Save guest register state */ - save_fpu_regs(); - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; + save_user_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.ufpu.fpc; /* Restore will be done lazily at return */ - current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; - current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; + current->thread.ufpu.fpc = vcpu->arch.host_fpregs.fpc; + current->thread.ufpu.regs = vcpu->arch.host_fpregs.regs; if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) store_regs_fmt2(vcpu); } @@ -5172,8 +5172,8 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * switch in the run ioctl. 
Let's update our copies before we save * it into the save area */ - save_fpu_regs(); - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; + save_user_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.ufpu.fpc; save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index b8a242360ed0..e0f79c9a4852 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1150,7 +1150,7 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) vcpu->arch.sie_block->prog0c |= PROG_IN_SIE; barrier(); if (test_thread_flag(TIF_FPU)) - load_fpu_regs(); + load_user_fpu_regs(); if (!kvm_s390_vcpu_sie_inhibited(vcpu)) rc = sie64a(scb_s, vcpu->run->s.regs.gprs); barrier(); -- cgit v1.2.3 From 4eed43de9ba0ae3af6716544408d185a152424cd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:12 +0100 Subject: s390/fpu: make kernel fpu context preemptible Make the kernel fpu context preemptible. Add another fpu structure to the thread_struct, and use it to save and restore the kernel fpu context if its task uses fpu registers when it is preempted. Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/include/asm/fpu.h | 42 ++++++++++++++++++++++++++++----------- arch/s390/include/asm/lowcore.h | 2 +- arch/s390/include/asm/processor.h | 2 ++ arch/s390/kernel/process.c | 3 +++ 4 files changed, 36 insertions(+), 13 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index 447d68fb41b2..4300eef243f9 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -82,13 +83,6 @@ enum { #define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) #define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) -/* - * Note the functions below must be called with preemption disabled. 
- * Do not enable preemption before calling __kernel_fpu_end() to prevent - * an corruption of an existing kernel FPU state. - * - * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions. - */ void __kernel_fpu_begin(struct kernel_fpu *state, int flags); void __kernel_fpu_end(struct kernel_fpu *state, int flags); @@ -146,8 +140,7 @@ static __always_inline void load_fp_regs(freg_t *fprs) static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags) { - preempt_disable(); - state->mask = S390_lowcore.fpu_flags; + state->mask = READ_ONCE(current->thread.kfpu_flags); if (!test_thread_flag(TIF_FPU)) { /* Save user space FPU state and register contents */ save_user_fpu_regs(); @@ -155,17 +148,42 @@ static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags) /* Save FPU/vector register in-use by the kernel */ __kernel_fpu_begin(state, flags); } - S390_lowcore.fpu_flags |= flags; + __atomic_or(flags, ¤t->thread.kfpu_flags); } static inline void kernel_fpu_end(struct kernel_fpu *state, int flags) { - S390_lowcore.fpu_flags = state->mask; + WRITE_ONCE(current->thread.kfpu_flags, state->mask); if (state->mask & flags) { /* Restore FPU/vector register in-use by the kernel */ __kernel_fpu_end(state, flags); } - preempt_enable(); +} + +static inline void save_kernel_fpu_regs(struct thread_struct *thread) +{ + struct fpu *state = &thread->kfpu; + + if (!thread->kfpu_flags) + return; + fpu_stfpc(&state->fpc); + if (likely(cpu_has_vx())) + save_vx_regs(state->vxrs); + else + save_fp_regs(state->fprs); +} + +static inline void restore_kernel_fpu_regs(struct thread_struct *thread) +{ + struct fpu *state = &thread->kfpu; + + if (!thread->kfpu_flags) + return; + fpu_lfpc(&state->fpc); + if (likely(cpu_has_vx())) + load_vx_regs(state->vxrs); + else + load_fp_regs(state->fprs); } static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 
5dc1b6345006..8c5f16857539 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -157,7 +157,7 @@ struct lowcore { __s32 preempt_count; /* 0x03a8 */ __u32 spinlock_lockval; /* 0x03ac */ __u32 spinlock_index; /* 0x03b0 */ - __u32 fpu_flags; /* 0x03b4 */ + __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ __u64 percpu_offset; /* 0x03b8 */ __u8 pad_0x03c0[0x03c8-0x03c0]; /* 0x03c0 */ __u64 machine_flags; /* 0x03c8 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 2e716bf34bf8..eee0a1eec620 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -166,6 +166,7 @@ struct thread_struct { unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ unsigned int gmap_pfault; /* signal of a pending guest pfault */ + int kfpu_flags; /* kernel fpu flags */ /* Per-thread information related to debugging */ struct per_regs per_user; /* User specified PER registers */ @@ -182,6 +183,7 @@ struct thread_struct { struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */ struct pgm_tdb trap_tdb; /* Transaction abort diagnose block */ struct fpu ufpu; /* User FP and VX register save area */ + struct fpu kfpu; /* Kernel FP and VX register save area */ }; /* Flag to disable transactions. 
*/ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 62146daf9051..b7b623818753 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -95,6 +95,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) *dst = *src; dst->thread.ufpu.regs = dst->thread.ufpu.fprs; + dst->thread.kfpu_flags = 0; /* * Don't transfer over the runtime instrumentation or the guarded @@ -197,10 +198,12 @@ void execve_tail(void) struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) { save_user_fpu_regs(); + save_kernel_fpu_regs(&prev->thread); save_access_regs(&prev->thread.acrs[0]); save_ri_cb(prev->thread.ri_cb); save_gs_cb(prev->thread.gs_cb); update_cr_regs(next); + restore_kernel_fpu_regs(&next->thread); restore_access_regs(&next->thread.acrs[0]); restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); restore_gs_cb(next->thread.gs_cb); -- cgit v1.2.3 From 9cbff7f2214d16af5c10f1f55ac72d4c1a8bd787 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:14 +0100 Subject: s390/fpu: remove regs member from struct fpu KVM was the only user which modified the regs pointer in struct fpu. Remove the pointer and convert the rest of the core fpu code to directly access the save area embedded within struct fpu. 
Reviewed-by: Claudio Imbrenda Signed-off-by: Heiko Carstens --- arch/s390/include/asm/fpu-types.h | 1 - arch/s390/include/asm/processor.h | 1 - arch/s390/kernel/fpu.c | 11 ++++------- arch/s390/kernel/process.c | 1 - 4 files changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h index fee4468a85d3..f5b6fab30401 100644 --- a/arch/s390/include/asm/fpu-types.h +++ b/arch/s390/include/asm/fpu-types.h @@ -13,7 +13,6 @@ struct fpu { __u32 fpc; /* Floating-point control */ - void *regs; /* Pointer to the current save area */ union { /* Floating-point register save area */ freg_t fprs[__NUM_FPRS]; diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index eee0a1eec620..ecce58abf3db 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -202,7 +202,6 @@ typedef struct thread_struct thread_struct; #define INIT_THREAD { \ .ksp = sizeof(init_stack) + (unsigned long) &init_stack, \ - .ufpu.regs = (void *)init_task.thread.ufpu.fprs, \ .last_break = 1, \ } diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index b976da5bf71b..a0ef3fc5d90f 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -110,13 +110,12 @@ EXPORT_SYMBOL(__kernel_fpu_end); void __load_user_fpu_regs(void) { struct fpu *state = ¤t->thread.ufpu; - void *regs = current->thread.ufpu.regs; fpu_lfpc_safe(&state->fpc); if (likely(cpu_has_vx())) - load_vx_regs(regs); + load_vx_regs(state->vxrs); else - load_fp_regs(regs); + load_fp_regs(state->fprs); clear_thread_flag(TIF_FPU); } @@ -132,7 +131,6 @@ void save_user_fpu_regs(void) { unsigned long flags; struct fpu *state; - void *regs; local_irq_save(flags); @@ -140,13 +138,12 @@ void save_user_fpu_regs(void) goto out; state = ¤t->thread.ufpu; - regs = current->thread.ufpu.regs; fpu_stfpc(&state->fpc); if (likely(cpu_has_vx())) - save_vx_regs(regs); + 
save_vx_regs(state->vxrs); else - save_fp_regs(regs); + save_fp_regs(state->fprs); set_thread_flag(TIF_FPU); out: local_irq_restore(flags); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b7b623818753..dd456b475861 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -94,7 +94,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) save_user_fpu_regs(); *dst = *src; - dst->thread.ufpu.regs = dst->thread.ufpu.fprs; dst->thread.kfpu_flags = 0; /* -- cgit v1.2.3 From 8c09871a950a3fe686e0e27fd4193179c5f74f37 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 3 Feb 2024 11:45:18 +0100 Subject: s390/fpu: limit save and restore to used registers The first invocation of kernel_fpu_begin() after switching from user to kernel context will save all vector registers, even if only parts of the vector registers are used within the kernel fpu context. Given that save and restore of all vector registers is quite expensive, change the current approach in several ways: - Instead of saving and restoring all user registers limit this to those registers which are actually used within a kernel fpu context. - On context switch save all remaining user fpu registers, so they can be restored when the task is rescheduled. - Saving user registers within kernel_fpu_begin() is done without disabling and enabling interrupts - which also slightly reduces runtime. In the worst case (e.g. interrupt context uses the same registers) this may lead to the situation that registers are saved several times, however the assumption is that this will not happen frequently, so that the new method is faster in nearly all cases. - save_user_fpu_regs() can still be called from all contexts and saves all (or all remaining) user registers to a task's ufpu user fpu save area. Overall this reduces the time required to save and restore the user fpu context for nearly all cases.
Signed-off-by: Heiko Carstens --- arch/s390/include/asm/entry-common.h | 3 +- arch/s390/include/asm/fpu.h | 81 +++++++++++++++---------- arch/s390/include/asm/processor.h | 1 + arch/s390/kernel/fpu.c | 112 ++++++++++++++++++++++++----------- 4 files changed, 128 insertions(+), 69 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 659e07d7f31a..7f5004065e8a 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -41,8 +41,7 @@ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { - if (test_thread_flag(TIF_FPU)) - __load_user_fpu_regs(); + load_user_fpu_regs(); if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) debug_user_asce(1); diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h index c1b3920092a1..c84cb33913e2 100644 --- a/arch/s390/include/asm/fpu.h +++ b/arch/s390/include/asm/fpu.h @@ -58,10 +58,6 @@ static inline bool cpu_has_vx(void) return likely(test_facility(129)); } -void save_user_fpu_regs(void); -void load_user_fpu_regs(void); -void __load_user_fpu_regs(void); - enum { KERNEL_FPC_BIT = 0, KERNEL_VXR_V0V7_BIT, @@ -83,6 +79,8 @@ enum { #define KERNEL_VXR (KERNEL_VXR_LOW | KERNEL_VXR_HIGH) #define KERNEL_FPR (KERNEL_FPC | KERNEL_VXR_LOW) +void load_fpu_state(struct fpu *state, int flags); +void save_fpu_state(struct fpu *state, int flags); void __kernel_fpu_begin(struct kernel_fpu *state, int flags); void __kernel_fpu_end(struct kernel_fpu *state, int flags); @@ -162,26 +160,57 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs) __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t)); } -static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) +static inline void load_user_fpu_regs(void) +{ + struct thread_struct *thread = &current->thread; + + if (!thread->ufpu_flags) + return; +
load_fpu_state(&thread->ufpu, thread->ufpu_flags); + thread->ufpu_flags = 0; +} + +static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags) { - state->hdr.mask = READ_ONCE(current->thread.kfpu_flags); - if (!test_thread_flag(TIF_FPU)) { - /* Save user space FPU state and register contents */ - save_user_fpu_regs(); - } else if (state->hdr.mask & flags) { - /* Save FPU/vector register in-use by the kernel */ + save_fpu_state(&thread->ufpu, flags); + __atomic_or(flags, &thread->ufpu_flags); +} + +static inline void save_user_fpu_regs(void) +{ + struct thread_struct *thread = ¤t->thread; + int mask, flags; + + mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags); + flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR); + if (flags) + __save_user_fpu_regs(thread, flags); + barrier(); + WRITE_ONCE(thread->kfpu_flags, mask); +} + +static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags) +{ + struct thread_struct *thread = ¤t->thread; + int mask, uflags; + + mask = __atomic_or(flags, &thread->kfpu_flags); + state->hdr.mask = mask; + uflags = READ_ONCE(thread->ufpu_flags); + if ((uflags & flags) != flags) + __save_user_fpu_regs(thread, ~uflags & flags); + if (mask & flags) __kernel_fpu_begin(state, flags); - } - __atomic_or(flags, ¤t->thread.kfpu_flags); } -static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) +static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags) { - WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask); - if (state->hdr.mask & flags) { - /* Restore FPU/vector register in-use by the kernel */ + int mask = state->hdr.mask; + + if (mask & flags) __kernel_fpu_end(state, flags); - } + barrier(); + WRITE_ONCE(current->thread.kfpu_flags, mask); } void __kernel_fpu_invalid_size(void); @@ -222,28 +251,16 @@ static __always_inline void kernel_fpu_check_size(int flags, unsigned int size) static inline void save_kernel_fpu_regs(struct 
thread_struct *thread) { - struct fpu *state = &thread->kfpu; - if (!thread->kfpu_flags) return; - fpu_stfpc(&state->fpc); - if (likely(cpu_has_vx())) - save_vx_regs(state->vxrs); - else - save_fp_regs_vx(state->vxrs); + save_fpu_state(&thread->kfpu, thread->kfpu_flags); } static inline void restore_kernel_fpu_regs(struct thread_struct *thread) { - struct fpu *state = &thread->kfpu; - if (!thread->kfpu_flags) return; - fpu_lfpc(&state->fpc); - if (likely(cpu_has_vx())) - load_vx_regs(state->vxrs); - else - load_fp_regs_vx(state->vxrs); + load_fpu_state(&thread->kfpu, thread->kfpu_flags); } static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index ecce58abf3db..7cf00cf8fb0b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -166,6 +166,7 @@ struct thread_struct { unsigned int gmap_write_flag; /* gmap fault write indication */ unsigned int gmap_int_code; /* int code of last gmap fault */ unsigned int gmap_pfault; /* signal of a pending guest pfault */ + int ufpu_flags; /* user fpu flags */ int kfpu_flags; /* kernel fpu flags */ /* Per-thread information related to debugging */ diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index 62e9befe7890..fa90bbdc5ef9 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -107,45 +107,87 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags) } EXPORT_SYMBOL(__kernel_fpu_end); -void __load_user_fpu_regs(void) +void load_fpu_state(struct fpu *state, int flags) { - struct fpu *state = ¤t->thread.ufpu; - - fpu_lfpc_safe(&state->fpc); - if (likely(cpu_has_vx())) - load_vx_regs(state->vxrs); - else - load_fp_regs_vx(state->vxrs); - clear_thread_flag(TIF_FPU); -} + __vector128 *vxrs = &state->vxrs[0]; + int mask; -void load_user_fpu_regs(void) -{ - raw_local_irq_disable(); - __load_user_fpu_regs(); - raw_local_irq_enable(); + if (flags & KERNEL_FPC) + 
fpu_lfpc(&state->fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_V0V7) + load_fp_regs_vx(state->vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + fpu_vlm(0, 15, &vxrs[0]); + fpu_vlm(16, 31, &vxrs[16]); + return; + } + if (mask == KERNEL_VXR_MID) { + fpu_vlm(8, 23, &vxrs[8]); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + fpu_vlm(0, 15, &vxrs[0]); + else if (mask == KERNEL_VXR_V0V7) + fpu_vlm(0, 7, &vxrs[0]); + else + fpu_vlm(8, 15, &vxrs[8]); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + fpu_vlm(16, 31, &vxrs[16]); + else if (mask == KERNEL_VXR_V16V23) + fpu_vlm(16, 23, &vxrs[16]); + else + fpu_vlm(24, 31, &vxrs[24]); + } } -EXPORT_SYMBOL(load_user_fpu_regs); -void save_user_fpu_regs(void) +void save_fpu_state(struct fpu *state, int flags) { - unsigned long flags; - struct fpu *state; - - local_irq_save(flags); - - if (test_thread_flag(TIF_FPU)) - goto out; - - state = ¤t->thread.ufpu; + __vector128 *vxrs = &state->vxrs[0]; + int mask; - fpu_stfpc(&state->fpc); - if (likely(cpu_has_vx())) - save_vx_regs(state->vxrs); - else - save_fp_regs_vx(state->vxrs); - set_thread_flag(TIF_FPU); -out: - local_irq_restore(flags); + if (flags & KERNEL_FPC) + fpu_stfpc(&state->fpc); + if (!cpu_has_vx()) { + if (flags & KERNEL_VXR_LOW) + save_fp_regs_vx(state->vxrs); + return; + } + mask = flags & KERNEL_VXR; + if (mask == KERNEL_VXR) { + fpu_vstm(0, 15, &vxrs[0]); + fpu_vstm(16, 31, &vxrs[16]); + return; + } + if (mask == KERNEL_VXR_MID) { + fpu_vstm(8, 23, &vxrs[8]); + return; + } + mask = flags & KERNEL_VXR_LOW; + if (mask) { + if (mask == KERNEL_VXR_LOW) + fpu_vstm(0, 15, &vxrs[0]); + else if (mask == KERNEL_VXR_V0V7) + fpu_vstm(0, 7, &vxrs[0]); + else + fpu_vstm(8, 15, &vxrs[8]); + } + mask = flags & KERNEL_VXR_HIGH; + if (mask) { + if (mask == KERNEL_VXR_HIGH) + fpu_vstm(16, 31, &vxrs[16]); + else if (mask == KERNEL_VXR_V16V23) + fpu_vstm(16, 23, 
&vxrs[16]); + else + fpu_vstm(24, 31, &vxrs[24]); + } } -EXPORT_SYMBOL(save_user_fpu_regs); +EXPORT_SYMBOL(save_fpu_state); -- cgit v1.2.3 From c239c83ed5c558be3b5926c7f11639f02c8acd00 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 20 Feb 2024 14:21:14 +0100 Subject: s390/entry: add CIF_SIE flag and remove sie64a() address check When a program check, interrupt or machine check is triggered, the PSW address is compared to a certain range of the sie64a() function to figure out whether SIE was interrupted and a cleanup of SIE is needed. This doesn't work with kprobes: If kprobes probes an instruction, it copies the instruction to the kprobes instruction page and overwrites the original instruction with an undefined instruction (Opcode 00). When this instruction is hit later, kprobes single-steps the instruction on the kprobes_instruction page. However, if this instruction is a relative branch instruction it will now point to a different location in memory due to being moved to the kprobes instruction page. If the new branch target points into sie64a() the kernel assumes it interrupted SIE when processing the breakpoint and will crash trying to access the SIE control block. Instead of comparing the address, introduce a new CIF_SIE flag which indicates whether SIE was interrupted.
Signed-off-by: Sven Schnelle Suggested-by: Heiko Carstens Reviewed-by: Heiko Carstens Signed-off-by: Heiko Carstens --- arch/s390/include/asm/processor.h | 2 ++ arch/s390/kernel/entry.S | 17 ++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'arch/s390/include/asm/processor.h') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 7cf00cf8fb0b..db9982f0e8cd 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,11 +14,13 @@ #include +#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ +#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index fc5277eab554..4e0ff79ffee3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -146,6 +146,7 @@ _LPP_OFFSET = __LC_LPP lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce + ni __LC_CPU_FLAGS+7,255-_CIF_SIE larl %r9,sie_exit # skip forward to sie_exit .endm #endif @@ -214,6 +215,7 @@ SYM_FUNC_START(__sie64a) lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz .Lsie_gmap + oi __LC_CPU_FLAGS+7,_CIF_SIE lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce .Lsie_gmap: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer @@ -235,6 +237,7 @@ SYM_FUNC_START(__sie64a) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: + ni __LC_CPU_FLAGS+7,255-_CIF_SIE # some program checks are suppressing. C code (e.g. 
do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. @@ -346,7 +349,8 @@ SYM_CODE_START(pgm_check_handler) .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) # cleanup critical section for program checks in __sie64a - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f + TSTMSK __LC_CPU_FLAGS,_CIF_SIE + jz 1f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT lghi %r10,_PIF_GUEST_FAULT @@ -416,7 +420,8 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f + TSTMSK __LC_CPU_FLAGS,_CIF_SIE + jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT #endif @@ -513,7 +518,13 @@ SYM_CODE_START(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_user + TSTMSK __LC_CPU_FLAGS,_CIF_SIE + jz .Lmcck_user + # Need to compare the address instead of a CIF_SIE* flag. + # Otherwise there would be a race between setting the flag + # and entering SIE (or leaving and clearing the flag). This + # would cause machine checks targeted at the guest to be + # handled by the host. OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST -- cgit v1.2.3