From 44a12806d010944a5727f1dc99123121e3e2c8c6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 7 Aug 2017 21:25:01 +1000 Subject: Revert "powerpc/64: Avoid restore_math call if possible in syscall exit" This reverts commit bc4f65e4cf9d6cc43e0e9ba0b8648cf9201cd55f. As reported by Andreas, this commit is causing unrecoverable SLB misses in the system call exit path: Unrecoverable exception 4100 at c00000000000a1ec Oops: Unrecoverable exception, sig: 6 [#1] SMP NR_CPUS=2 PowerMac ... CPU: 0 PID: 18626 Comm: rm Not tainted 4.13.0-rc3 #1 task: c00000018335e080 task.stack: c000000139e50000 NIP: c00000000000a1ec LR: c00000000000a118 CTR: 0000000000000000 REGS: c000000139e53bb0 TRAP: 4100 Not tainted (4.13.0-rc3) MSR: 9000000000001030 CR: 24000044 XER: 20000000 SOFTE: 1 GPR00: 0000000000000000 c000000139e53e30 c000000000abb500 fffffffffffffffe GPR04: c0000001eb866298 0000000000000000 0000000000000000 c00000018335e080 GPR08: 900000000000d032 0000000000000000 0000000000000002 fffffffffffff001 GPR12: c000000139e50000 c00000000ffff000 00003fffa8c0dca0 00003fffa8c0dc88 GPR16: 0000000010000000 0000000000000001 00003fffa8c0eaa0 0000000000000000 GPR20: 00003fffa8c27528 00003fffa8c27b00 0000000000000000 0000000000000000 GPR24: 00003fffa8c0d918 00003ffff1b3efa0 00003fffa8c26d68 0000000000000000 GPR28: 00003fffa8c249e8 00003fffa8c263d0 00003fffa8c27550 00003ffff1b3ef10 NIP [c00000000000a1ec] system_call_exit+0xc0/0x21c LR [c00000000000a118] system_call+0x58/0x6c Call Trace: [c000000139e53e30] [c00000000000a118] system_call+0x58/0x6c (unreliable) Instruction dump: 64a51000 7c6300d0 f8a101a0 4bffff9c 3c000000 60000006 780007c6 64000000 60000000 7c004039 4082001c e8ed0170 <88070b78> 88c70b79 7c003214 2c200000 This is caused by us trying to load THREAD_LOAD_FP with MSR_RI=0, and taking an SLB miss on the thread struct. Reported-by: Andreas Schwab Diagnosed-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..ec480966f9bf 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs) { unsigned long msr; - /* - * Syscall exit makes a similar initial check before branching - * to restore_math. Keep them in synch. - */ if (!msr_tm_active(regs->msr) && !current->thread.load_fp && !loadvec(current->thread)) return; -- cgit v1.2.3 From 5a69aec945d27e78abac9fd032533d3aaebf7c1e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:14 +1000 Subject: powerpc: Fix VSX enabling/flushing to also test MSR_FP and MSR_VEC VSX uses a combination of the old vector registers, the old FP registers and new "second halves" of the FP registers. Thus when we need to see the VSX state in the thread struct (flush_vsx_to_thread()) or when we'll use the VSX in the kernel (enable_kernel_vsx()) we need to ensure they are all flushed into the thread struct if either of them is individually enabled. Unfortunately we only tested if the whole VSX was enabled, not if they were individually enabled. Fixes: 72cd7b44bc99 ("powerpc: Uncomment and make enable_kernel_vsx() routine available") Cc: stable@vger.kernel.org # v4.3+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ec480966f9bf..1f0fd361e09b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -362,7 +362,8 @@ void enable_kernel_vsx(void) cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX); - if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) { + if (current->thread.regs && + (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) { check_if_tm_restore_required(current); /* * If a thread has already been reclaimed then the @@ -386,7 +387,7 @@ void flush_vsx_to_thread(struct task_struct *tsk) { if (tsk->thread.regs) { preempt_disable(); - if (tsk->thread.regs->msr & MSR_VSX) { + if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) { BUG_ON(tsk != current); giveup_vsx(tsk); } -- cgit v1.2.3 From 6a303833b5e3acbb4c97cc11cc688650d070da19 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:15 +1000 Subject: powerpc: Fix missing newline before { Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9f3e2c932dcc..cd476e338768 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -230,7 +230,8 @@ void enable_kernel_fp(void) } EXPORT_SYMBOL(enable_kernel_fp); -static int restore_fp(struct task_struct *tsk) { +static int restore_fp(struct task_struct *tsk) +{ if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; -- cgit v1.2.3 From 746874d31cd1173f2dd234c1a5f692939afe0b71 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:16 +1000 Subject: powerpc: Remove redundant FP/Altivec giveup code __giveup_vsx() already calls those two functions. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index cd476e338768..32b58648052f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -374,10 +374,6 @@ void enable_kernel_vsx(void) */ if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr)) return; - if (current->thread.regs->msr & MSR_FP) - __giveup_fpu(current); - if (current->thread.regs->msr & MSR_VEC) - __giveup_altivec(current); __giveup_vsx(current); } } -- cgit v1.2.3 From dc801081f2eae57a389bc9230ff4fb0d91487990 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:17 +1000 Subject: powerpc: Remove redundant clear of MSR_VSX in __giveup_vsx() __giveup_fpu() already does it and we cannot have MSR_VSX set without having MSR_FP also set. This also adds a warning to check we indeed do Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 32b58648052f..ff522bf75d53 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -331,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; } #ifdef CONFIG_VSX static void __giveup_vsx(struct task_struct *tsk) { - if (tsk->thread.regs->msr & MSR_FP) + unsigned long msr = tsk->thread.regs->msr; + + /* + * We should never be ssetting MSR_VSX without also setting + * MSR_FP and MSR_VEC + */ + WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC))); + + /* __giveup_fpu will clear MSR_VSX */ + if (msr & MSR_FP) __giveup_fpu(tsk); - if (tsk->thread.regs->msr & MSR_VEC) + if (msr & MSR_VEC) __giveup_altivec(tsk); - tsk->thread.regs->msr &= ~MSR_VSX; } static void giveup_vsx(struct task_struct *tsk) -- cgit v1.2.3 From 96c79b6bd74039e8a799e4aa2d331cbd478ab5a1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 16 Aug 2017 16:01:18 +1000 Subject: powerpc: Remove more redundant VSX save/tests __giveup_vsx/save_vsx are completely equivalent to testing MSR_FP and MSR_VEC and calling the corresponding giveup/save function so just remove the spurious VSX cases. Also add WARN_ONs checking that we never have VSX enabled without the two other. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ff522bf75d53..cc5bae4bba7b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -355,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk) msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } -static void save_vsx(struct task_struct *tsk) -{ - if (tsk->thread.regs->msr & MSR_FP) - save_fpu(tsk); - if (tsk->thread.regs->msr & MSR_VEC) - save_altivec(tsk); -} - void enable_kernel_vsx(void) { unsigned long cpumsr; @@ -411,7 +403,6 @@ static int restore_vsx(struct task_struct *tsk) } #else static inline int restore_vsx(struct task_struct *tsk) { return 0; } -static inline void save_vsx(struct task_struct *tsk) { } #endif /* CONFIG_VSX */ #ifdef CONFIG_SPE @@ -491,6 +482,8 @@ void giveup_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); check_if_tm_restore_required(tsk); + WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); + #ifdef CONFIG_PPC_FPU if (usermsr & MSR_FP) __giveup_fpu(tsk); @@ -499,10 +492,6 @@ void giveup_all(struct task_struct *tsk) if (usermsr & MSR_VEC) __giveup_altivec(tsk); #endif -#ifdef CONFIG_VSX - if (usermsr & MSR_VSX) - __giveup_vsx(tsk); -#endif #ifdef CONFIG_SPE if (usermsr & MSR_SPE) __giveup_spe(tsk); @@ -561,19 +550,13 @@ void save_all(struct task_struct *tsk) msr_check_and_set(msr_all_available); - /* - * Saving the way the register space is in hardware, save_vsx boils - * down to a save_fpu() and save_altivec() - */ - if (usermsr & MSR_VSX) { - save_vsx(tsk); - } else { - if (usermsr & MSR_FP) - save_fpu(tsk); + WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC))); - if (usermsr & MSR_VEC) - save_altivec(tsk); - } + if (usermsr & MSR_FP) + save_fpu(tsk); + + if (usermsr & MSR_VEC) + save_altivec(tsk); if (usermsr & MSR_SPE) __giveup_spe(tsk); -- cgit v1.2.3 From d1d0d5ffb3006eaf9b5f41c89fe801e032cbbfe4 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 12 Aug 2017 02:39:07 +1000 Subject: powerpc/64: Optimise set/clear of CTRL[RUN] (runlatch) On modern CPUs the CTRL register is read-only except bit 63 which is the run latch control. This means it can be updated with a mtspr rather than mfspr/mtspr. To accomodate older CPUs (Cell at least), where there are other bits in the register, we still do a read/modify/write on pre 2.06 CPUs. Signed-off-by: Nicholas Piggin [mpe: Update change log to mention 2.06 workaround] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 34bd94b090e2..0a00d59df537 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1979,11 +1979,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) void notrace __ppc64_runlatch_on(void) { struct thread_info *ti = current_thread_info(); - unsigned long ctrl; - ctrl = mfspr(SPRN_CTRLF); - ctrl |= CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + /* + * Least significant bit (RUN) is the only writable bit of + * the CTRL register, so we can avoid mfspr. 2.06 is not the + * earliest ISA where this is the case, but it's convenient. + */ + mtspr(SPRN_CTRLT, CTRL_RUNLATCH); + } else { + unsigned long ctrl; + + /* + * Some architectures (e.g., Cell) have writable fields other + * than RUN, so do the read-modify-write. + */ + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); + } ti->local_flags |= _TLF_RUNLATCH; } @@ -1992,13 +2006,18 @@ void notrace __ppc64_runlatch_on(void) void notrace __ppc64_runlatch_off(void) { struct thread_info *ti = current_thread_info(); - unsigned long ctrl; ti->local_flags &= ~_TLF_RUNLATCH; - ctrl = mfspr(SPRN_CTRLF); - ctrl &= ~CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + mtspr(SPRN_CTRLT, 0); + } else { + unsigned long ctrl; + + ctrl = mfspr(SPRN_CTRLF); + ctrl &= ~CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); + } } #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From f6fc73fb965f6c1fd7ad75aabfdee6b1af0f7093 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2017 23:56:23 +1000 Subject: powerpc/oops: Print CR/XER on same line as MSR Somehow we missed this when the pr_cont() changes went in. Fix CR/XER to go on the same line as MSR, as they have historically, eg: MSR: 8000000000009032 CR: 4804408a XER: 20000000 Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0a00d59df537..1e24d6f1be90 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1386,7 +1386,7 @@ void show_regs(struct pt_regs * regs) regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); - printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); + pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) pr_cont("CFAR: "REG" ", regs->orig_gpr3); -- cgit v1.2.3 From a6036100edd1d8e024beb4b97c1f15c114660c6c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2017 23:56:24 +1000 Subject: powerpc/oops: Line up NIP & MSR with other rows This is purely cosmetic, but does look nicer IMHO: Before: task: c000000001453400 task.stack: c000000001c6c000 NIP: c000000000a0fbfc LR: c000000000a0fbf4 CTR: c000000000ba6220 REGS: c0000001fffef820 TRAP: 0300 Not tainted (4.13.0-rc6-gcc-6.3.1-00234-g423af27f7d81) MSR: 8000000000009033 CR: 88088242 XER: 00000000 CFAR: c0000000000b3488 DAR: 0000000000000000 DSISR: 42000000 SOFTE: 0 After: task: c000000001453400 task.stack: c000000001c6c000 NIP: c000000000a0fbfc LR: c000000000a0fbf4 CTR: c000000000ba6220 REGS: c0000001fffef820 TRAP: 0300 Not tainted (4.13.0-rc6-gcc-6.3.1-00234-g423af27f7d81-dirty) MSR: 8000000000009033 CR: 88088242 XER: 00000000 CFAR: c0000000000b34a4 DAR: 0000000000000000 DSISR: 42000000 SOFTE: 0 Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1e24d6f1be90..a0c74bbf3454 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1380,11 +1380,11 @@ void show_regs(struct pt_regs * regs) show_regs_print_info(KERN_DEFAULT); - printk("NIP: "REG" LR: "REG" CTR: "REG"\n", + printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); printk("REGS: %p TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); - printk("MSR: "REG" ", regs->msr); + printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); -- cgit v1.2.3 From 4ca360f3dbf2036d964cdf3a6c4a45a81fdf8e18 Mon Sep 17 00:00:00 2001 From: Kautuk Consul Date: Tue, 19 Apr 2016 15:48:21 +0530 Subject: powerpc: get_wchan(): solve possible race scenario due to parallel wakeup Add a check for p->state == TASK_RUNNING so that any wake-ups on task_struct p in the interim lead to 0 being returned by get_wchan(). Signed-off-by: Kautuk Consul [mpe: Confirmed other architectures do similar] Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a0c74bbf3454..166145b18728 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1898,7 +1898,8 @@ unsigned long get_wchan(struct task_struct *p) do { sp = *(unsigned long *)sp; - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || + p->state == TASK_RUNNING) return 0; if (count > 0) { ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE]; -- cgit v1.2.3 From 54820530c5faa9fd78e1c08cb6449100b1a19157 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 12 Oct 2017 21:17:18 +1100 Subject: powerpc/powernv: Enable TM without suspend if possible Some Power9 revisions can run in a mode where TM operates without suspended state. If we find ourself on a CPU that might be in this mode, we query OPAL to check, and if so we reenable TM in CPU features, and enable a new user feature to signal to userspace that we are in this mode. We do not enable the "normal" user feature, PPC_FEATURE2_HTM, but we do enable PPC_FEATURE2_HTM_NOSC because that indicates to userspace that the kernel will abort transactions on syscall entry, which is true regardless of the suspend mode. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 2 ++ arch/powerpc/include/asm/powernv.h | 4 ++++ arch/powerpc/include/asm/tm.h | 2 ++ arch/powerpc/kernel/process.c | 7 +++++++ arch/powerpc/kernel/prom.c | 4 ++++ arch/powerpc/platforms/powernv/setup.c | 23 +++++++++++++++++++++++ 6 files changed, 42 insertions(+) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 9d191ebea706..233c7504b1f2 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -896,6 +896,8 @@ enum { */ OPAL_REINIT_CPUS_MMU_HASH = (1 << 2), OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3), + + OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4), }; typedef struct oppanel_line { diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h index f62797702300..dc5f6a5d4575 100644 --- a/arch/powerpc/include/asm/powernv.h +++ b/arch/powerpc/include/asm/powernv.h @@ -22,6 +22,8 @@ extern void pnv_npu2_destroy_context(struct npu_context *context, extern int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea, unsigned long *flags, unsigned long *status, int count); + +void pnv_tm_init(void); #else static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } static inline struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, @@ -36,6 +38,8 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context, unsigned long *status, int count) { return -ENODEV; } + +static inline void pnv_tm_init(void) { } #endif #endif /* _ASM_POWERNV_H */ diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index 82e06ca3a49b..ad19fe41931b 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -19,4 +19,6 @@ extern void tm_abort(uint8_t cause); extern void tm_save_sprs(struct thread_struct *thread); extern void tm_restore_sprs(struct thread_struct *thread); +extern bool tm_suspend_disabled; + #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 166145b18728..b02807ea54dc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -77,6 +77,13 @@ extern unsigned long _get_SP(void); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM +/* + * Are we running in "Suspend disabled" mode? If so we have to block any + * sigreturn that would get us into suspended state, and we also warn in some + * other paths that we should never reach with suspend disabled. + */ +bool tm_suspend_disabled __ro_after_init = false; + static void check_if_tm_restore_required(struct task_struct *tsk) { /* diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index d9bd6555f980..101822be525a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -681,7 +682,10 @@ static void __init tm_init(void) cur_cpu_spec->cpu_user_features2 &= ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM); cur_cpu_spec->cpu_features &= ~CPU_FTR_TM; + return; } + + pnv_tm_init(); } #else static void tm_init(void) { } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index cf52d53da460..d23f148a11f0 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "powernv.h" @@ -304,6 +305,28 @@ static int __init pnv_probe(void) return 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +void __init pnv_tm_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL) || + !pvr_version_is(PVR_POWER9) || + early_cpu_has_feature(CPU_FTR_TM)) + return; + + if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS) + return; + + pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n"); + cur_cpu_spec->cpu_features |= CPU_FTR_TM; + /* Make sure "normal" HTM is off (it should be) */ + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM; + /* Turn on no suspend mode, and HTM no SC */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \ + PPC_FEATURE2_HTM_NOSC; + tm_suspend_disabled = true; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + /* * Returns the cpu frequency for 'cpu' in Hz. This is used by * /proc/cpuinfo -- cgit v1.2.3 From 92fb8690bd04cb421d987d246deac60eef85d272 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 12 Oct 2017 21:17:19 +1100 Subject: powerpc/tm: P9 disable transactionally suspended sigcontexts Unfortunately userspace can construct a sigcontext which enables suspend. Thus userspace can force Linux into a path where trechkpt is executed. This patch blocks this from happening on POWER9 by sanity checking sigcontexts passed in. ptrace doesn't have this problem as only MSR SE and BE can be changed via ptrace. This patch also adds a number of WARN_ON()s in case we ever enter suspend when we shouldn't. This should not happen, but if it does the symptoms are soft lockup warnings which are not obviously TM related, so the WARN_ON()s should make it obvious what's happening. Signed-off-by: Michael Neuling Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 ++ arch/powerpc/kernel/signal_32.c | 4 ++++ arch/powerpc/kernel/signal_64.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b02807ea54dc..c051dc2b42ad 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -910,6 +910,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk) if (!MSR_TM_ACTIVE(thr->regs->msr)) goto out_and_saveregs; + WARN_ON(tm_suspend_disabled); + TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 92fb1c8dbbd8..1dd5fa0f65fd 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -519,6 +519,8 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; + WARN_ON(tm_suspend_disabled); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext * just indicates to userland that we were doing a transaction, but we * don't want to return in transactional state. This also ensures @@ -769,6 +771,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, int i; #endif + if (tm_suspend_disabled) + return 1; /* * restore general registers but not including MSR or SOFTE. Also * take care of keeping r2 (TLS) intact if not a signal. diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c83c115858c1..4dfdd8e56836 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -214,6 +214,8 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); + WARN_ON(tm_suspend_disabled); + /* Remove TM bits from thread's MSR. The MSR in the sigcontext * just indicates to userland that we were doing a transaction, but we * don't want to return in transactional state. This also ensures @@ -430,6 +432,9 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, BUG_ON(tsk != current); + if (tm_suspend_disabled) + return -EINVAL; + /* copy the GPRs */ err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr)); err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs, -- cgit v1.2.3 From 4e003747043d57aa75c9762fa148ef38afe68dd8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 19 Oct 2017 15:08:43 +1100 Subject: powerpc/64s: Replace CONFIG_PPC_STD_MMU_64 with CONFIG_PPC_BOOK3S_64 CONFIG_PPC_STD_MMU_64 indicates support for the "standard" powerpc MMU on 64-bit CPUs. The "standard" MMU refers to the hash page table MMU found in "server" processors, from IBM mainly. Currently CONFIG_PPC_STD_MMU_64 is == CONFIG_PPC_BOOK3S_64. While it's annoying to have two symbols that always have the same value, it's not quite annoying enough to bother removing one. However with the arrival of Power9, we now have the situation where CONFIG_PPC_STD_MMU_64 is enabled, but the kernel is running using the Radix MMU - *not* the "standard" MMU. So it is now actively confusing to use it, because it implies that code is disabled or inactive when the Radix MMU is in use, however that is not necessarily true. So s/CONFIG_PPC_STD_MMU_64/CONFIG_PPC_BOOK3S_64/, and do some minor formatting updates of some of the affected lines. This will be a pain for backports, but c'est la vie. Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 6 +++--- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- arch/powerpc/include/asm/paca.h | 10 +++++----- arch/powerpc/include/asm/page_64.h | 6 +++--- arch/powerpc/include/asm/pgtable-be-types.h | 2 +- arch/powerpc/include/asm/pgtable-types.h | 4 ++-- arch/powerpc/include/asm/tlbflush.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 4 ++-- arch/powerpc/kernel/entry_64.S | 4 ++-- arch/powerpc/kernel/exceptions-64s.S | 8 ++++---- arch/powerpc/kernel/machine_kexec_64.c | 4 ++-- arch/powerpc/kernel/mce_power.c | 4 ++-- arch/powerpc/kernel/paca.c | 12 ++++++------ arch/powerpc/kernel/pci_64.c | 4 ++-- arch/powerpc/kernel/process.c | 12 ++++++------ arch/powerpc/kernel/prom.c | 2 +- arch/powerpc/kernel/setup-common.c | 4 ++-- arch/powerpc/mm/Makefile | 6 +++--- arch/powerpc/mm/dump_hashpagetable.c | 2 +- arch/powerpc/mm/dump_linuxpagetables.c | 10 +++++----- arch/powerpc/mm/init_64.c | 8 ++++---- arch/powerpc/mm/pgtable_64.c | 2 +- arch/powerpc/platforms/Kconfig.cputype | 6 +----- arch/powerpc/platforms/pseries/lpar.c | 8 ++++---- arch/powerpc/platforms/pseries/lparcfg.c | 2 +- arch/powerpc/xmon/xmon.c | 6 +++--- 26 files changed, 68 insertions(+), 72 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 809c468edab1..b8b75b423316 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -334,7 +334,7 @@ config PPC_OF_PLATFORM_PCI default n config ARCH_SUPPORTS_DEBUG_PAGEALLOC - depends on PPC32 || PPC_STD_MMU_64 + depends on PPC32 || PPC_BOOK3S_64 def_bool y config ARCH_SUPPORTS_UPROBES @@ -721,7 +721,7 @@ config PPC_16K_PAGES config PPC_64K_PAGES bool "64k page size" - depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64) + depends on !PPC_FSL_BOOK3E && (44x || PPC_BOOK3S_64 || PPC_BOOK3E_64) select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64 config PPC_256K_PAGES @@ -780,7 +780,7 @@ config FORCE_MAX_ZONEORDER config PPC_SUBPAGE_PROT bool "Support setting protections for 4k subpages" - depends on PPC_STD_MMU_64 && PPC_64K_PAGES + depends on PPC_BOOK3S_64 && PPC_64K_PAGES help This option adds support for a system call to allow user programs to set access permissions (read/write, readonly, or no access) diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index f0ff384d4ca5..b1c42ac54d96 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -203,7 +203,7 @@ static inline unsigned long pte_update(struct mm_struct *mm, if (!huge) assert_pte_locked(mm, addr); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 if (old & _PAGE_HASHPTE) hpte_need_flush(mm, addr, ptep, old, huge); #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1e06310ccc09..c907ae23c956 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -91,14 +91,14 @@ struct paca_struct { u8 cpu_start; /* At startup, processor spins until */ /* this becomes non-zero. */ u8 kexec_state; /* set when kexec down has irqs off */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 struct slb_shadow *slb_shadow_ptr; struct dtl_entry *dispatch_log; struct dtl_entry *dispatch_log_end; -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif u64 dscr_default; /* per-CPU default DSCR */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* * Now, starting in cacheline 2, the exception save areas */ @@ -110,7 +110,7 @@ struct paca_struct { u16 vmalloc_sllp; u16 slb_cache_ptr; u32 slb_cache[SLB_CACHE_ENTRIES]; -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_BOOK3E u64 exgen[8] __aligned(0x40); @@ -192,7 +192,7 @@ struct paca_struct { struct stop_sprs stop_sprs; #endif -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* Non-maskable exceptions that are not performance critical */ u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */ u64 exmc[EX_SIZE]; /* used for machine checks */ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index c4d9654bd637..56234c6fcd61 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -117,21 +117,21 @@ extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start, #endif /* __ASSEMBLY__ */ #else #define slice_init() -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 #define get_slice_psize(mm, addr) ((mm)->context.user_psize) #define slice_set_user_psize(mm, psize) \ do { \ (mm)->context.user_psize = (psize); \ (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ } while (0) -#else /* CONFIG_PPC_STD_MMU_64 */ +#else /* !CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_64K_PAGES #define get_slice_psize(mm, addr) MMU_PAGE_64K #else /* CONFIG_PPC_64K_PAGES */ #define get_slice_psize(mm, addr) MMU_PAGE_4K #endif /* !CONFIG_PPC_64K_PAGES */ #define slice_set_user_psize(mm, psize) do { BUG(); } while(0) -#endif /* !CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #define slice_set_range_psize(mm, start, len, psize) \ slice_set_user_psize((mm), (psize)) diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index 67e7e3d990f4..03ab638593e1 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -76,7 +76,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 369a164b545c..5d2c38d26396 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -49,13 +49,13 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64) +#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; #endif -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 #include static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new) diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 13dbcd41885e..7d5a157c7832 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -77,7 +77,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) flush_tlb_mm(mm); } -#elif defined(CONFIG_PPC_STD_MMU_64) +#elif defined(CONFIG_PPC_BOOK3S_64) #include #else #error Unsupported MMU type diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8cfb20e38cfe..200623e71474 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -208,7 +208,7 @@ int main(void) OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); #endif /* CONFIG_PPC_BOOK3E */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACASLBCACHE, paca_struct, slb_cache); OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr); OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp); @@ -230,7 +230,7 @@ int main(void) OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx); OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count); OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx); -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ OFFSET(PACAEMERGSP, paca_struct, emergency_sp); #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4a0fd4f40245..3320bcac7192 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -539,7 +539,7 @@ _GLOBAL(_switch) std r6,PACACURRENT(r13) /* Set new 'current' */ ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 BEGIN_MMU_FTR_SECTION b 2f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) @@ -588,7 +588,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) slbmte r7,r0 isync 2: -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ CURRENT_THREAD_INFO(r7, r8) /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 64c2bc35e4ff..6879aed47377 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -607,7 +607,7 @@ EXC_COMMON_BEGIN(slb_miss_common) cmpdi cr5,r11,MSR_RI crset 4*cr0+eq -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 BEGIN_MMU_FTR_SECTION bl slb_allocate END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) @@ -1497,7 +1497,7 @@ USE_TEXT_SECTION() */ .balign IFETCH_ALIGN_BYTES do_hash_page: - #ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 lis r0,DSISR_BAD_FAULT_64S@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ @@ -1528,7 +1528,7 @@ do_hash_page: /* Reload DSISR into r4 for the DABR check below */ ld r4,_DSISR(r1) -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ /* Here we have a page fault that hash_page can't handle. */ handle_page_fault: @@ -1557,7 +1557,7 @@ handle_dabr_fault: 12: b ret_from_except_lite -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* We have a page fault that hash_page could handle but HV refused * the PTE insertion */ diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 5c12e21d0d1a..49d34d7271e7 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -360,7 +360,7 @@ void default_machine_kexec(struct kimage *image) /* NOTREACHED */ } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* Values we need to export to the second kernel via the device tree. */ static unsigned long htab_base; static unsigned long htab_size; @@ -402,4 +402,4 @@ static int __init export_htab_values(void) return 0; } late_initcall(export_htab_values); -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 82cabfc7c2f6..1600ab194ff9 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -168,7 +168,7 @@ void __flush_tlb_power9(unsigned int action) /* flush SLBs and reload */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 static void flush_and_reload_slb(void) { struct slb_shadow *slb; @@ -215,7 +215,7 @@ static void flush_erat(void) static int mce_flush(int what) { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 if (what == MCE_FLUSH_SLB) { flush_and_reload_slb(); return 1; diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 2ff2b8a19f71..5d38d5ea9a24 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -90,7 +90,7 @@ static inline void free_lppacas(void) { } #endif /* CONFIG_PPC_BOOK3S */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* * 3 persistent SLBs are registered here. The buffer will be zero @@ -135,11 +135,11 @@ static struct slb_shadow * __init init_slb_shadow(int cpu) return s; } -#else /* CONFIG_PPC_STD_MMU_64 */ +#else /* !CONFIG_PPC_BOOK3S_64 */ static void __init allocate_slb_shadows(int nr_cpus, int limit) { } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the @@ -170,9 +170,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 new_paca->slb_shadow_ptr = init_slb_shadow(cpu); -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif #ifdef CONFIG_PPC_BOOK3E /* For now -- if we have threads this will be adjusted later */ @@ -271,7 +271,7 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; #endif -#else /* CONFIG_PPC_BOOK3S */ +#else /* !CONFIG_PPC_BOOK3S */ return; #endif } diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 932b9741aa8f..15ce0306b092 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -90,14 +90,14 @@ int pcibios_unmap_io_space(struct pci_bus *bus) * to do an appropriate TLB flush here too */ if (bus->self) { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 struct resource *res = bus->resource[0]; #endif pr_debug("IO unmapping for PCI-PCI bridge %s\n", pci_name(bus->self)); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 __flush_hash_table_range(&init_mm, res->start + _IO_BASE, res->end + _IO_BASE + 1); #endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index c051dc2b42ad..2b602d60a9c7 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1164,7 +1164,7 @@ struct task_struct *__switch_to(struct task_struct *prev, } #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; @@ -1172,7 +1172,7 @@ struct task_struct *__switch_to(struct task_struct *prev, __flush_tlb_pending(batch); batch->active = 0; } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_ADV_DEBUG_REGS switch_booke_debug_regs(&new->thread.debug); @@ -1218,7 +1218,7 @@ struct task_struct *__switch_to(struct task_struct *prev, last = _switch(old_thread, new_thread); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 if (current_thread_info()->local_flags & _TLF_LAZY_MMU) { current_thread_info()->local_flags &= ~_TLF_LAZY_MMU; batch = this_cpu_ptr(&ppc64_tlb_batch); @@ -1247,7 +1247,7 @@ struct task_struct *__switch_to(struct task_struct *prev, : : "r"(dummy_copy_buffer), "r"(0)); } } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ return last; } @@ -1476,7 +1476,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -2056,7 +2056,7 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) unsigned long base = mm->brk; unsigned long ret; -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* * If we are using 1TB segments and we are allowed to randomise * the heap, we can put it above 1TB so it is backed by a 1TB diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 101822be525a..b15bae265c90 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -229,7 +229,7 @@ static void __init check_cpu_pa_features(unsigned long node) ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 static void __init init_mmu_slb_size(unsigned long node) { const __be32 *slb_size_ptr; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 0ac741fae90e..da4fd54e6914 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -773,7 +773,7 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) static __init void print_system_info(void) { pr_info("-----------------------------------------------------\n"); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); #endif #ifdef CONFIG_PPC_STD_MMU_32 @@ -800,7 +800,7 @@ static __init void print_system_info(void) pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #endif -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 if (htab_address) pr_info("htab_address = 0x%p\n", htab_address); if (htab_hash_mask) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index fb844d2f266e..915178423c3a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -14,11 +14,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o +obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o -ifeq ($(CONFIG_PPC_STD_MMU_64),y) +ifeq ($(CONFIG_PPC_BOOK3S_64),y) obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o endif @@ -31,7 +31,7 @@ obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o obj-y += hugetlbpage.o ifeq ($(CONFIG_HUGETLB_PAGE),y) -obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o +obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o endif diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index 5c4c93dcff19..14cfb11b09d0 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -500,7 +500,7 @@ static void populate_markers(void) address_markers[6].start_address = PHB_IO_END; address_markers[7].start_address = IOREMAP_BASE; address_markers[8].start_address = IOREMAP_END; -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 address_markers[9].start_address = H_VMEMMAP_BASE; #else address_markers[9].start_address = VMEMMAP_BASE; diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index c9282d27b203..c2e7dea59490 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -112,7 +112,7 @@ struct flag_info { static const struct flag_info flag_array[] = { { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 .mask = _PAGE_PRIVILEGED, .val = 0, #else @@ -147,7 +147,7 @@ static const struct flag_info flag_array[] = { .set = "present", .clear = " ", }, { -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 .mask = H_PAGE_HASHPTE, .val = H_PAGE_HASHPTE, #else @@ -157,7 +157,7 @@ static const struct flag_info flag_array[] = { .set = "hpte", .clear = " ", }, { -#ifndef CONFIG_PPC_STD_MMU_64 +#ifndef CONFIG_PPC_BOOK3S_64 .mask = _PAGE_GUARDED, .val = _PAGE_GUARDED, .set = "guarded", @@ -174,7 +174,7 @@ static const struct flag_info flag_array[] = { .set = "accessed", .clear = " ", }, { -#ifndef CONFIG_PPC_STD_MMU_64 +#ifndef CONFIG_PPC_BOOK3S_64 .mask = _PAGE_WRITETHRU, .val = _PAGE_WRITETHRU, .set = "write through", @@ -450,7 +450,7 @@ static void populate_markers(void) address_markers[i++].start_address = PHB_IO_END; address_markers[i++].start_address = IOREMAP_BASE; address_markers[i++].start_address = IOREMAP_END; -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 address_markers[i++].start_address = H_VMEMMAP_BASE; #else address_markers[i++].start_address = VMEMMAP_BASE; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 588a521966ec..12b3025fadac 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -68,11 +68,11 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 #if H_PGTABLE_RANGE > USER_VSID_RANGE #warning Limited user VSID range means pagetable space is wasted #endif -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ phys_addr_t memstart_addr = ~0; EXPORT_SYMBOL_GPL(memstart_addr); @@ -367,7 +367,7 @@ EXPORT_SYMBOL_GPL(realmode_pfn_to_page); #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 static bool disable_radix; static int __init parse_disable_radix(char *p) { @@ -444,4 +444,4 @@ void __init mmu_early_init_devtree(void) else hash__early_init_devtree(); } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index ac0717a90ca6..2851282edcd4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -57,7 +57,7 @@ #include "mmu_decl.h" -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) #error TASK_SIZE_USER64 exceeds user VSID range #endif diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 13663efc1d31..f8928ee85f6b 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -294,10 +294,6 @@ config PPC_STD_MMU_32 def_bool y depends on PPC_STD_MMU && PPC32 -config PPC_STD_MMU_64 - def_bool y - depends on PPC_STD_MMU && PPC64 - config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 @@ -323,7 +319,7 @@ config PPC_BOOK3E_MMU config PPC_MM_SLICES bool - default y if PPC_STD_MMU_64 + default y if PPC_BOOK3S_64 default n config PPC_HAVE_PMU_SUPPORT diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 495ba4e7336d..0ee4a469a4ae 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -93,7 +93,7 @@ void vpa_init(int cpu) return; } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 /* * PAPR says this feature is SLB-Buffer but firmware never * reports that. All SPLPAR support SLB shadow buffer. @@ -106,7 +106,7 @@ void vpa_init(int cpu) "cpu %d (hw %d) of area %lx failed with %ld\n", cpu, hwcpu, addr, ret); } -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ /* * Register dispatch trace log, if one has been allocated. @@ -129,7 +129,7 @@ void vpa_init(int cpu) } } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long vpn, unsigned long pa, @@ -824,7 +824,7 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif /* CONFIG_PPC_SMLPAR */ -#endif /* CONFIG_PPC_STD_MMU_64 */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_TRACEPOINTS #ifdef HAVE_JUMP_LABEL diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 779fc2a1c8f7..b2706c483067 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -485,7 +485,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "shared_processor_mode=%d\n", lppaca_shared_proc(get_lppaca())); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 seq_printf(m, "slb_size=%d\n", mmu_slb_size); #endif parse_em_data(m); diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 83e7faf5b3d3..6b1236b85622 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2312,7 +2312,7 @@ static void dump_tracing(void) static void dump_one_paca(int cpu) { struct paca_struct *p; -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 int i = 0; #endif @@ -2353,7 +2353,7 @@ static void dump_one_paca(int cpu) DUMP(p, hw_cpu_id, "x"); DUMP(p, cpu_start, "x"); DUMP(p, kexec_state, "x"); -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 for (i = 0; i < SLB_NUM_BOLTED; i++) { u64 esid, vsid; @@ -3263,7 +3263,7 @@ static void xmon_print_symbol(unsigned long address, const char *mid, printf("%s", after); } -#ifdef CONFIG_PPC_STD_MMU_64 +#ifdef CONFIG_PPC_BOOK3S_64 void dump_segments(void) { int i; -- cgit v1.2.3 From a7771176b4392fbc3a17399c51a8c11f2f681afe Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 2 Nov 2017 14:09:03 +1100 Subject: powerpc: Don't enable FP/Altivec if not checkpointed Lazy save and restore of FP/Altivec means that a userspace process can be sent to userspace with FP or Altivec disabled and loaded only as required (by way of an FP/Altivec unavailable exception). Transactional Memory complicates this situation as a transaction could be started without FP/Altivec being loaded up. This causes the hardware to checkpoint incorrect registers. Handling FP/Altivec unavailable exceptions while a thread is transactional requires a reclaim and recheckpoint to ensure the CPU has correct state for both sets of registers. Lazy save and restore of FP/Altivec cannot be done if a process is transactional. If a facility was enabled it must remain enabled whenever a thread is transactional. Commit dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware transactional memory in use") ensures that the facilities are always enabled if a thread is transactional. A bug in the introduced code may cause it to inadvertently enable a facility that was (and should remain) disabled. The problem with this extraneous enablement is that the registers for the erroneously enabled facility have not been correctly recheckpointed - the recheckpointing code assumed the facility would remain disabled. Further compounding the issue, the transactional {fp,altivec,vsx} unavailable code has been incorrectly using the MSR to enable facilities. The presence of the {FP,VEC,VSX} bit in the regs->msr simply means if the registers are live on the CPU, not if the kernel should load them before returning to userspace. This has worked due to the bug mentioned above. This causes transactional threads which return to their failure handler to observe incorrect checkpointed registers. Perhaps an example will help illustrate the problem: A userspace process is running and uses both FP and Altivec registers. This process then continues to run for some time without touching either sets of registers. The kernel subsequently disables the facilities as part of lazy save and restore. The userspace process then performs a tbegin and the CPU checkpoints 'junk' FP and Altivec registers. The process then performs a floating point instruction triggering a fp unavailable exception in the kernel. The kernel then loads the FP registers - and only the FP registers. Since the thread is transactional it must perform a reclaim and recheckpoint to ensure both the checkpointed registers and the transactional registers are correct. It then (correctly) enables MSR[FP] for the process. Later (on exception exist) the kernel also (inadvertently) enables MSR[VEC]. The process is then returned to userspace. Since the act of loading the FP registers doomed the transaction we know CPU will fail the transaction, restore its checkpointed registers, and return the process to its failure handler. The problem is that we're now running with Altivec enabled and the 'junk' checkpointed registers are restored. The kernel had only recheckpointed FP. This patch solves this by only activating FP/Altivec if userspace was using them when it entered the kernel and not simply if the process is transactional. Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware transactional memory in use") Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 18 ++++++++++++++++-- arch/powerpc/kernel/traps.c | 8 ++++---- 2 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 2b602d60a9c7..e2980a22c487 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -104,9 +104,23 @@ static inline bool msr_tm_active(unsigned long msr) { return MSR_TM_ACTIVE(msr); } + +static bool tm_active_with_fp(struct task_struct *tsk) +{ + return msr_tm_active(tsk->thread.regs->msr) && + (tsk->thread.ckpt_regs.msr & MSR_FP); +} + +static bool tm_active_with_altivec(struct task_struct *tsk) +{ + return msr_tm_active(tsk->thread.regs->msr) && + (tsk->thread.ckpt_regs.msr & MSR_VEC); +} #else static inline bool msr_tm_active(unsigned long msr) { return false; } static inline void check_if_tm_restore_required(struct task_struct *tsk) { } +static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; } +static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ bool strict_msr_control; @@ -239,7 +253,7 @@ EXPORT_SYMBOL(enable_kernel_fp); static int restore_fp(struct task_struct *tsk) { - if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) { + if (tsk->thread.load_fp || tm_active_with_fp(tsk)) { load_fp_state(¤t->thread.fp_state); current->thread.load_fp++; return 1; @@ -321,7 +335,7 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread); static int restore_altivec(struct task_struct *tsk) { if (cpu_has_feature(CPU_FTR_ALTIVEC) && - (tsk->thread.load_vec || msr_tm_active(tsk->thread.regs->msr))) { + (tsk->thread.load_vec || tm_active_with_altivec(tsk))) { load_vr_state(&tsk->thread.vr_state); tsk->thread.used_vr = 1; tsk->thread.load_vec++; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0e4099fef198..863c6858ed0f 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1679,7 +1679,7 @@ void fp_unavailable_tm(struct pt_regs *regs) /* Reclaim didn't save out any FPRs to transact_fprs. */ /* Enable FP for the task: */ - regs->msr |= (MSR_FP | current->thread.fpexc_mode); + current->thread.load_fp = 1; /* This loads and recheckpoints the FP registers from * thread.fpr[]. They will remain in registers after the @@ -1708,7 +1708,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) "MSR=%lx\n", regs->nip, regs->msr); tm_reclaim_current(TM_CAUSE_FAC_UNAV); - regs->msr |= MSR_VEC; + current->thread.load_vec = 1; tm_recheckpoint(¤t->thread, MSR_VEC); current->thread.used_vr = 1; @@ -1745,8 +1745,8 @@ void vsx_unavailable_tm(struct pt_regs *regs) /* This reclaims FP and/or VR regs if they're already enabled */ tm_reclaim_current(TM_CAUSE_FAC_UNAV); - regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | - MSR_VSX; + current->thread.load_vec = 1; + current->thread.load_fp = 1; /* This loads & recheckpoints FP and VRs; but we have * to be sure not to overwrite previously-valid state. -- cgit v1.2.3 From 91381b9cb1c3afc162f830ebf698721402c7d577 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 2 Nov 2017 14:09:04 +1100 Subject: powerpc: Force reload for recheckpoint during tm {fp, vec, vsx} unavailable exception Lazy save and restore of FP/Altivec means that a userspace process can be sent to userspace with FP or Altivec disabled and loaded only as required (by way of an FP/Altivec unavailable exception). Transactional Memory complicates this situation as a transaction could be started without FP/Altivec being loaded up. This causes the hardware to checkpoint incorrect registers. Handling FP/Altivec unavailable exceptions while a thread is transactional requires a reclaim and recheckpoint to ensure the CPU has correct state for both sets of registers. tm_reclaim() has optimisations to not always save the FP/Altivec registers to the checkpointed save area. This was originally done because the caller might have information that the checkpointed registers aren't valid due to lazy save and restore. We've also been a little vague as to how tm_reclaim() leaves the FP/Altivec state since it doesn't necessarily always save it to the thread struct. This has lead to an (incorrect) assumption that it leaves the checkpointed state on the CPU. tm_recheckpoint() has similar optimisations in reverse. It may not always reload the checkpointed FP/Altivec registers from the thread struct before the trecheckpoint. It is therefore quite unclear where it expects to get the state from. This didn't help with the assumption made about tm_reclaim(). This patch is a minimal fix for ease of backporting. A more correct fix which removes the msr parameter to tm_reclaim() and tm_recheckpoint() altogether has been upstreamed to apply on top of this patch. Fixes: dc3106690b20 ("powerpc: tm: Always use fp_state and vr_state to store live registers") Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 4 ++-- arch/powerpc/kernel/traps.c | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2980a22c487..ca8c33b41989 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -874,6 +874,8 @@ static void tm_reclaim_thread(struct thread_struct *thr, if (!MSR_TM_SUSPENDED(mfmsr())) return; + giveup_all(container_of(thr, struct task_struct, thread)); + /* * If we are in a transaction and FP is off then we can't have * used FP inside that transaction. Hence the checkpointed @@ -893,8 +895,6 @@ static void tm_reclaim_thread(struct thread_struct *thr, memcpy(&thr->ckvr_state, &thr->vr_state, sizeof(struct thread_vr_state)); - giveup_all(container_of(thr, struct task_struct, thread)); - tm_reclaim(thr, thr->ckpt_regs.msr, cause); } diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 863c6858ed0f..e099e61c702b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1663,6 +1663,12 @@ out: void fp_unavailable_tm(struct pt_regs *regs) { + /* + * Save the MSR now because tm_reclaim_current() is likely to + * change it + */ + unsigned long orig_msr = regs->msr; + /* Note: This does not handle any kind of FP laziness. */ TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", @@ -1687,10 +1693,10 @@ void fp_unavailable_tm(struct pt_regs *regs) * If VMX is in use, the VRs now hold checkpointed values, * so we don't want to load the VRs from the thread_struct. */ - tm_recheckpoint(¤t->thread, MSR_FP); + tm_recheckpoint(¤t->thread, orig_msr | MSR_FP); /* If VMX is in use, get the transactional values back */ - if (regs->msr & MSR_VEC) { + if (orig_msr & MSR_VEC) { msr_check_and_set(MSR_VEC); load_vr_state(¤t->thread.vr_state); /* At this point all the VSX state is loaded, so enable it */ @@ -1700,6 +1706,12 @@ void fp_unavailable_tm(struct pt_regs *regs) void altivec_unavailable_tm(struct pt_regs *regs) { + /* + * Save the MSR now because tm_reclaim_current() is likely to + * change it + */ + unsigned long orig_msr = regs->msr; + /* See the comments in fp_unavailable_tm(). This function operates * the same way. */ @@ -1709,10 +1721,10 @@ void altivec_unavailable_tm(struct pt_regs *regs) regs->nip, regs->msr); tm_reclaim_current(TM_CAUSE_FAC_UNAV); current->thread.load_vec = 1; - tm_recheckpoint(¤t->thread, MSR_VEC); + tm_recheckpoint(¤t->thread, orig_msr | MSR_VEC); current->thread.used_vr = 1; - if (regs->msr & MSR_FP) { + if (orig_msr & MSR_FP) { msr_check_and_set(MSR_FP); load_fp_state(¤t->thread.fp_state); regs->msr |= MSR_VSX; @@ -1751,7 +1763,7 @@ void vsx_unavailable_tm(struct pt_regs *regs) /* This loads & recheckpoints FP and VRs; but we have * to be sure not to overwrite previously-valid state. */ - tm_recheckpoint(¤t->thread, regs->msr & ~orig_msr); + tm_recheckpoint(¤t->thread, orig_msr | MSR_FP | MSR_VEC); msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC)); -- cgit v1.2.3 From eb5c3f1c86470fc1a57ab28cce15c12e4d6cdf8b Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Thu, 2 Nov 2017 14:09:05 +1100 Subject: powerpc: Always save/restore checkpointed regs during treclaim/trecheckpoint Lazy save and restore of FP/Altivec means that a userspace process can be sent to userspace with FP or Altivec disabled and loaded only as required (by way of an FP/Altivec unavailable exception). Transactional Memory complicates this situation as a transaction could be started without FP/Altivec being loaded up. This causes the hardware to checkpoint incorrect registers. Handling FP/Altivec unavailable exceptions while a thread is transactional requires a reclaim and recheckpoint to ensure the CPU has correct state for both sets of registers. tm_reclaim() has optimisations to not always save the FP/Altivec registers to the checkpointed save area. This was originally done because the caller might have information that the checkpointed registers aren't valid due to lazy save and restore. We've also been a little vague as to how tm_reclaim() leaves the FP/Altivec state since it doesn't necessarily always save it to the thread struct. This has lead to an (incorrect) assumption that it leaves the checkpointed state on the CPU. tm_recheckpoint() has similar optimisations in reverse. It may not always reload the checkpointed FP/Altivec registers from the thread struct before the trecheckpoint. It is therefore quite unclear where it expects to get the state from. This didn't help with the assumption made about tm_reclaim(). These optimisations sit in what is by definition a slow path. If a process has to go through a reclaim/recheckpoint then its transaction will be doomed on returning to userspace. This mean that the process will be unable to complete its transaction and be forced to its failure handler. This is already an out if line case for userspace. Furthermore, the cost of copying 64 times 128 bits from registers isn't very long[0] (at all) on modern processors. As such it appears these optimisations have only served to increase code complexity and are unlikely to have had a measurable performance impact. Our transactional memory handling has been riddled with bugs. A cause of this has been difficulty in following the code flow, code complexity has not been our friend here. It makes sense to remove these optimisations in favour of a (hopefully) more stable implementation. This patch does mean that some times the assembly will needlessly save 'junk' registers which will subsequently get overwritten with the correct value by the C code which calls the assembly function. This small inefficiency is far outweighed by the reduction in complexity for general TM code, context switching paths, and transactional facility unavailable exception handler. 0: I tried to measure it once for other work and found that it was hiding in the noise of everything else I was working with. I find it exceedingly likely this will be the case here. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tm.h | 5 ++-- arch/powerpc/kernel/process.c | 22 ++++++--------- arch/powerpc/kernel/signal_32.c | 2 +- arch/powerpc/kernel/signal_64.c | 2 +- arch/powerpc/kernel/tm.S | 59 ++++++++++++----------------------------- arch/powerpc/kernel/traps.c | 26 +++++------------- 6 files changed, 35 insertions(+), 81 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index ad19fe41931b..79d4156554b4 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -11,10 +11,9 @@ extern void tm_enable(void); extern void tm_reclaim(struct thread_struct *thread, - unsigned long orig_msr, uint8_t cause); + uint8_t cause); extern void tm_reclaim_current(uint8_t cause); -extern void tm_recheckpoint(struct thread_struct *thread, - unsigned long orig_msr); +extern void tm_recheckpoint(struct thread_struct *thread); extern void tm_abort(uint8_t cause); extern void tm_save_sprs(struct thread_struct *thread); extern void tm_restore_sprs(struct thread_struct *thread); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ca8c33b41989..4083b737decb 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -876,6 +876,8 @@ static void tm_reclaim_thread(struct thread_struct *thr, giveup_all(container_of(thr, struct task_struct, thread)); + tm_reclaim(thr, cause); + /* * If we are in a transaction and FP is off then we can't have * used FP inside that transaction. Hence the checkpointed @@ -894,8 +896,6 @@ static void tm_reclaim_thread(struct thread_struct *thr, if ((thr->ckpt_regs.msr & MSR_VEC) == 0) memcpy(&thr->ckvr_state, &thr->vr_state, sizeof(struct thread_vr_state)); - - tm_reclaim(thr, thr->ckpt_regs.msr, cause); } void tm_reclaim_current(uint8_t cause) @@ -946,11 +946,9 @@ out_and_saveregs: tm_save_sprs(thr); } -extern void __tm_recheckpoint(struct thread_struct *thread, - unsigned long orig_msr); +extern void __tm_recheckpoint(struct thread_struct *thread); -void tm_recheckpoint(struct thread_struct *thread, - unsigned long orig_msr) +void tm_recheckpoint(struct thread_struct *thread) { unsigned long flags; @@ -969,15 +967,13 @@ void tm_recheckpoint(struct thread_struct *thread, */ tm_restore_sprs(thread); - __tm_recheckpoint(thread, orig_msr); + __tm_recheckpoint(thread); local_irq_restore(flags); } static inline void tm_recheckpoint_new_task(struct task_struct *new) { - unsigned long msr; - if (!cpu_has_feature(CPU_FTR_TM)) return; @@ -996,13 +992,11 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) tm_restore_sprs(&new->thread); return; } - msr = new->thread.ckpt_regs.msr; /* Recheckpoint to restore original checkpointed register state. */ - TM_DEBUG("*** tm_recheckpoint of pid %d " - "(new->msr 0x%lx, new->origmsr 0x%lx)\n", - new->pid, new->thread.regs->msr, msr); + TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n", + new->pid, new->thread.regs->msr); - tm_recheckpoint(&new->thread, msr); + tm_recheckpoint(&new->thread); /* * The checkpointed state has been restored but the live state has diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1dd5fa0f65fd..16d16583cf11 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -880,7 +880,7 @@ static long restore_tm_user_regs(struct pt_regs *regs, /* Make sure the transaction is marked as failed */ current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ - tm_recheckpoint(¤t->thread, msr); + tm_recheckpoint(¤t->thread); /* This loads the speculative FP/VEC state, if used */ msr_check_and_set(msr & (MSR_FP | MSR_VEC)); diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4dfdd8e56836..528ea744911c 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -552,7 +552,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, /* Make sure the transaction is marked as failed */ tsk->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ - tm_recheckpoint(&tsk->thread, msr); + tm_recheckpoint(&tsk->thread); msr_check_and_set(msr & (MSR_FP | MSR_VEC)); if (msr & MSR_FP) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index c4ba37822ba0..d89fb0e6f9ed 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -79,15 +79,12 @@ _GLOBAL(tm_abort) blr /* void tm_reclaim(struct thread_struct *thread, - * unsigned long orig_msr, * uint8_t cause) * * - Performs a full reclaim. This destroys outstanding * transactions and updates thread->regs.tm_ckpt_* with the * original checkpointed state. Note that thread->regs is * unchanged. - * - FP regs are written back to thread->transact_fpr before - * reclaiming. These are the transactional (current) versions. * * Purpose is to both abort transactions of, and preserve the state of, * a transactions at a context switch. We preserve/restore both sets of process @@ -98,9 +95,9 @@ _GLOBAL(tm_abort) * Call with IRQs off, stacks get all out of sync for some periods in here! */ _GLOBAL(tm_reclaim) - mfcr r6 + mfcr r5 mflr r0 - stw r6, 8(r1) + stw r5, 8(r1) std r0, 16(r1) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) @@ -108,7 +105,6 @@ _GLOBAL(tm_reclaim) /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ std r3, STK_PARAM(R3)(r1) - std r4, STK_PARAM(R4)(r1) SAVE_NVGPRS(r1) /* We need to setup MSR for VSX register save instructions. */ @@ -138,8 +134,8 @@ _GLOBAL(tm_reclaim) std r1, PACAR1(r13) /* Clear MSR RI since we are about to change r1, EE is already off. */ - li r4, 0 - mtmsrd r4, 1 + li r5, 0 + mtmsrd r5, 1 /* * BE CAREFUL HERE: @@ -151,7 +147,7 @@ _GLOBAL(tm_reclaim) * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ - TRECLAIM(R5) /* Cause in r5 */ + TRECLAIM(R4) /* Cause in r4 */ /* ******************** GPRs ******************** */ /* Stash the checkpointed r13 away in the scratch SPR and get the real @@ -242,40 +238,30 @@ _GLOBAL(tm_reclaim) /* ******************** FPR/VR/VSRs ************ - * After reclaiming, capture the checkpointed FPRs/VRs /if used/. - * - * (If VSX used, FP and VMX are implied. Or, we don't need to look - * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.) - * - * We're passed the thread's MSR as the second parameter + * After reclaiming, capture the checkpointed FPRs/VRs. * * We enabled VEC/FP/VSX in the msr above, so we can execute these * instructions! */ - ld r4, STK_PARAM(R4)(r1) /* Second parameter, MSR * */ mr r3, r12 - andis. r0, r4, MSR_VEC@h - beq dont_backup_vec + /* Altivec (VEC/VMX/VR)*/ addi r7, r3, THREAD_CKVRSTATE SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */ mfvscr v0 li r6, VRSTATE_VSCR stvx v0, r7, r6 -dont_backup_vec: + + /* VRSAVE */ mfspr r0, SPRN_VRSAVE std r0, THREAD_CKVRSAVE(r3) - andi. r0, r4, MSR_FP - beq dont_backup_fp - + /* Floating Point (FP) */ addi r7, r3, THREAD_CKFPSTATE SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */ - mffs fr0 stfd fr0,FPSTATE_FPSCR(r7) -dont_backup_fp: /* TM regs, incl TEXASR -- these live in thread_struct. Note they've * been updated by the treclaim, to explain to userland the failure @@ -343,22 +329,19 @@ _GLOBAL(__tm_recheckpoint) */ subi r7, r7, STACK_FRAME_OVERHEAD + /* We need to setup MSR for FP/VMX/VSX register save instructions. */ mfmsr r6 - /* R4 = original MSR to indicate whether thread used FP/Vector etc. */ - - /* Enable FP/vec in MSR if necessary! */ - lis r5, MSR_VEC@h + mr r5, r6 ori r5, r5, MSR_FP - and. r5, r4, r5 - beq restore_gprs /* if neither, skip both */ - +#ifdef CONFIG_ALTIVEC + oris r5, r5, MSR_VEC@h +#endif #ifdef CONFIG_VSX BEGIN_FTR_SECTION - oris r5, r5, MSR_VSX@h + oris r5,r5, MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */ - mtmsr r5 + mtmsrd r5 #ifdef CONFIG_ALTIVEC /* @@ -367,28 +350,20 @@ _GLOBAL(__tm_recheckpoint) * thread.fp_state[] version holds the 'live' (transactional) * and will be loaded subsequently by any FPUnavailable trap. */ - andis. r0, r4, MSR_VEC@h - beq dont_restore_vec - addi r8, r3, THREAD_CKVRSTATE li r5, VRSTATE_VSCR lvx v0, r8, r5 mtvscr v0 REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */ -dont_restore_vec: ld r5, THREAD_CKVRSAVE(r3) mtspr SPRN_VRSAVE, r5 #endif - andi. r0, r4, MSR_FP - beq dont_restore_fp - addi r8, r3, THREAD_CKFPSTATE lfd fr0, FPSTATE_FPSCR(r8) MTFSF_L(fr0) REST_32FPRS_VSRS(0, R4, R8) -dont_restore_fp: mtmsr r6 /* FP/Vec off again! */ restore_gprs: diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e099e61c702b..4e5a543d0b74 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1693,7 +1693,7 @@ void fp_unavailable_tm(struct pt_regs *regs) * If VMX is in use, the VRs now hold checkpointed values, * so we don't want to load the VRs from the thread_struct. */ - tm_recheckpoint(¤t->thread, orig_msr | MSR_FP); + tm_recheckpoint(¤t->thread); /* If VMX is in use, get the transactional values back */ if (orig_msr & MSR_VEC) { @@ -1721,7 +1721,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) regs->nip, regs->msr); tm_reclaim_current(TM_CAUSE_FAC_UNAV); current->thread.load_vec = 1; - tm_recheckpoint(¤t->thread, orig_msr | MSR_VEC); + tm_recheckpoint(¤t->thread); current->thread.used_vr = 1; if (orig_msr & MSR_FP) { @@ -1733,8 +1733,6 @@ void altivec_unavailable_tm(struct pt_regs *regs) void vsx_unavailable_tm(struct pt_regs *regs) { - unsigned long orig_msr = regs->msr; - /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. * @@ -1748,29 +1746,17 @@ void vsx_unavailable_tm(struct pt_regs *regs) current->thread.used_vsr = 1; - /* If FP and VMX are already loaded, we have all the state we need */ - if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) { - regs->msr |= MSR_VSX; - return; - } - /* This reclaims FP and/or VR regs if they're already enabled */ tm_reclaim_current(TM_CAUSE_FAC_UNAV); current->thread.load_vec = 1; current->thread.load_fp = 1; - /* This loads & recheckpoints FP and VRs; but we have - * to be sure not to overwrite previously-valid state. - */ - tm_recheckpoint(¤t->thread, orig_msr | MSR_FP | MSR_VEC); - - msr_check_and_set(orig_msr & (MSR_FP | MSR_VEC)); + tm_recheckpoint(¤t->thread); - if (orig_msr & MSR_FP) - load_fp_state(¤t->thread.fp_state); - if (orig_msr & MSR_VEC) - load_vr_state(¤t->thread.vr_state); + msr_check_and_set(MSR_FP | MSR_VEC); + load_fp_state(¤t->thread.fp_state); + load_vr_state(¤t->thread.vr_state); } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -- cgit v1.2.3 From ec233ede4c8654894610ea54f4dae7adc954ac62 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Nov 2017 18:23:53 -0800 Subject: powerpc: Add support for setting SPRN_TIDR We need the SPRN_TIDR to be set for use with fast thread-wakeup (core- to-core wakeup) and also with CAPI. Each thread in a process needs to have a unique id within the process. But for now, we assign globally unique thread ids to all threads in the system. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Philippe Bergheaud Signed-off-by: Christophe Lombard [mpe: Simplify tidr clearing on fork() and ctx switch code] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 1 + arch/powerpc/include/asm/switch_to.h | 3 + arch/powerpc/kernel/process.c | 116 +++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index fab7ff877304..58cc21274423 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -329,6 +329,7 @@ struct thread_struct { */ int dscr_inherit; unsigned long ppr; /* used to save/restore SMT priority */ + unsigned long tidr; #endif #ifdef CONFIG_PPC_BOOK3S_64 unsigned long tar; diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 17c8380673a6..f5da32f43fed 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -91,4 +91,7 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } +extern int set_thread_tidr(struct task_struct *t); +extern void clear_thread_tidr(struct task_struct *t); + #endif /* _ASM_POWERPC_SWITCH_TO_H */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4083b737decb..887ae5047288 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1136,6 +1136,10 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (old_thread->tar != new_thread->tar) mtspr(SPRN_TAR, new_thread->tar); } + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + old_thread->tidr != new_thread->tidr) + mtspr(SPRN_TIDR, new_thread->tidr); #endif } @@ -1451,6 +1455,116 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } +#ifdef CONFIG_PPC64 +static DEFINE_SPINLOCK(vas_thread_id_lock); +static DEFINE_IDA(vas_thread_ida); + +/* + * We need to assign a unique thread id to each thread in a process. + * + * This thread id, referred to as TIDR, and separate from the Linux's tgid, + * is intended to be used to direct an ASB_Notify from the hardware to the + * thread, when a suitable event occurs in the system. + * + * One such event is a "paste" instruction in the context of Fast Thread + * Wakeup (aka Core-to-core wake up in the Virtual Accelerator Switchboard + * (VAS) in POWER9. + * + * To get a unique TIDR per process we could simply reuse task_pid_nr() but + * the problem is that task_pid_nr() is not yet available copy_thread() is + * called. Fixing that would require changing more intrusive arch-neutral + * code in code path in copy_process()?. + * + * Further, to assign unique TIDRs within each process, we need an atomic + * field (or an IDR) in task_struct, which again intrudes into the arch- + * neutral code. So try to assign globally unique TIDRs for now. + * + * NOTE: TIDR 0 indicates that the thread does not need a TIDR value. + * For now, only threads that expect to be notified by the VAS + * hardware need a TIDR value and we assign values > 0 for those. + */ +#define MAX_THREAD_CONTEXT ((1 << 16) - 1) +static int assign_thread_tidr(void) +{ + int index; + int err; + +again: + if (!ida_pre_get(&vas_thread_ida, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&vas_thread_id_lock); + err = ida_get_new_above(&vas_thread_ida, 1, &index); + spin_unlock(&vas_thread_id_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_THREAD_CONTEXT) { + spin_lock(&vas_thread_id_lock); + ida_remove(&vas_thread_ida, index); + spin_unlock(&vas_thread_id_lock); + return -ENOMEM; + } + + return index; +} + +static void free_thread_tidr(int id) +{ + spin_lock(&vas_thread_id_lock); + ida_remove(&vas_thread_ida, id); + spin_unlock(&vas_thread_id_lock); +} + +/* + * Clear any TIDR value assigned to this thread. + */ +void clear_thread_tidr(struct task_struct *t) +{ + if (!t->thread.tidr) + return; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + WARN_ON_ONCE(1); + return; + } + + mtspr(SPRN_TIDR, 0); + free_thread_tidr(t->thread.tidr); + t->thread.tidr = 0; +} + +void arch_release_task_struct(struct task_struct *t) +{ + clear_thread_tidr(t); +} + +/* + * Assign a unique TIDR (thread id) for task @t and set it in the thread + * structure. For now, we only support setting TIDR for 'current' task. + */ +int set_thread_tidr(struct task_struct *t) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -EINVAL; + + if (t != current) + return -EINVAL; + + t->thread.tidr = assign_thread_tidr(); + if (t->thread.tidr < 0) + return t->thread.tidr; + + mtspr(SPRN_TIDR, t->thread.tidr); + + return 0; +} + +#endif /* CONFIG_PPC64 */ + void release_thread(struct task_struct *t) { @@ -1597,6 +1711,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, } if (cpu_has_feature(CPU_FTR_HAS_PPR)) p->thread.ppr = INIT_PPR; + + p->thread.tidr = 0; #endif kregs->nip = ppc_function_entry(f); return 0; -- cgit v1.2.3 From 9d2a4d71332cfdf4ea90754ad9b2f05a5ee5f6c7 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Nov 2017 18:23:54 -0800 Subject: powerpc: Define set_thread_uses_vas() A CP_ABORT instruction is required in processes that have mapped a VAS "paste address" with the intention of using COPY/PASTE instructions. But since CP_ABORT is expensive, we want to restrict it to only processes that use/intend to use COPY/PASTE. Define an interface, set_thread_uses_vas(), that VAS can use to indicate that the current process opened a send window. During context switch, issue CP_ABORT only for processes that have the flag set. Thanks for input from Nick Piggin, Michael Ellerman. Signed-off-by: Sukadev Bhattiprolu [mpe: Fix to not use new_thread after _switch() returns] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/processor.h | 2 ++ arch/powerpc/include/asm/switch_to.h | 2 ++ arch/powerpc/kernel/process.c | 41 +++++++++++++++++++++++++++--------- 3 files changed, 35 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/kernel/process.c') diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 58cc21274423..bdab3b74eb98 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -341,7 +341,9 @@ struct thread_struct { unsigned long sier; unsigned long mmcr2; unsigned mmcr0; + unsigned used_ebb; + unsigned int used_vas; #endif }; diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index f5da32f43fed..07ff06e9dae1 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -91,6 +91,8 @@ static inline void clear_task_ebb(struct task_struct *t) #endif } +extern int set_thread_uses_vas(void); + extern int set_thread_tidr(struct task_struct *t); extern void clear_thread_tidr(struct task_struct *t); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 887ae5047288..bfdd783e3916 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1244,17 +1244,17 @@ struct task_struct *__switch_to(struct task_struct *prev, * The copy-paste buffer can only store into foreign real * addresses, so unprivileged processes can not see the * data or use it in any way unless they have foreign real - * mappings. We don't have a VAS driver that allocates those - * yet, so no cpabort is required. + * mappings. If the new process has the foreign real address + * mappings, we must issue a cp_abort to clear any state and + * prevent snooping, corruption or a covert channel. + * + * DD1 allows paste into normal system memory so we do an + * unpaired copy, rather than cp_abort, to clear the buffer, + * since cp_abort is quite expensive. */ - if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { - /* - * DD1 allows paste into normal system memory, so we - * do an unpaired copy here to clear the buffer and - * prevent a covert channel being set up. - * - * cpabort is not used because it is quite expensive. - */ + if (current_thread_info()->task->thread.used_vas) { + asm volatile(PPC_CP_ABORT); + } else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) { asm volatile(PPC_COPY(%0, %1) : : "r"(dummy_copy_buffer), "r"(0)); } @@ -1455,6 +1455,27 @@ void flush_thread(void) #endif /* CONFIG_HAVE_HW_BREAKPOINT */ } +int set_thread_uses_vas(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -EINVAL; + + current->thread.used_vas = 1; + + /* + * Even a process that has no foreign real address mapping can use + * an unpaired COPY instruction (to no real effect). Issue CP_ABORT + * to clear any pending COPY and prevent a covert channel. + * + * __switch_to() will issue CP_ABORT on future context switches. + */ + asm volatile(PPC_CP_ABORT); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + return 0; +} + #ifdef CONFIG_PPC64 static DEFINE_SPINLOCK(vas_thread_id_lock); static DEFINE_IDA(vas_thread_ida); -- cgit v1.2.3