From 021d23428bdbae032294e8f4a29cb53cb50ae71c Mon Sep 17 00:00:00 2001 From: Wende Tan Date: Tue, 17 Oct 2023 15:21:04 -0700 Subject: RISC-V: build: Allow LTO to be selected Allow LTO to be selected for RISC-V, only when LLD >= 14, since there is an issue [1] in prior LLD versions that prevents LLD to generate proper machine code for RISC-V when writing `nop`s. To avoid boot failures in QEMU [2], '-mattr=+c' and '-mattr=+relax' need to be passed via '-mllvm' to ld.lld, as there appears to be an issue with LLVM's target-features and LTO [3], which can result in incorrect relocations to branch targets [4]. Once this is fixed in LLVM, it can be made conditional on affected ld.lld versions. Disable LTO for arch/riscv/kernel/pi, as llvm-objcopy expects an ELF object file when manipulating the files in that subfolder, rather than LLVM bitcode. [1] https://github.com/llvm/llvm-project/issues/50505, resolved by LLVM commit e63455d5e0e5 ("[MC] Use local MCSubtargetInfo in writeNops") [2] https://github.com/ClangBuiltLinux/linux/issues/1942 [3] https://github.com/llvm/llvm-project/issues/59350 [4] https://github.com/llvm/llvm-project/issues/65090 Tested-by: Wende Tan Signed-off-by: Wende Tan Co-developed-by: Nathan Chancellor Signed-off-by: Nathan Chancellor Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20231017-riscv-lto-v4-1-e7810b24e805@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..60c8fd1a677f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -47,6 +47,9 @@ config RISCV select ARCH_SUPPORTS_CFI_CLANG select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGETLBFS if MMU + # LLD >= 14: https://github.com/llvm/llvm-project/issues/50505 + select ARCH_SUPPORTS_LTO_CLANG if LLD_VERSION >= 140000 + select ARCH_SUPPORTS_LTO_CLANG_THIN if LLD_VERSION >= 140000 select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU select ARCH_SUPPORTS_PER_VMA_LOCK if MMU select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK -- cgit v1.2.3 From 34ca4ec628deb4f00da38c7d73486b8499e30dea Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 21 Jan 2024 16:19:13 -0800 Subject: RISC-V: add TOOLCHAIN_HAS_VECTOR_CRYPTO Add a kconfig symbol that indicates whether the toolchain supports the vector crypto extensions. This is needed by the RISC-V crypto code. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20240122002024.27477-3-ebiggers@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..5613b2bb686e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -578,6 +578,13 @@ config TOOLCHAIN_HAS_ZBB depends on LLD_VERSION >= 150000 || LD_VERSION >= 23900 depends on AS_HAS_OPTION_ARCH +# This symbol indicates that the toolchain supports all v1.0 vector crypto +# extensions, including Zvk*, Zvbb, and Zvbc. LLVM added all of these at once. +# binutils added all except Zvkb, then added Zvkb. So we just check for Zvkb. 
+config TOOLCHAIN_HAS_VECTOR_CRYPTO + def_bool $(as-instr, .option arch$(comma) +zvkb) + depends on AS_HAS_OPTION_ARCH + config RISCV_ISA_ZBB bool "Zbb extension support for bit manipulation instructions" depends on TOOLCHAIN_HAS_ZBB -- cgit v1.2.3 From e2d6b54b935a98c7d83f7e27597738be903d6703 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 2 Aug 2023 12:12:53 +0100 Subject: Revert "RISC-V: mark hibernation as nonportable" Revert commit ed309ce52218 ("RISC-V: mark hibernation as nonportable") as it appears the broken versions of OpenSBI have not made it to production on any systems that support hibernation. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20230802-chef-throng-d9de8b672a49@wendy Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 61826240926d..a82bc8bed503 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -1011,11 +1011,8 @@ menu "Power management options" source "kernel/power/Kconfig" -# Hibernation is only possible on systems where the SBI implementation has -# marked its reserved memory as not accessible from, or does not run -# from the same memory as, Linux config ARCH_HIBERNATION_POSSIBLE - def_bool NONPORTABLE + def_bool y config ARCH_HIBERNATION_HEADER def_bool HIBERNATION -- cgit v1.2.3 From 69be3fb111e73bd025ce6d2322371da5aa497c70 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 20 Dec 2023 01:50:45 +0800 Subject: riscv: enable MMU_GATHER_RCU_TABLE_FREE for SMP && MMU In order to implement fast gup we need to ensure that the page table walker is protected from page table pages being freed from under it. riscv situation is more complicated than other architectures: some riscv platforms may use IPI to perform TLB shootdown, for example, those platforms which support AIA, usually the riscv_ipi_for_rfence is true on these platforms; some riscv platforms may rely on the SBI to perform TLB shootdown, usually the riscv_ipi_for_rfence is false on these platforms. To keep software pagetable walkers safe in this case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h for more details. This patch enables MMU_GATHER_RCU_TABLE_FREE, then use *tlb_remove_page_ptdesc() for those platforms which use IPI to perform TLB shootdown; *tlb_remove_ptdesc() for those platforms which use SBI to perform TLB shootdown; Both case mean that disabling interrupts will block the free and protect the fast gup page walker. 
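As a condensed illustration of the pattern the diff below applies (all names are taken from the patch itself; treat this as a sketch, not a substitute for the hunks that follow), each page-table free path picks the mmu_gather helper according to riscv_use_ipi_for_rfence():

  static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
                                    unsigned long addr)
  {
          struct ptdesc *ptdesc = page_ptdesc(pte);

          pagetable_pte_dtor(ptdesc);
          if (riscv_use_ipi_for_rfence())
                  /* IPI-based rfence: irqs-off in the lockless GUP walker already blocks the free. */
                  tlb_remove_page_ptdesc(tlb, ptdesc);
          else
                  /* SBI-based rfence: defer the free through RCU (MMU_GATHER_RCU_TABLE_FREE). */
                  tlb_remove_ptdesc(tlb, ptdesc);
  }

The pmd/pud/p4d variants in the patch apply the same riscv_use_ipi_for_rfence() check.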
Signed-off-by: Jisheng Zhang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231219175046.2496-4-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgalloc.h | 23 ++++++++++++++++++----- arch/riscv/include/asm/tlb.h | 18 ++++++++++++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..a0d9b8f5371d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -147,6 +147,7 @@ config RISCV select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA + select MMU_GATHER_RCU_TABLE_FREE if SMP && MMU select MODULES_USE_ELF_RELA if MODULES select MODULE_SECTIONS if MODULES select OF diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 3c5e3bd15f46..deaf971253a2 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -102,7 +102,10 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, struct ptdesc *ptdesc = virt_to_ptdesc(pud); pagetable_pud_dtor(ptdesc); - tlb_remove_page_ptdesc(tlb, ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); } } @@ -136,8 +139,12 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long addr) { - if (pgtable_l5_enabled) - tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d)); + if (pgtable_l5_enabled) { + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, virt_to_ptdesc(p4d)); + else + tlb_remove_ptdesc(tlb, virt_to_ptdesc(p4d)); + } } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -169,7 +176,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, struct ptdesc *ptdesc = virt_to_ptdesc(pmd); pagetable_pmd_dtor(ptdesc); - tlb_remove_page_ptdesc(tlb, ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); } #endif /* __PAGETABLE_PMD_FOLDED */ @@ -180,7 +190,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, struct ptdesc *ptdesc = page_ptdesc(pte); pagetable_pte_dtor(ptdesc); - tlb_remove_page_ptdesc(tlb, ptdesc); + if (riscv_use_ipi_for_rfence()) + tlb_remove_page_ptdesc(tlb, ptdesc); + else + tlb_remove_ptdesc(tlb, ptdesc); } #endif /* CONFIG_MMU */ diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h index 1eb5682b2af6..a0b8b853503f 100644 --- a/arch/riscv/include/asm/tlb.h +++ b/arch/riscv/include/asm/tlb.h @@ -10,6 +10,24 @@ struct mmu_gather; static void tlb_flush(struct mmu_gather *tlb); +#ifdef CONFIG_MMU +#include + +/* + * While riscv platforms with riscv_ipi_for_rfence as true require an IPI to + * perform TLB shootdown, some platforms with riscv_ipi_for_rfence as false use + * SBI to perform TLB shootdown. To keep software pagetable walkers safe in this + * case we switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the + * comment below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h + * for more details. 
+ */ +static inline void __tlb_remove_table(void *table) +{ + free_page_and_swap_cache(table); +} + +#endif /* CONFIG_MMU */ + #define tlb_flush tlb_flush #include -- cgit v1.2.3 From 3f910b7a522e064d7261f31a00d9c9dca31d902a Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 20 Dec 2023 01:50:46 +0800 Subject: riscv: enable HAVE_FAST_GUP if MMU Activate the fast gup for riscv mmu platforms. Here are some GUP_FAST_BENCHMARK performance numbers: Before the patch: GUP_FAST_BENCHMARK: Time: get:53203 put:5085 us After the patch: GUP_FAST_BENCHMARK: Time: get:17711 put:5060 us The get time is reduced by 66.7%! IOW, 3x get speed! Signed-off-by: Jisheng Zhang Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231219175046.2496-5-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a0d9b8f5371d..9e1e7c74b1f9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -119,6 +119,7 @@ config RISCV select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION select HAVE_EBPF_JIT if MMU + select HAVE_FAST_GUP if MMU select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION select HAVE_GCC_PLUGINS diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..a76e86e1cb4c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -673,6 +673,12 @@ static inline int pmd_write(pmd_t pmd) return pte_write(pmd_pte(pmd)); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pte_write(pud_pte(pud)); +} + static inline int pmd_dirty(pmd_t pmd) { return pte_dirty(pmd_pte(pmd)); -- cgit v1.2.3 From 05d450aabd7386246c5aafc341fe9febe5855967 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Thu, 9 Nov 2023 21:37:51 +0800 Subject: riscv: Support RANDOMIZE_KSTACK_OFFSET Inspired from arm64's implement -- commit 70918779aec9 ("arm64: entry: Enable random_kstack_offset support") Add support of kernel stack offset randomization while handling syscall, the offset is defaultly limited by KSTACK_OFFSET_MAX() (i.e. 10 bits). In order to avoid trigger stack canaries (due to __builtin_alloca) and slowing down the entry path, use __no_stack_protector attribute to disable stack protector for do_trap_ecall_u() at the function level. 
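Condensed, the entry-path change follows the same shape as the arm64 implementation it is inspired by; a sketch of the do_trap_ecall_u() hunk below, using only names that appear in the patch:

  	syscall = syscall_enter_from_user_mode(regs, syscall);

  	/* Consume the previously chosen per-CPU offset (via __builtin_alloca()). */
  	add_random_kstack_offset();

  	if (syscall >= 0 && syscall < NR_syscalls)
  		syscall_handler(regs, syscall);
  	else if (syscall != -1)
  		regs->a0 = -ENOSYS;

  	/* Refill the offset for the next syscall; capped by KSTACK_OFFSET_MAX(). */
  	choose_random_kstack_offset(get_random_u16());

  	syscall_exit_to_user_mode(regs);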
Acked-by: Palmer Dabbelt Reviewed-by: Kees Cook Signed-off-by: Song Shuai Link: https://lore.kernel.org/r/20231109133751.212079-1-songshuaishuai@tinylab.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/traps.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index cd9fc635857e..b49016bb5077 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -109,6 +109,7 @@ config RISCV select HAVE_ARCH_KGDB_QXFER_PKT select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index a1b9be3c4332..868d6280cf66 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -310,7 +311,8 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) } } -asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +asmlinkage __visible __trap_section __no_stack_protector +void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { long syscall = regs->a7; @@ -322,10 +324,23 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) syscall = syscall_enter_from_user_mode(regs, syscall); + add_random_kstack_offset(); + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); else if (syscall != -1) regs->a0 = -ENOSYS; + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: 16-byte (i.e. 4-bit) aligned + * for RV32I or RV64I. + * + * The resulting 6 bits of entropy is seen in SP[9:4]. + */ + choose_random_kstack_offset(get_random_u16()); syscall_exit_to_user_mode(regs); } else { -- cgit v1.2.3 From d6cfd1770f20392d7009ae1fdb04733794514fa9 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Wed, 31 Jan 2024 15:49:33 +0100 Subject: membarrier: riscv: Add full memory barrier in switch_mm() The membarrier system call requires a full memory barrier after storing to rq->curr, before going back to user-space. The barrier is only needed when switching between processes: the barrier is implied by mmdrop() when switching from kernel to userspace, and it's not needed when switching from userspace to kernel. Rely on the feature/mechanism ARCH_HAS_MEMBARRIER_CALLBACKS and on the primitive membarrier_arch_switch_mm(), already adopted by the PowerPC architecture, to insert the required barrier. 
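The user-visible guarantee being strengthened here is that of the membarrier(2) system call; a minimal user-space sketch of the expedited command it covers (standard Linux membarrier UAPI, not part of this patch):

  #include <linux/membarrier.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <stdio.h>

  static int membarrier(int cmd, unsigned int flags, int cpu_id)
  {
  	return syscall(__NR_membarrier, cmd, flags, cpu_id);
  }

  int main(void)
  {
  	/* Register once, then each call returns only after every other thread
  	 * of this process has gone through a full memory barrier. */
  	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0) ||
  	    membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
  		perror("membarrier");
  		return 1;
  	}
  	return 0;
  }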
Fixes: fab957c11efe2f ("RISC-V: Atomic and Locking Code") Signed-off-by: Andrea Parri Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20240131144936.29190-2-parri.andrea@gmail.com Signed-off-by: Palmer Dabbelt --- MAINTAINERS | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/membarrier.h | 31 +++++++++++++++++++++++++++++++ arch/riscv/mm/context.c | 2 ++ kernel/sched/core.c | 5 +++-- 5 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 arch/riscv/include/asm/membarrier.h (limited to 'arch/riscv/Kconfig') diff --git a/MAINTAINERS b/MAINTAINERS index 8d1052fa6a69..2450e88d3e2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14039,7 +14039,7 @@ M: Mathieu Desnoyers M: "Paul E. McKenney" L: linux-kernel@vger.kernel.org S: Supported -F: arch/powerpc/include/asm/membarrier.h +F: arch/*/include/asm/membarrier.h F: include/uapi/linux/membarrier.h F: kernel/sched/membarrier.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..087abf9e51c6 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -27,6 +27,7 @@ config RISCV select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h new file mode 100644 index 000000000000..6c016ebb5020 --- /dev/null +++ b/arch/riscv/include/asm/membarrier.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_RISCV_MEMBARRIER_H +#define _ASM_RISCV_MEMBARRIER_H + +static inline void membarrier_arch_switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + /* + * Only need the full barrier when switching between processes. + * Barrier when switching from kernel to userspace is not + * required here, given that it is implied by mmdrop(). Barrier + * when switching from userspace to kernel is not needed after + * store to rq->curr. + */ + if (IS_ENABLED(CONFIG_SMP) && + likely(!(atomic_read(&next->membarrier_state) & + (MEMBARRIER_STATE_PRIVATE_EXPEDITED | + MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) + return; + + /* + * The membarrier system call requires a full memory barrier + * after storing to rq->curr, before going back to user-space. + * Matches a full barrier in the proximity of the membarrier + * system call entry. + */ + smp_mb(); +} + +#endif /* _ASM_RISCV_MEMBARRIER_H */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 217fd4de6134..ba8eb3944687 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (unlikely(prev == next)) return; + membarrier_arch_switch_mm(prev, next, task); + /* * Mark the current MM context as inactive, and the next as * active. This is at least used by the icache flushing diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9116bcc90346..c4ca8085885a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6709,8 +6709,9 @@ static void __sched notrace __schedule(unsigned int sched_mode) * * Here are the schemes providing that barrier on the * various architectures: - * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. - * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. + * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC, + * RISC-V. switch_mm() relies on membarrier_arch_switch_mm() + * on PowerPC and on RISC-V. 
* - finish_lock_switch() for weakly-ordered * architectures where spin_unlock is a full barrier, * - switch_to() for arm64 (weakly-ordered, spin_unlock -- cgit v1.2.3 From cd9b29014dc69609489261efe351d0c7709ae8bf Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Wed, 31 Jan 2024 15:49:36 +0100 Subject: membarrier: riscv: Provide core serializing command RISC-V uses xRET instructions on return from interrupt and to go back to user-space; the xRET instruction is not core serializing. Use FENCE.I for providing core serialization as follows: - by calling sync_core_before_usermode() on return from interrupt (cf. ipi_sync_core()), - via switch_mm() and sync_core_before_usermode() (respectively, for uthread->uthread and kthread->uthread transitions) before returning to user-space. On RISC-V, the serialization in switch_mm() is activated by resetting the icache_stale_mask of the mm at prepare_sync_core_cmd(). Suggested-by: Palmer Dabbelt Signed-off-by: Andrea Parri Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20240131144936.29190-5-parri.andrea@gmail.com Signed-off-by: Palmer Dabbelt --- .../sched/membarrier-sync-core/arch-support.txt | 18 +++++++++++++- MAINTAINERS | 1 + arch/riscv/Kconfig | 3 +++ arch/riscv/include/asm/membarrier.h | 19 ++++++++++++++ arch/riscv/include/asm/sync_core.h | 29 ++++++++++++++++++++++ kernel/sched/core.c | 4 +++ kernel/sched/membarrier.c | 4 +++ 7 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/include/asm/sync_core.h (limited to 'arch/riscv/Kconfig') diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt index d96b778b87ed..7425d2b994a3 100644 --- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt +++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt @@ -10,6 +10,22 @@ # Rely on implicit context synchronization as a result of exception return # when returning from IPI handler, and when returning to user-space. # +# * riscv +# +# riscv uses xRET as return from interrupt and to return to user-space. +# +# Given that xRET is not core serializing, we rely on FENCE.I for providing +# core serialization: +# +# - by calling sync_core_before_usermode() on return from interrupt (cf. +# ipi_sync_core()), +# +# - via switch_mm() and sync_core_before_usermode() (respectively, for +# uthread->uthread and kthread->uthread transitions) before returning +# to user-space. +# +# The serialization in switch_mm() is activated by prepare_sync_core_cmd(). +# # * x86 # # x86-32 uses IRET as return from interrupt, which takes care of the IPI. 
@@ -43,7 +59,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | ok | | sh: | TODO | | sparc: | TODO | diff --git a/MAINTAINERS b/MAINTAINERS index 78c835cc69c9..cc80968ec355 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14041,6 +14041,7 @@ L: linux-kernel@vger.kernel.org S: Supported F: Documentation/scheduler/membarrier.rst F: arch/*/include/asm/membarrier.h +F: arch/*/include/asm/sync_core.h F: include/uapi/linux/membarrier.h F: kernel/sched/membarrier.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 087abf9e51c6..70836381b948 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,14 +28,17 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MEMBARRIER_CALLBACKS + select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API + select ARCH_HAS_PREPARE_SYNC_CORE_CMD select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL + select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h index 6c016ebb5020..47b240d0d596 100644 --- a/arch/riscv/include/asm/membarrier.h +++ b/arch/riscv/include/asm/membarrier.h @@ -22,6 +22,25 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev, /* * The membarrier system call requires a full memory barrier * after storing to rq->curr, before going back to user-space. + * + * This barrier is also needed for the SYNC_CORE command when + * switching between processes; in particular, on a transition + * from a thread belonging to another mm to a thread belonging + * to the mm for which a membarrier SYNC_CORE is done on CPU0: + * + * - [CPU0] sets all bits in the mm icache_stale_mask (in + * prepare_sync_core_cmd()); + * + * - [CPU1] stores to rq->curr (by the scheduler); + * + * - [CPU0] loads rq->curr within membarrier and observes + * cpu_rq(1)->curr->mm != mm, so the IPI is skipped on + * CPU1; this means membarrier relies on switch_mm() to + * issue the sync-core; + * + * - [CPU1] switch_mm() loads icache_stale_mask; if the bit + * is zero, switch_mm() may incorrectly skip the sync-core. + * * Matches a full barrier in the proximity of the membarrier * system call entry. */ diff --git a/arch/riscv/include/asm/sync_core.h b/arch/riscv/include/asm/sync_core.h new file mode 100644 index 000000000000..9153016da8f1 --- /dev/null +++ b/arch/riscv/include/asm/sync_core.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RISCV_SYNC_CORE_H +#define _ASM_RISCV_SYNC_CORE_H + +/* + * RISC-V implements return to user-space through an xRET instruction, + * which is not core serializing. + */ +static inline void sync_core_before_usermode(void) +{ + asm volatile ("fence.i" ::: "memory"); +} + +#ifdef CONFIG_SMP +/* + * Ensure the next switch_mm() on every CPU issues a core serializing + * instruction for the given @mm. 
+ */ +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ + cpumask_setall(&mm->context.icache_stale_mask); +} +#else +static inline void prepare_sync_core_cmd(struct mm_struct *mm) +{ +} +#endif /* CONFIG_SMP */ + +#endif /* _ASM_RISCV_SYNC_CORE_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a972628e7756..e4a87bcf28d4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6721,6 +6721,10 @@ static void __sched notrace __schedule(unsigned int sched_mode) * * The barrier matches a full barrier in the proximity of * the membarrier system call entry. + * + * On RISC-V, this barrier pairing is also needed for the + * SYNC_CORE command when switching between processes, cf. + * the inline comments in membarrier_arch_switch_mm(). */ ++*switch_count; diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 6d1f31b3a967..703e8d80a576 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -342,6 +342,10 @@ static int membarrier_private_expedited(int flags, int cpu_id) /* * Matches memory barriers after rq->curr modification in * scheduler. + * + * On RISC-V, this barrier pairing is also needed for the + * SYNC_CORE command when switching between processes, cf. + * the inline comments in membarrier_arch_switch_mm(). */ smp_mb(); /* system call entry is not a mb. */ -- cgit v1.2.3 From 886516fae2b73a1578600e95631436785f3e44d6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 27 Jan 2024 01:00:54 -0800 Subject: RISC-V: fix check for zvkb with tip-of-tree clang LLVM commit 8e01042da9d3 ("[RISCV] Add missing dependency check for Zvkb (#79467)") broke the check used by the TOOLCHAIN_HAS_VECTOR_CRYPTO kconfig symbol because it made zvkb start depending on v or zve*. Fix this by specifying both v and zvkb when checking for support for zvkb. Signed-off-by: Eric Biggers Reviewed-by: Nathan Chancellor Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20240127090055.124336-1-ebiggers@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 27b1f44caa18..0bfcfec67ed5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -591,7 +591,7 @@ config TOOLCHAIN_HAS_ZBB # extensions, including Zvk*, Zvbb, and Zvbc. LLVM added all of these at once. # binutils added all except Zvkb, then added Zvkb. So we just check for Zvkb. config TOOLCHAIN_HAS_VECTOR_CRYPTO - def_bool $(as-instr, .option arch$(comma) +zvkb) + def_bool $(as-instr, .option arch$(comma) +v$(comma) +zvkb) depends on AS_HAS_OPTION_ARCH config RISCV_ISA_ZBB -- cgit v1.2.3 From f413aae96cda059635910c462ede0a8f0385897c Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Fri, 8 Mar 2024 10:25:58 -0800 Subject: riscv: Set unaligned access speed at compile time Introduce Kconfig options to set the kernel unaligned access support. These options provide a non-portable alternative to the runtime unaligned access probe. To support this, the unaligned access probing code is moved into it's own file and gated behind a new RISCV_PROBE_UNALIGNED_ACCESS_SUPPORT option. 
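For platforms that can rule out the runtime probe entirely, a hypothetical config fragment opting into the compile-time assumption could look like the following (fragment invented for illustration; it requires NONPORTABLE, as the help text in the diff below explains):

  # Skip the boot-time probe and assume fast unaligned accesses.
  CONFIG_NONPORTABLE=y
  CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS=y
  # CONFIG_RISCV_PROBE_UNALIGNED_ACCESS is not set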
Signed-off-by: Charlie Jenkins Reviewed-by: Conor Dooley Tested-by: Samuel Holland Link: https://lore.kernel.org/r/20240308-disable_misaligned_probe_config-v9-4-a388770ba0ce@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 58 ++++-- arch/riscv/include/asm/cpufeature.h | 24 +-- arch/riscv/kernel/Makefile | 4 +- arch/riscv/kernel/cpufeature.c | 272 ---------------------------- arch/riscv/kernel/sys_hwprobe.c | 13 ++ arch/riscv/kernel/traps_misaligned.c | 2 + arch/riscv/kernel/unaligned_access_speed.c | 282 +++++++++++++++++++++++++++++ 7 files changed, 359 insertions(+), 296 deletions(-) create mode 100644 arch/riscv/kernel/unaligned_access_speed.c (limited to 'arch/riscv/Kconfig') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index bffbd869a068..51481bf9364e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -688,27 +688,61 @@ config THREAD_SIZE_ORDER affects irq stack size, which is equal to thread stack size. config RISCV_MISALIGNED - bool "Support misaligned load/store traps for kernel and userspace" + bool select SYSCTL_ARCH_UNALIGN_ALLOW - default y help - Say Y here if you want the kernel to embed support for misaligned - load/store for both kernel and userspace. When disable, misaligned - accesses will generate SIGBUS in userspace and panic in kernel. + Embed support for emulating misaligned loads and stores. + +choice + prompt "Unaligned Accesses Support" + default RISCV_PROBE_UNALIGNED_ACCESS + help + This determines the level of support for unaligned accesses. This + information is used by the kernel to perform optimizations. It is also + exposed to user space via the hwprobe syscall. The hardware will be + probed at boot by default. + +config RISCV_PROBE_UNALIGNED_ACCESS + bool "Probe for hardware unaligned access support" + select RISCV_MISALIGNED + help + During boot, the kernel will run a series of tests to determine the + speed of unaligned accesses. This probing will dynamically determine + the speed of unaligned accesses on the underlying system. If unaligned + memory accesses trap into the kernel as they are not supported by the + system, the kernel will emulate the unaligned accesses to preserve the + UABI. + +config RISCV_EMULATED_UNALIGNED_ACCESS + bool "Emulate unaligned access where system support is missing" + select RISCV_MISALIGNED + help + If unaligned memory accesses trap into the kernel as they are not + supported by the system, the kernel will emulate the unaligned + accesses to preserve the UABI. When the underlying system does support + unaligned accesses, the unaligned accesses are assumed to be slow. + +config RISCV_SLOW_UNALIGNED_ACCESS + bool "Assume the system supports slow unaligned memory accesses" + depends on NONPORTABLE + help + Assume that the system supports slow unaligned memory accesses. The + kernel and userspace programs may not be able to run at all on systems + that do not support unaligned memory accesses. config RISCV_EFFICIENT_UNALIGNED_ACCESS - bool "Assume the CPU supports fast unaligned memory accesses" + bool "Assume the system supports fast unaligned memory accesses" depends on NONPORTABLE select DCACHE_WORD_ACCESS if MMU select HAVE_EFFICIENT_UNALIGNED_ACCESS help - Say Y here if you want the kernel to assume that the CPU supports - efficient unaligned memory accesses. When enabled, this option - improves the performance of the kernel on such CPUs. However, the - kernel will run much more slowly, or will not be able to run at all, - on CPUs that do not support efficient unaligned memory accesses. 
+ Assume that the system supports fast unaligned memory accesses. When + enabled, this option improves the performance of the kernel on such + systems. However, the kernel and userspace programs will run much more + slowly, or will not be able to run at all, on systems that do not + support efficient unaligned memory accesses. - If unsure what to do here, say N. +endchoice endmenu # "Platform type" diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 6fec91845aa0..46061f5e9764 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -28,37 +28,39 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); -DECLARE_PER_CPU(long, misaligned_access_speed); - /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; void riscv_user_isa_enable(void); -#ifdef CONFIG_RISCV_MISALIGNED -bool unaligned_ctl_available(void); +#if defined(CONFIG_RISCV_MISALIGNED) bool check_unaligned_access_emulated_all_cpus(void); void unaligned_emulation_finish(void); +bool unaligned_ctl_available(void); +DECLARE_PER_CPU(long, misaligned_access_speed); #else static inline bool unaligned_ctl_available(void) { return false; } - -static inline bool check_unaligned_access_emulated(int cpu) -{ - return false; -} - -static inline void unaligned_emulation_finish(void) {} #endif +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) DECLARE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); static __always_inline bool has_fast_unaligned_accesses(void) { return static_branch_likely(&fast_unaligned_access_speed_key); } +#else +static __always_inline bool has_fast_unaligned_accesses(void) +{ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + return true; + else + return false; +} +#endif unsigned long riscv_get_elf_hwcap(void); diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f71910718053..c8085126a6f9 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -38,7 +38,6 @@ extra-y += vmlinux.lds obj-y += head.o obj-y += soc.o obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o -obj-y += copy-unaligned.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -62,6 +61,9 @@ obj-y += tests/ obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_MISALIGNED) += traps_misaligned.o +obj-$(CONFIG_RISCV_MISALIGNED) += unaligned_access_speed.o +obj-$(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) += copy-unaligned.o + obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_RISCV_ISA_V) += vector.o obj-$(CONFIG_RISCV_ISA_V) += kernel_mode_vector.o diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index abb3a2f53106..319670af5704 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -21,20 +20,12 @@ #include #include #include -#include #include #include #include -#include "copy-unaligned.h" - #define NUM_ALPHA_EXTS ('z' - 'a' + 1) -#define MISALIGNED_ACCESS_JIFFIES_LG2 1 -#define MISALIGNED_BUFFER_SIZE 0x4000 -#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) -#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) - unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ @@ -43,11 +34,6 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. 
*/ struct riscv_isainfo hart_isa[NR_CPUS]; -/* Performance information */ -DEFINE_PER_CPU(long, misaligned_access_speed); - -static cpumask_t fast_misaligned_access; - /** * riscv_isa_extension_base() - Get base extension word * @@ -706,264 +692,6 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } -static int check_unaligned_access(void *param) -{ - int cpu = smp_processor_id(); - u64 start_cycles, end_cycles; - u64 word_cycles; - u64 byte_cycles; - int ratio; - unsigned long start_jiffies, now; - struct page *page = param; - void *dst; - void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; - - if (IS_ENABLED(CONFIG_RISCV_MISALIGNED) && - per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - return 0; - - /* Make an unaligned destination buffer. */ - dst = (void *)((unsigned long)page_address(page) | 0x1); - /* Unalign src as well, but differently (off by 1 + 2 = 3). */ - src = dst + (MISALIGNED_BUFFER_SIZE / 2); - src += 2; - word_cycles = -1ULL; - /* Do a warmup. */ - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - preempt_disable(); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - /* - * For a fixed amount of time, repeatedly try the function, and take - * the best time in cycles as the measurement. - */ - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - /* Ensure the CSR read can't reorder WRT to the copy. */ - mb(); - __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); - /* Ensure the copy ends before the end time is snapped. */ - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < word_cycles) - word_cycles = end_cycles - start_cycles; - } - - byte_cycles = -1ULL; - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - start_jiffies = jiffies; - while ((now = jiffies) == start_jiffies) - cpu_relax(); - - while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { - start_cycles = get_cycles64(); - mb(); - __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); - mb(); - end_cycles = get_cycles64(); - if ((end_cycles - start_cycles) < byte_cycles) - byte_cycles = end_cycles - start_cycles; - } - - preempt_enable(); - - /* Don't divide by zero. */ - if (!word_cycles || !byte_cycles) { - pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", - cpu); - - return 0; - } - - if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; - - ratio = div_u64((byte_cycles * 100), word_cycles); - pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", - cpu, - ratio / 100, - ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); - - per_cpu(misaligned_access_speed, cpu) = speed; - - /* - * Set the value of fast_misaligned_access of a CPU. These operations - * are atomic to avoid race conditions. 
- */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) - cpumask_set_cpu(cpu, &fast_misaligned_access); - else - cpumask_clear_cpu(cpu, &fast_misaligned_access); - - return 0; -} - -static void check_unaligned_access_nonboot_cpu(void *param) -{ - unsigned int cpu = smp_processor_id(); - struct page **pages = param; - - if (smp_processor_id() != 0) - check_unaligned_access(pages[cpu]); -} - -DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); - -static void modify_unaligned_access_branches(cpumask_t *mask, int weight) -{ - if (cpumask_weight(mask) == weight) - static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); - else - static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); -} - -static void set_unaligned_access_static_branches_except_cpu(int cpu) -{ - /* - * Same as set_unaligned_access_static_branches, except excludes the - * given CPU from the result. When a CPU is hotplugged into an offline - * state, this function is called before the CPU is set to offline in - * the cpumask, and thus the CPU needs to be explicitly excluded. - */ - - cpumask_t fast_except_me; - - cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); - cpumask_clear_cpu(cpu, &fast_except_me); - - modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); -} - -static void set_unaligned_access_static_branches(void) -{ - /* - * This will be called after check_unaligned_access_all_cpus so the - * result of unaligned access speed for all CPUs will be available. - * - * To avoid the number of online cpus changing between reading - * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be - * held before calling this function. - */ - - cpumask_t fast_and_online; - - cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); - - modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); -} - -static int lock_and_set_unaligned_access_static_branch(void) -{ - cpus_read_lock(); - set_unaligned_access_static_branches(); - cpus_read_unlock(); - - return 0; -} - -arch_initcall_sync(lock_and_set_unaligned_access_static_branch); - -static int riscv_online_cpu(unsigned int cpu) -{ - static struct page *buf; - - /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) - goto exit; - - buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!buf) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return -ENOMEM; - } - - check_unaligned_access(buf); - __free_pages(buf, MISALIGNED_BUFFER_ORDER); - -exit: - set_unaligned_access_static_branches(); - - return 0; -} - -static int riscv_offline_cpu(unsigned int cpu) -{ - set_unaligned_access_static_branches_except_cpu(cpu); - - return 0; -} - -/* Measure unaligned access speed on all CPUs present at boot in parallel. */ -static int check_unaligned_access_speed_all_cpus(void) -{ - unsigned int cpu; - unsigned int cpu_count = num_possible_cpus(); - struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), - GFP_KERNEL); - - if (!bufs) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - return 0; - } - - /* - * Allocate separate buffers for each CPU so there's no fighting over - * cache lines. 
- */ - for_each_cpu(cpu, cpu_online_mask) { - bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); - if (!bufs[cpu]) { - pr_warn("Allocation failure, not measuring misaligned performance\n"); - goto out; - } - } - - /* Check everybody except 0, who stays behind to tend jiffies. */ - on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); - - /* Check core 0. */ - smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); - - /* - * Setup hotplug callbacks for any new CPUs that come online or go - * offline. - */ - cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", - riscv_online_cpu, riscv_offline_cpu); - -out: - for_each_cpu(cpu, cpu_online_mask) { - if (bufs[cpu]) - __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); - } - - kfree(bufs); - return 0; -} - -#ifdef CONFIG_RISCV_MISALIGNED -static int check_unaligned_access_all_cpus(void) -{ - bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); - - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); - - return 0; -} -#else -static int check_unaligned_access_all_cpus(void) -{ - return check_unaligned_access_speed_all_cpus(); -} -#endif - -arch_initcall(check_unaligned_access_all_cpus); - void riscv_user_isa_enable(void) { if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index a7c56b41efd2..8cae41a502dd 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -147,6 +147,7 @@ static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) return (pair.value & ext); } +#if defined(CONFIG_RISCV_PROBE_UNALIGNED_ACCESS) static u64 hwprobe_misaligned(const struct cpumask *cpus) { int cpu; @@ -169,6 +170,18 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) return perf; } +#else +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) + return RISCV_HWPROBE_MISALIGNED_FAST; + + if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) + return RISCV_HWPROBE_MISALIGNED_EMULATED; + + return RISCV_HWPROBE_MISALIGNED_SLOW; +} +#endif static void hwprobe_one_pair(struct riscv_hwprobe *pair, const struct cpumask *cpus) diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index e55718179f42..2adb7c3e4dd5 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -413,7 +413,9 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; +#endif if (!unaligned_enabled) return -1; diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c new file mode 100644 index 000000000000..52264ea4f0bd --- /dev/null +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2024 Rivos Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "copy-unaligned.h" + +#define MISALIGNED_ACCESS_JIFFIES_LG2 1 +#define MISALIGNED_BUFFER_SIZE 0x4000 +#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) +#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) + +DEFINE_PER_CPU(long, misaligned_access_speed); + +#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS +static cpumask_t fast_misaligned_access; +static int check_unaligned_access(void *param) +{ + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page = param; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + return 0; + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + /* Do a warmup. */ + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + preempt_disable(); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + mb(); + __riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + preempt_enable(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n", + cpu); + + return 0; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + + per_cpu(misaligned_access_speed, cpu) = speed; + + /* + * Set the value of fast_misaligned_access of a CPU. These operations + * are atomic to avoid race conditions. 
+ */ + if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + cpumask_set_cpu(cpu, &fast_misaligned_access); + else + cpumask_clear_cpu(cpu, &fast_misaligned_access); + + return 0; +} + +static void check_unaligned_access_nonboot_cpu(void *param) +{ + unsigned int cpu = smp_processor_id(); + struct page **pages = param; + + if (smp_processor_id() != 0) + check_unaligned_access(pages[cpu]); +} + +DEFINE_STATIC_KEY_FALSE(fast_unaligned_access_speed_key); + +static void modify_unaligned_access_branches(cpumask_t *mask, int weight) +{ + if (cpumask_weight(mask) == weight) + static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key); + else + static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key); +} + +static void set_unaligned_access_static_branches_except_cpu(int cpu) +{ + /* + * Same as set_unaligned_access_static_branches, except excludes the + * given CPU from the result. When a CPU is hotplugged into an offline + * state, this function is called before the CPU is set to offline in + * the cpumask, and thus the CPU needs to be explicitly excluded. + */ + + cpumask_t fast_except_me; + + cpumask_and(&fast_except_me, &fast_misaligned_access, cpu_online_mask); + cpumask_clear_cpu(cpu, &fast_except_me); + + modify_unaligned_access_branches(&fast_except_me, num_online_cpus() - 1); +} + +static void set_unaligned_access_static_branches(void) +{ + /* + * This will be called after check_unaligned_access_all_cpus so the + * result of unaligned access speed for all CPUs will be available. + * + * To avoid the number of online cpus changing between reading + * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be + * held before calling this function. + */ + + cpumask_t fast_and_online; + + cpumask_and(&fast_and_online, &fast_misaligned_access, cpu_online_mask); + + modify_unaligned_access_branches(&fast_and_online, num_online_cpus()); +} + +static int lock_and_set_unaligned_access_static_branch(void) +{ + cpus_read_lock(); + set_unaligned_access_static_branches(); + cpus_read_unlock(); + + return 0; +} + +arch_initcall_sync(lock_and_set_unaligned_access_static_branch); + +static int riscv_online_cpu(unsigned int cpu) +{ + static struct page *buf; + + /* We are already set since the last check */ + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + goto exit; + + buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!buf) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return -ENOMEM; + } + + check_unaligned_access(buf); + __free_pages(buf, MISALIGNED_BUFFER_ORDER); + +exit: + set_unaligned_access_static_branches(); + + return 0; +} + +static int riscv_offline_cpu(unsigned int cpu) +{ + set_unaligned_access_static_branches_except_cpu(cpu); + + return 0; +} + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int check_unaligned_access_speed_all_cpus(void) +{ + unsigned int cpu; + unsigned int cpu_count = num_possible_cpus(); + struct page **bufs = kzalloc(cpu_count * sizeof(struct page *), + GFP_KERNEL); + + if (!bufs) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + return 0; + } + + /* + * Allocate separate buffers for each CPU so there's no fighting over + * cache lines. 
+ */ + for_each_cpu(cpu, cpu_online_mask) { + bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!bufs[cpu]) { + pr_warn("Allocation failure, not measuring misaligned performance\n"); + goto out; + } + } + + /* Check everybody except 0, who stays behind to tend jiffies. */ + on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1); + + /* Check core 0. */ + smp_call_on_cpu(0, check_unaligned_access, bufs[0], true); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu, riscv_offline_cpu); + +out: + for_each_cpu(cpu, cpu_online_mask) { + if (bufs[cpu]) + __free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER); + } + + kfree(bufs); + return 0; +} + +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_all_cpus(void) +{ + check_unaligned_access_emulated_all_cpus(); + + return 0; +} +#endif + +arch_initcall(check_unaligned_access_all_cpus); -- cgit v1.2.3
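The per-CPU result recorded by this probing code is what user space sees through the hwprobe syscall; a minimal sketch of querying it follows (struct riscv_hwprobe, the RISCV_HWPROBE_* constants and __NR_riscv_hwprobe are assumed to come from the kernel UAPI headers; exact header locations and availability depend on kernel and libc versions):

  #include <asm/hwprobe.h>
  #include <sys/syscall.h>
  #include <unistd.h>
  #include <stdio.h>

  int main(void)
  {
  	struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };

  	/* cpusetsize == 0 / cpus == NULL asks about all online CPUs. */
  	if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0)) {
  		perror("riscv_hwprobe");
  		return 1;
  	}

  	switch (pair.value & RISCV_HWPROBE_MISALIGNED_MASK) {
  	case RISCV_HWPROBE_MISALIGNED_FAST:
  		puts("unaligned accesses: fast");
  		break;
  	case RISCV_HWPROBE_MISALIGNED_EMULATED:
  		puts("unaligned accesses: emulated");
  		break;
  	case RISCV_HWPROBE_MISALIGNED_SLOW:
  		puts("unaligned accesses: slow");
  		break;
  	default:
  		puts("unaligned accesses: unknown or unsupported");
  	}
  	return 0;
  }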