From 1852e22b318b8d1c02b574da679b1b74f3686090 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Dec 2021 15:56:55 +0000 Subject: arm64: dts: apple: Add t8103 PMU interrupt affinities The two PMU pseudo interrupts have specific affinities. One set is affine to the small cores, and the other set affine to the big ones. Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 19afbc91020a..a2e006538c56 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -213,6 +213,18 @@ interrupt-controller; reg = <0x2 0x3b100000 0x0 0x8000>; power-domains = <&ps_aic>; + + affinities { + e-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; + }; + + p-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; + }; + }; }; pmgr: power-management@23b700000 { -- cgit v1.2.3 From 0f522efcd79634a6113195842ee763dc6ebacfbb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Nov 2021 17:09:49 +0000 Subject: arm64: dts: apple: Add t8303 PMU nodes Advertise the two PMU nodes for the t8103 SoC. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index a2e006538c56..9f8f4145db88 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -97,6 +97,18 @@ ; }; + pmu-e { + compatible = "apple,icestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + + pmu-p { + compatible = "apple,firestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + clkref: clock-ref { compatible = "fixed-clock"; #clock-cells = <0>; -- cgit v1.2.3 From 11db7410cfcba2e5ffed7b8bb2a57d4dd5e22063 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 13:55:19 +0000 Subject: irqchip/apple-aic: Move PMU-specific registers to their own include file As we are about to have a PMU driver, move the PMU bits from the AIC driver into a common include file. 
Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/apple_m1_pmu.h | 19 +++++++++++++++++++ drivers/irqchip/irq-apple-aic.c | 11 +---------- 2 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/apple_m1_pmu.h (limited to 'arch') diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h new file mode 100644 index 000000000000..b848af7faadc --- /dev/null +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ASM_APPLE_M1_PMU_h +#define __ASM_APPLE_M1_PMU_h + +#include +#include + +/* Core PMC control register */ +#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_IMODE GENMASK(10, 8) +#define PMCR0_IMODE_OFF 0 +#define PMCR0_IMODE_PMI 1 +#define PMCR0_IMODE_AIC 2 +#define PMCR0_IMODE_HALT 3 +#define PMCR0_IMODE_FIQ 4 +#define PMCR0_IACT BIT(11) + +#endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 873544e58676..b40199c6625e 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -109,16 +110,6 @@ * Note: sysreg-based IPIs are not supported yet. */ -/* Core PMC control register */ -#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) -#define PMCR0_IMODE GENMASK(10, 8) -#define PMCR0_IMODE_OFF 0 -#define PMCR0_IMODE_PMI 1 -#define PMCR0_IMODE_AIC 2 -#define PMCR0_IMODE_HALT 3 -#define PMCR0_IMODE_FIQ 4 -#define PMCR0_IACT BIT(11) - /* IPI request registers */ #define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) -- cgit v1.2.3 From 35bde68bba5413592d88864eced79f8a0482bb4f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Jan 2022 14:12:39 +0100 Subject: arm64: random: implement arch_get_random_int/_long based on RNDR When support for RNDR/RNDRRS was introduced, we elected to only implement arch_get_random_seed_int/_long(), and back them by RNDR instead of RNDRRS. This was needed to prevent potential performance and/or starvation issues resulting from the fact that the /dev/random driver used to invoke these routines on various hot paths. These issues have all been addressed now [0] [1], and so we can wire up this API more straight-forwardly: - map arch_get_random_int/_long() onto RNDR, which returns the output of a DRBG that is reseeded at an implemented defined rate; - map arch_get_random_seed_int/_long() onto the TRNG firmware service, which returns true, conditioned entropy, or onto RNDRRS if the TRNG service is unavailable, which returns the output of a DRBG that is reseeded every time it is used. [0] 390596c9959c random: avoid arch_get_random_seed_long() when collecting IRQ randomness [1] 2ee25b6968b1 random: avoid superfluous call to RDRAND in CRNG extraction Cc: Andre Przywara Cc: Mark Brown Signed-off-by: Ard Biesheuvel Acked-by: Jason A. 
Donenfeld Reviewed-by: Andre Przywara Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220113131239.1610455-1-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/archrandom.h | 45 ++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 09e43272ccb0..d1bb5e71df25 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v) return ok; } +static inline bool __arm64_rndrrs(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDRRS_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + static inline bool __must_check arch_get_random_long(unsigned long *v) { + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + return true; return false; } static inline bool __must_check arch_get_random_int(unsigned int *v) { + if (cpus_have_const_cap(ARM64_HAS_RNG)) { + unsigned long val; + + if (__arm64_rndr(&val)) { + *v = val; + return true; + } + } return false; } @@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) } /* - * Only support the generic interface after we have detected - * the system wide capability, avoiding complexity with the - * cpufeature code and with potential scheduling between CPUs - * with and without the feature. + * RNDRRS is not backed by an entropy source but by a DRBG that is + * reseeded after each invocation. This is not a 100% fit but good + * enough to implement this API if no other entropy source exists. */ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) return true; return false; @@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) } if (cpus_have_const_cap(ARM64_HAS_RNG)) { - if (__arm64_rndr(&val)) { + if (__arm64_rndrrs(&val)) { *v = val; return true; } -- cgit v1.2.3 From 3352a5556f52bb49b82c0258c0c67f7371ba1f80 Mon Sep 17 00:00:00 2001 From: He Ying Date: Tue, 11 Jan 2022 22:24:10 -0500 Subject: arm64: entry: Save some nops when CONFIG_ARM64_PSEUDO_NMI is not set Arm64 pseudo-NMI feature code brings some additional nops when CONFIG_ARM64_PSEUDO_NMI is not set, which is not necessary. So add necessary ifdeffery to avoid it. 
Signed-off-by: He Ying Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20220112032410.29231-1-heying24@huawei.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf488..eb59621d6c6a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -300,6 +300,7 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -307,6 +308,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif +#endif /* Re-enable tag checking (TCO set on exception entry) */ #ifdef CONFIG_ARM64_MTE @@ -330,6 +332,7 @@ alternative_else_nop_endif disable_daif .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Restore pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] @@ -339,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING dsb sy // Ensure priority change is seen by redistributor .L__skip_pmr_sync\@: alternative_else_nop_endif +#endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR -- cgit v1.2.3 From e921da6bc7cac5f0e8458fe5df18ae08eb538f54 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 25 Jan 2022 20:08:33 +0530 Subject: arm64/mm: Consolidate TCR_EL1 fields This renames and moves SYS_TCR_EL1_TCMA1 and SYS_TCR_EL1_TCMA0 definitions into pgtable-hwdef.h thus consolidating all TCR fields in a single header. This does not cause any functional change. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Acked-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1643121513-21854-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-hwdef.h | 2 ++ arch/arm64/include/asm/sysreg.h | 4 ---- arch/arm64/mm/proc.S | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 40085e53f573..66671ff05183 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -273,6 +273,8 @@ #define TCR_NFD1 (UL(1) << 54) #define TCR_E0PD0 (UL(1) << 55) #define TCR_E0PD1 (UL(1) << 56) +#define TCR_TCMA0 (UL(1) << 57) +#define TCR_TCMA1 (UL(1) << 58) /* * TTBR. 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..34800d264f69 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1101,10 +1101,6 @@ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) -/* TCR EL1 Bit Definitions */ -#define SYS_TCR_EL1_TCMA1 (BIT(58)) -#define SYS_TCR_EL1_TCMA0 (BIT(57)) - /* GCR_EL1 Definitions */ #define SYS_GCR_EL1_RRND (BIT(16)) #define SYS_GCR_EL1_EXCL_MASK 0xffffUL diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d35c90d2e47a..50bbed947bec 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 #else /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on -- cgit v1.2.3 From ee017ee353506fcec58e481673e4331ff198a80e Mon Sep 17 00:00:00 2001 From: Jianyong Wu Date: Tue, 1 Feb 2022 19:44:00 +0800 Subject: arm64/mm: avoid fixmap race condition when create pud mapping The 'fixmap' is a global resource and is used recursively by create pud mapping(), leading to a potential race condition in the presence of a concurrent call to alloc_init_pud(): kernel_init thread virtio-mem workqueue thread ================== =========================== alloc_init_pud(...) alloc_init_pud(...) pudp = pud_set_fixmap_offset(...) pudp = pud_set_fixmap_offset(...) READ_ONCE(*pudp) pud_clear_fixmap(...) READ_ONCE(*pudp) // CRASH! As kernel may sleep during creating pud mapping, introduce a mutex lock to serialise use of the fixmap entries by alloc_init_pud(). However, there is no need for locking in early boot stage and it doesn't work well with KASLR enabled when early boot. So, enable lock when system_state doesn't equal to "SYSTEM_BOOTING". Signed-off-by: Jianyong Wu Reviewed-by: Catalin Marinas Fixes: f4710445458c ("arm64: mm: use fixmap when creating page tables") Link: https://lore.kernel.org/r/20220201114400.56885-1-jianyong.wu@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acfae9b41cc8..1681430ecab7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -329,6 +330,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. 
+ */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -359,6 +366,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, -- cgit v1.2.3 From a6aab018829948c1818bed656656df9ae321408b Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 27 Jan 2022 16:21:27 +0000 Subject: arm64: insn: Generate 64 bit mask immediates correctly When the insn framework is used to encode an AND/ORR/EOR instruction, aarch64_encode_immediate() is used to pick the immr imms values. If the immediate is a 64bit mask, with bit 63 set, and zeros in any of the upper 32 bits, the immr value is incorrectly calculated meaning the wrong mask is generated. For example, 0x8000000000000001 should have an immr of 1, but 32 is used, meaning the resulting mask is 0x0000000300000000. It would appear eBPF is unable to hit these cases, as build_insn()'s imm value is a s32, so when used with BPF_ALU64, the sign-extended u64 immediate would always have all-1s or all-0s in the upper 32 bits. KVM does not generate a va_mask with any of the top bits set as these VA wouldn't be usable with TTBR0_EL2. This happens because the rotation is calculated from fls(~imm), which takes an unsigned int, but the immediate may be 64bit. Use fls64() so the 64bit mask doesn't get truncated to a u32. Signed-off-by: James Morse Brown-paper-bag-for: Marc Zyngier Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220127162127.2391947-4-james.morse@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index fccfe363e567..e485cd735261 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -1379,7 +1379,7 @@ static u32 aarch64_encode_immediate(u64 imm, * Compute the rotation to get a continuous set of * ones, with the first bit set at position 0 */ - ror = fls(~imm); + ror = fls64(~imm); } /* -- cgit v1.2.3 From 16860a209cf1ad20a3b454b1c56d64c9ea9532ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 4 Feb 2022 10:44:39 +0000 Subject: arm64: atomics: remove redundant static branch Due to a historical oversight, we emit a redundant static branch for each atomic/atomic64 operation when CONFIG_ARM64_LSE_ATOMICS is selected. We can safely remove this, making the kernel Image reasonably smaller. When CONFIG_ARM64_LSE_ATOMICS is selected, every LSE atomic operation has two preceding static branches with the same target, e.g. b f7c b f7c mov w0, #0x1 // #1 ldadd w0, w0, [x19] This is because the __lse_ll_sc_body() wrapper uses system_uses_lse_atomics(), which checks both `arm64_const_caps_ready` and `cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]`, each of which emits a static branch. This has been the case since commit: addfc38672c73efd ("arm64: atomics: avoid out-of-line ll/sc atomics") However, there was never a need to check `arm64_const_caps_ready`, which was itself introduced in commit: 63a1e1c95e60e798 ("arm64/cpufeature: don't use mutex in bringup path") ... so that cpus_have_const_cap() could fall back to checking the `cpu_hwcaps` bitmap prior to the static keys for individual caps becoming enabled. 
As system_uses_lse_atomics() doesn't check `cpu_hwcaps`, and doesn't need to as we can safely use the LL/SC atomics prior to enabling the `ARM64_HAS_LSE_ATOMICS` static key, it doesn't need to check `arm64_const_caps_ready`. This patch removes the `arm64_const_caps_ready` check from system_uses_lse_atomics(). As the arch_atomic_* routines are meant to be safely usable in noinstr code, I've also marked system_uses_lse_atomics() as __always_inline. This results in one fewer static branch per atomic operation, with the prior example becoming: b f78 mov w0, #0x1 // #1 ldadd w0, w0, [x19] Each static branch consists of the branch itself and an associated __jump_table entry. Removing these has a reasonable impact on the Image size, with a GCC 11.1.0 defconfig v5.17-rc2 Image being reduced by 128KiB: | [mark@lakrids:~/src/linux]% ls -al Image* | -rw-r--r-- 1 mark mark 34619904 Feb 3 18:24 Image.baseline | -rw-r--r-- 1 mark mark 34488832 Feb 3 18:33 Image.onebranch Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Link: https://lore.kernel.org/r/20220204104439.270567-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 5d10051c3e62..29c85810ae69 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -17,12 +17,10 @@ #include extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; -static inline bool system_uses_lse_atomics(void) +static __always_inline bool system_uses_lse_atomics(void) { - return (static_branch_likely(&arm64_const_caps_ready)) && - static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); + return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); } #define __lse_ll_sc_body(op, ...) \ -- cgit v1.2.3 From b62a8486de3ab1d7c2353ec422b9cca3abfcfbcd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:52 +0000 Subject: elfcore: Replace CONFIG_{IA64, UML} checks with a new option As arm64 is about to introduce MTE-specific phdrs in the core dump, add a common CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS option currently selectable by UML_X86 and IA64. 
Signed-off-by: Catalin Marinas Cc: Eric Biederman Link: https://lore.kernel.org/r/20220131165456.2160675-2-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/ia64/Kconfig | 1 + arch/x86/um/Kconfig | 1 + fs/Kconfig.binfmt | 3 +++ include/linux/elfcore.h | 4 ++-- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..e003b2473c64 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c8..ead7e5b3a975 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4d5ae61580aa..68e586283764 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -36,6 +36,9 @@ config COMPAT_BINFMT_ELF config ARCH_BINFMT_ELF_STATE bool +config ARCH_BINFMT_ELF_EXTRA_PHDRS + bool + config ARCH_HAVE_ELF_PROT bool diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 746e081879a5..f8e206e82476 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,7 +114,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) +#ifdef CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its @@ -149,6 +149,6 @@ static inline size_t elf_core_extra_data_size(void) { return 0; } -#endif +#endif /* CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS */ #endif /* _LINUX_ELFCORE_H */ -- cgit v1.2.3 From ab1e435ca7913e384ed801210418633eee43a71b Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:54 +0000 Subject: arm64: mte: Define the number of bytes for storing the tags in a page Rather than explicitly calculating the number of bytes for a compact tag storage format corresponding to a page, just add a MTE_PAGE_TAG_STORAGE macro. With the current MTE implementation of 4 bits per tag, we store 2 tags in a byte. 
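As a worked example (assuming the common 4KiB page size and the 16-byte
MTE tag granule), the new macro evaluates to:

    MTE_PAGE_TAG_STORAGE = (4096 / 16) * 4 / 8 = 128 bytes per page

i.e. the compact tag storage for a whole page fits in a 128-byte buffer.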
Signed-off-by: Catalin Marinas Acked-by: Luis Machado Link: https://lore.kernel.org/r/20220131165456.2160675-4-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mte-def.h | 1 + arch/arm64/lib/mte.S | 4 ++-- arch/arm64/mm/mteswap.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e..14ee86b019c2 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174..8590af3c98c0 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee..a9e50e930484 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) -- cgit v1.2.3 From 6dd8b1a0b6cb3ed93d24110e02e67ff9d006610a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 31 Jan 2022 16:54:55 +0000 Subject: arm64: mte: Dump the MTE tags in the core file For each vma mapped with PROT_MTE (the VM_MTE flag set), generate a PT_ARM_MEMTAG_MTE segment in the core file and dump the corresponding tags. The in-file size for such segments is 128 bytes per page. For pages in a VM_MTE vma which are not present in the user page tables or don't have the PG_mte_tagged flag set (e.g. execute-only), just write zeros in the core file. An example of program headers for two vmas, one 2-page, the other 4-page long: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align ... LOAD 0x030000 0x0000ffff80034000 0x0000000000000000 0x000000 0x002000 RW 0x1000 LOAD 0x030000 0x0000ffff80036000 0x0000000000000000 0x004000 0x004000 RW 0x1000 ... 
LOPROC+0x1 0x05b000 0x0000ffff80034000 0x0000000000000000 0x000100 0x002000 0 LOPROC+0x1 0x05b100 0x0000ffff80036000 0x0000000000000000 0x000200 0x004000 0 Signed-off-by: Catalin Marinas Acked-by: Luis Machado Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220131165456.2160675-5-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/elfcore.c | 123 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 arch/arm64/kernel/elfcore.c (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cbcd42decb2a..b55c11796fad 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a21408..986837d7ec82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 000000000000..3455ee4acc04 --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +#include +#include + +#define for_each_mte_vma(tsk, vma) \ + if (system_supports_mte()) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. 
+ */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + + for_each_mte_vma(current, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + + for_each_mte_vma(current, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + + for_each_mte_vma(current, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} -- cgit v1.2.3 From 0f61f6be1f7f44edfab0cb731c0a2340a838956f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:27 +0000 Subject: arm64: clean up symbol aliasing Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those to simplify and more consistently define function aliases across arch/arm64. Aliases are now defined in terms of a canonical function name. For position-independent functions I've made the __pi_ name the canonical name, and defined other alises in terms of this. The SYM_FUNC_{START,END}_PI(func) macros obscure the __pi_ name, and make this hard to seatch for. The SYM_FUNC_START_WEAK_PI() macro also obscures the fact that the __pi_ fymbol is global and the symbol is weak. For clarity, I have removed these macros and used SYM_FUNC_{START,END}() directly with the __pi_ name. For example: SYM_FUNC_START_WEAK_PI(func) ... asm insns ... SYM_FUNC_END_PI(func) EXPORT_SYMBOL(func) ... becomes: SYM_FUNC_START(__pi_func) ... asm insns ... SYM_FUNC_END(__pi_func) SYM_FUNC_ALIAS_WEAK(func, __pi_func) EXPORT_SYMBOL(func) For clarity, where there are multiple annotations such as EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For example, where a function has a name and an alias which are both exported, this is organised as: SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) EXPORT_SYMBOL(func) SYM_FUNC_ALIAS(alias, func) EXPORT_SYMBOL(alias) For consistency with the other string functions, I've defined strrchr as a position-independent function, as it can safely be used as such even though we have no users today. As we no longer use SYM_FUNC_{START,END}_ALIAS(), our local copies are removed. The common versions will be removed by a subsequent patch. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Catalin Marinas Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Joey Gouly Cc: Will Deacon Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/linkage.h | 24 ------------------------ arch/arm64/kvm/hyp/nvhe/cache.S | 5 +++-- arch/arm64/lib/clear_page.S | 5 +++-- arch/arm64/lib/copy_page.S | 5 +++-- arch/arm64/lib/memchr.S | 5 +++-- arch/arm64/lib/memcmp.S | 6 +++--- arch/arm64/lib/memcpy.S | 21 +++++++++++---------- arch/arm64/lib/memset.S | 12 +++++++----- arch/arm64/lib/strchr.S | 6 ++++-- arch/arm64/lib/strcmp.S | 6 +++--- arch/arm64/lib/strlen.S | 6 +++--- arch/arm64/lib/strncmp.S | 6 +++--- arch/arm64/lib/strnlen.S | 6 ++++-- arch/arm64/lib/strrchr.S | 5 +++-- arch/arm64/mm/cache.S | 35 +++++++++++++++++++++-------------- 15 files changed, 74 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b77e9b3f5371..43f8c25b3fda 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,28 +39,4 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; -/* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define SYM_FUNC_START_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START(x) - -#define SYM_FUNC_START_WEAK_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START_WEAK(x) - -#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) - -#define SYM_FUNC_END_PI(x) \ - SYM_FUNC_END(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - -#define SYM_FUNC_END_ALIAS_PI(x) \ - SYM_FUNC_END_ALIAS(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - #endif diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0..0c367eb5f4e2 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,8 @@ #include #include -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 1fd5d790ab80..ebde40e7fa2b 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START_PI(clear_page) +SYM_FUNC_START(__pi_clear_page) mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf @@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 2b ret -SYM_FUNC_END_PI(clear_page) +SYM_FUNC_END(__pi_clear_page) +SYM_FUNC_ALIAS(clear_page, __pi_clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 29144f4cd449..c336d2ffdec5 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START_PI(copy_page) +SYM_FUNC_START(__pi_copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. 
prfm pldl1strm, [x1, #128] @@ -75,5 +75,6 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END_PI(copy_page) +SYM_FUNC_END(__pi_copy_page) +SYM_FUNC_ALIAS(copy_page, __pi_copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 7c2276fdab54..37a9f2a4f7f4 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -38,7 +38,7 @@ .p2align 4 nop -SYM_FUNC_START_WEAK_PI(memchr) +SYM_FUNC_START(__pi_memchr) and chrin, chrin, #0xff lsr wordcnt, cntin, #3 cbz wordcnt, L(byte_loop) @@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp) L(not_found): mov result, #0 ret -SYM_FUNC_END_PI(memchr) +SYM_FUNC_END(__pi_memchr) +SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 7d956384222f..a5ccf2c55f91 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -32,7 +32,7 @@ #define tmp1 x7 #define tmp2 x8 -SYM_FUNC_START_WEAK_PI(memcmp) +SYM_FUNC_START(__pi_memcmp) subs limit, limit, 8 b.lo L(less8) @@ -134,6 +134,6 @@ L(byte_loop): b.eq L(byte_loop) sub result, data1w, data2w ret - -SYM_FUNC_END_PI(memcmp) +SYM_FUNC_END(__pi_memcmp) +SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index b82fd64ee1e1..4ab48d49c451 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,10 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. */ -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_ALIAS_PI(memmove) -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) +SYM_FUNC_START(__pi_memcpy) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -241,12 +238,16 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy) -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_END_ALIAS_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + +SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) + +SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a9c1c9a01ea9..a5aebe82ad73 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,8 +42,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) +SYM_FUNC_START(__pi_memset) mov dst, dstin /* Preserve return value. 
*/ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) +SYM_FUNC_END(__pi_memset) + +SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) +EXPORT_SYMBOL(memset) diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index 1f47eae3b0d6..94ee67a6b212 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK(strchr) +SYM_FUNC_START(__pi_strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -SYM_FUNC_END(strchr) +SYM_FUNC_END(__pi_strchr) + +SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec97..cda7de747efc 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -41,7 +41,7 @@ /* Start of performance-critical section -- one 64B cache line. */ .align 6 -SYM_FUNC_START_WEAK_PI(strcmp) +SYM_FUNC_START(__pi_strcmp) eor tmp1, src1, src2 mov zeroones, #REP8_01 tst tmp1, #7 @@ -171,6 +171,6 @@ L(loop_misaligned): L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) EXPORT_SYMBOL_NOHWKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 1648790e91b3..4919fe81ae54 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -79,7 +79,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. */ -SYM_FUNC_START_WEAK_PI(strlen) +SYM_FUNC_START(__pi_strlen) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -208,6 +208,6 @@ L(page_cross): csel data1, data1, tmp4, eq csel data2, data2, tmp2, eq b L(page_cross_entry) - -SYM_FUNC_END_PI(strlen) +SYM_FUNC_END(__pi_strlen) +SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6..a848abcec975 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -44,7 +44,7 @@ #define endloop x15 #define count mask -SYM_FUNC_START_WEAK_PI(strncmp) +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -256,6 +256,6 @@ L(done_loop): L(ret0): mov result, #0 ret - -SYM_FUNC_END_PI(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) EXPORT_SYMBOL_NOHWKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index b72913a99038..d5ac0e10a01d 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -SYM_FUNC_START_WEAK_PI(strnlen) +SYM_FUNC_START(__pi_strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). 
*/ .Lhit_limit: mov len, limit ret -SYM_FUNC_END_PI(strnlen) +SYM_FUNC_END(__pi_strnlen) + +SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 13132d1ed6d1..a5123cf0ce12 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK_PI(strrchr) +SYM_FUNC_START(__pi_strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr) b 1b 2: mov x0, x3 ret -SYM_FUNC_END_PI(strrchr) +SYM_FUNC_END(__pi_strrchr) +SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7d0563db4201..0ea6cc25dc66 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) /* * dcache_clean_pou(start, end) @@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou) * - start - kernel start address of region * - end - kernel end address of region */ -SYM_FUNC_START_PI(dcache_inval_poc) +SYM_FUNC_START(__pi_dcache_inval_poc) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(dcache_inval_poc) +SYM_FUNC_END(__pi_dcache_inval_poc) +SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) /* * dcache_clean_poc(start, end) @@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_poc) +SYM_FUNC_START(__pi_dcache_clean_poc) dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_poc) +SYM_FUNC_END(__pi_dcache_clean_poc) +SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) /* * dcache_clean_pop(start, end) @@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_pop) +SYM_FUNC_START(__pi_dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_pop) +SYM_FUNC_END(__pi_dcache_clean_pop) +SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop) * - start - virtual start address of region * - size - size in question */ -SYM_FUNC_START_PI(__dma_flush_area) +SYM_FUNC_START(__pi___dma_flush_area) add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__dma_flush_area) +SYM_FUNC_END(__pi___dma_flush_area) +SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_map_area) +SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc -SYM_FUNC_END_PI(__dma_map_area) 
+SYM_FUNC_END(__pi___dma_map_area) +SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_unmap_area) +SYM_FUNC_START(__pi___dma_unmap_area) add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __pi_dcache_inval_poc ret -SYM_FUNC_END_PI(__dma_unmap_area) +SYM_FUNC_END(__pi___dma_unmap_area) +SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) -- cgit v1.2.3 From 7be2e319640c8926bbba4e004a1bee9cf6ed67b0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Feb 2022 16:22:28 +0000 Subject: x86: clean up symbol aliasing Now that we have SYM_FUNC_ALIAS() and SYM_FUNC_ALIAS_WEAK(), use those to simplify the definition of function aliases across arch/x86. For clarity, where there are multiple annotations such as EXPORT_SYMBOL(), I've tried to keep annotations grouped by symbol. For example, where a function has a name and an alias which are both exported, this is organised as: SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) EXPORT_SYMBOL(func) SYM_FUNC_ALIAS(alias, func) EXPORT_SYMBOL(alias) Where there are only aliases and no exports or other annotations, I have not bothered with line spacing, e.g. SYM_FUNC_START(func) ... asm insns ... SYM_FUNC_END(func) SYM_FUNC_ALIAS(alias, func) The tools/perf/ copies of memset_64.S and memset_64.S are updated likewise to avoid the build system complaining these are mismatched: | Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' | diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S | Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' | diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Acked-by: Josh Poimboeuf Acked-by: Mark Brown Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Dave Hansen Cc: Ingo Molnar Cc: Jiri Slaby Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220216162229.1076788-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/x86/boot/compressed/head_32.S | 3 +-- arch/x86/boot/compressed/head_64.S | 3 +-- arch/x86/crypto/aesni-intel_asm.S | 4 +--- arch/x86/lib/memcpy_64.S | 10 +++++----- arch/x86/lib/memmove_64.S | 4 ++-- arch/x86/lib/memset_64.S | 6 +++--- tools/arch/x86/lib/memcpy_64.S | 10 +++++----- tools/arch/x86/lib/memset_64.S | 6 +++--- 8 files changed, 21 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca82..3b354eb9516d 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB SYM_FUNC_START(efi32_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f40457..dea95301196b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -535,7 +535,6 @@ SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB .org 0x390 SYM_FUNC_START(efi64_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ call efi_main @@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) leaq rva(startup_64)(%rax), %rax jmp *%rax SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd7220..837c1e0aa021 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif - -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) add $0x10, TKEYP RET SYM_FUNC_END(_key_expansion_256a) -SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. 
This is faster and * simpler than memcpy. Use memcpy_erms when possible. diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df712..d83cba364e31 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,6 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax @@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove) 13: RET SYM_FUNC_END(__memmove) -SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. -- cgit v1.2.3 From 97e58e395e9c074fd096dad13c54e9f4112cf71d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 17 Feb 2022 15:22:29 +0800 Subject: arm64: move AARCH64_BREAK_FAULT into insn-def.h If CONFIG_ARM64_LSE_ATOMICS is off, encoders for LSE-related instructions can return AARCH64_BREAK_FAULT directly in insn.h. In order to access AARCH64_BREAK_FAULT in insn.h, we can not include debug-monitors.h in insn.h, because debug-monitors.h has already depends on insn.h, so just move AARCH64_BREAK_FAULT into insn-def.h. It will be used by the following patch to eliminate unnecessary LSE-related encoders when CONFIG_ARM64_LSE_ATOMICS is off. 
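For reference, assuming the usual FAULT_BRK_IMM value of 0x100 from
brk-imm.h, the relocated definition still expands to the same encoding:

    AARCH64_BREAK_FAULT = 0xd4200000 | (0x100 << 5) = 0xd4202000  /* brk #0x100 */

so moving it between headers does not change the emitted instruction.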
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20220217072232.1186625-2-houtao1@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 12 ------------ arch/arm64/include/asm/insn-def.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 657c921fd784..00c291067e57 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -34,18 +34,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * BRK instruction encoding - * The #imm16 value should be placed at bits[20:5] within BRK ins - */ -#define AARCH64_BREAK_MON 0xd4200000 - -/* - * BRK instruction for provoking a fault on purpose - * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. - */ -#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) - #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h index 2c075f615c6a..1a7d0d483698 100644 --- a/arch/arm64/include/asm/insn-def.h +++ b/arch/arm64/include/asm/insn-def.h @@ -3,7 +3,21 @@ #ifndef __ASM_INSN_DEF_H #define __ASM_INSN_DEF_H +#include + /* A64 instructions are always 32 bits. */ #define AARCH64_INSN_SIZE 4 +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * BRK instruction for provoking a fault on purpose + * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. + */ +#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) + #endif /* __ASM_INSN_DEF_H */ -- cgit v1.2.3 From fa1114d9eba5087ba5e81aab4c56f546995e6cd3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 17 Feb 2022 15:22:30 +0800 Subject: arm64: insn: add encoders for atomic operations It is a preparation patch for eBPF atomic supports under arm64. eBPF needs support atomic[64]_fetch_add, atomic[64]_[fetch_]{and,or,xor} and atomic[64]_{xchg|cmpxchg}. The ordering semantics of eBPF atomics are the same with the implementations in linux kernel. Add three helpers to support LDCLR/LDEOR/LDSET/SWP, CAS and DMB instructions. STADD/STCLR/STEOR/STSET are simply encoded as aliases for LDADD/LDCLR/LDEOR/LDSET with XZR as the destination register, so no extra helper is added. atomic_fetch_add() and other atomic ops needs support for STLXR instruction, so extend enum aarch64_insn_ldst_type to do that. LDADD/LDEOR/LDSET/SWP and CAS instructions are only available when LSE atomics is enabled, so just return AARCH64_BREAK_FAULT directly in these newly-added helpers if CONFIG_ARM64_LSE_ATOMICS is disabled. 
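As a rough usage sketch only (register choices are arbitrary and for
illustration), a caller such as the arm64 BPF JIT could emit an
acquire/release 64-bit fetch-add via the new helper:

	u32 insn;

	/*
	 * LDADDAL x1, x0, [x2]: atomically add x1 to the value at [x2],
	 * returning the old value in x0 (result = Rt, address = Rn,
	 * value = Rs).
	 */
	insn = aarch64_insn_gen_atomic_ld_op(AARCH64_INSN_REG_0,
					     AARCH64_INSN_REG_2,
					     AARCH64_INSN_REG_1,
					     AARCH64_INSN_SIZE_64,
					     AARCH64_INSN_MEM_ATOMIC_ADD,
					     AARCH64_INSN_MEM_ORDER_ACQREL);

Passing AARCH64_INSN_REG_ZR as the result register yields the STADD-style
aliases, matching the previous aarch64_insn_gen_stadd() behaviour.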
Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20220217072232.1186625-3-houtao1@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 80 ++++++++++++++++-- arch/arm64/lib/insn.c | 185 ++++++++++++++++++++++++++++++++++++++---- arch/arm64/net/bpf_jit.h | 11 ++- 3 files changed, 253 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 6b776c8667b2..0b6b31307e68 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -205,7 +205,9 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, AARCH64_INSN_LDST_LOAD_EX, + AARCH64_INSN_LDST_LOAD_ACQ_EX, AARCH64_INSN_LDST_STORE_EX, + AARCH64_INSN_LDST_STORE_REL_EX, }; enum aarch64_insn_adsb_type { @@ -280,6 +282,36 @@ enum aarch64_insn_adr_type { AARCH64_INSN_ADR_TYPE_ADR, }; +enum aarch64_insn_mem_atomic_op { + AARCH64_INSN_MEM_ATOMIC_ADD, + AARCH64_INSN_MEM_ATOMIC_CLR, + AARCH64_INSN_MEM_ATOMIC_EOR, + AARCH64_INSN_MEM_ATOMIC_SET, + AARCH64_INSN_MEM_ATOMIC_SWP, +}; + +enum aarch64_insn_mem_order_type { + AARCH64_INSN_MEM_ORDER_NONE, + AARCH64_INSN_MEM_ORDER_ACQ, + AARCH64_INSN_MEM_ORDER_REL, + AARCH64_INSN_MEM_ORDER_ACQREL, +}; + +enum aarch64_insn_mb_type { + AARCH64_INSN_MB_SY, + AARCH64_INSN_MB_ST, + AARCH64_INSN_MB_LD, + AARCH64_INSN_MB_ISH, + AARCH64_INSN_MB_ISHST, + AARCH64_INSN_MB_ISHLD, + AARCH64_INSN_MB_NSH, + AARCH64_INSN_MB_NSHST, + AARCH64_INSN_MB_NSHLD, + AARCH64_INSN_MB_OSH, + AARCH64_INSN_MB_OSHST, + AARCH64_INSN_MB_OSHLD, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { \ @@ -303,6 +335,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) +__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000) +__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000) +__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) +__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) +__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -474,13 +511,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, @@ -541,6 +571,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy); +#ifdef CONFIG_ARM64_LSE_ATOMICS +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order); +u32 aarch64_insn_gen_cas(enum 
aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order); +#else +static inline +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} + +static inline +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} +#endif +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); + s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index e485cd735261..5e90887deec4 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, switch (type) { case AARCH64_INSN_LDST_LOAD_EX: + case AARCH64_INSN_LDST_LOAD_ACQ_EX: insn = aarch64_insn_get_load_ex_value(); + if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX) + insn |= BIT(15); break; case AARCH64_INSN_LDST_STORE_EX: + case AARCH64_INSN_LDST_STORE_REL_EX: insn = aarch64_insn_get_store_ex_value(); + if (type == AARCH64_INSN_LDST_STORE_REL_EX) + insn |= BIT(15); break; default: pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type); @@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +#ifdef CONFIG_ARM64_LSE_ATOMICS +static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - u32 insn = aarch64_insn_get_ldadd_value(); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = 2; + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = 1; + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = 3; + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~GENMASK(23, 22); + insn |= order << 22; + + return insn; +} + +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (op) { + case AARCH64_INSN_MEM_ATOMIC_ADD: + insn = aarch64_insn_get_ldadd_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_CLR: + insn = aarch64_insn_get_ldclr_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_EOR: + insn = aarch64_insn_get_ldeor_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SET: + insn = aarch64_insn_get_ldset_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SWP: + insn = aarch64_insn_get_swp_value(); + break; + default: + pr_err("%s: unimplemented mem atomic op %d\n", __func__, op); + return AARCH64_BREAK_FAULT; + } switch (size) { case AARCH64_INSN_SIZE_32: @@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, insn = aarch64_insn_encode_ldst_size(size, insn); + insn = 
aarch64_insn_encode_ldst_order(order, insn); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, result); @@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, value); } -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - /* - * STADD is simply encoded as an alias for LDADD with XZR as - * the destination register. - */ - return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, - value, size); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = BIT(22); + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = BIT(15); + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = BIT(15) | BIT(22); + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~(BIT(15) | BIT(22)); + insn |= order; + + return insn; +} + +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_cas_value(); + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_cas_order(order, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); } +#endif static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, @@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + switch (type) { + case AARCH64_INSN_MB_SY: + opt = 0xf; + break; + case AARCH64_INSN_MB_ST: + opt = 0xe; + break; + case AARCH64_INSN_MB_LD: + opt = 0xd; + break; + case AARCH64_INSN_MB_ISH: + opt = 0xb; + break; + case AARCH64_INSN_MB_ISHST: + opt = 0xa; + break; + case AARCH64_INSN_MB_ISHLD: + opt = 0x9; + break; + case AARCH64_INSN_MB_NSH: + opt = 0x7; + break; + case AARCH64_INSN_MB_NSHST: + opt = 0x6; + break; + case AARCH64_INSN_MB_NSHLD: + opt = 0x5; + break; + default: + pr_err("%s: unknown dmb type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_dmb_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3..9d9250c7cc72 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -89,9 +89,16 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* LSE atomics */ +/* + * LSE atomics + * + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. 
+ */ #define A64_STADD(sf, Rn, Rs) \ - aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) + aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \ + A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \ + AARCH64_INSN_MEM_ORDER_NONE) /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ -- cgit v1.2.3 From 38ddf7dafaeaf3fcdea65b3b4dfb06b4bcd9cc15 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 18 Feb 2022 17:29:45 -0800 Subject: arm64: mte: avoid clearing PSTATE.TCO on entry unless necessary On some microarchitectures, clearing PSTATE.TCO is expensive. Clearing TCO is only necessary if in-kernel MTE is enabled, or if MTE is enabled in the userspace process in synchronous (or, soon, asymmetric) mode, because we do not report uaccess faults to userspace in none or asynchronous modes. Therefore, adjust the kernel entry code to clear TCO only if necessary. Because it is now possible to switch to a task in which TCO needs to be clear from a task in which TCO is set, we also need to do the same thing on task switch. Signed-off-by: Peter Collingbourne Link: https://linux-review.googlesource.com/id/I52d82a580bd0500d420be501af2c35fa8c90729e Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220219012945.894950-2-pcc@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mte.h | 22 ++++++++++++++++++++++ arch/arm64/kernel/entry-common.c | 3 +++ arch/arm64/kernel/entry.S | 7 ------- arch/arm64/kernel/mte.c | 3 +++ 4 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 075539f5f1c8..adcb937342f1 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -11,7 +11,9 @@ #ifndef __ASSEMBLY__ #include +#include #include +#include #include #include @@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +static inline void mte_disable_tco_entry(struct task_struct *task) +{ + if (!system_supports_mte()) + return; + + /* + * Re-enable tag checking (TCO set on exception entry). This is only + * necessary if MTE is enabled in either the kernel or the userspace + * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set + * for both). With MTE disabled in the kernel and disabled or + * asynchronous in userspace, tag check faults (including in uaccesses) + * are not reported, therefore there is no need to re-enable checking. + * This is beneficial on microarchitectures where re-enabling TCO is + * expensive. + */ + if (kasan_hw_tags_enabled() || + (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))) + asm volatile(SET_PSTATE_TCO(0)); +} + #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. 
*/ DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index ef7fcefb96bd..7093b578e325 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) { __enter_from_kernel_mode(regs); mte_check_tfsr_entry(); + mte_disable_tco_entry(current); } /* @@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); trace_hardirqs_off_finish(); + mte_disable_tco_entry(current); } static __always_inline void enter_from_user_mode(struct pt_regs *regs) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 772ec2ecf488..e1013a83d4f0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -308,13 +308,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif - /* Re-enable tag checking (TCO set on exception entry) */ -#ifdef CONFIG_ARM64_MTE -alternative_if ARM64_MTE - SET_PSTATE_TCO(0) -alternative_else_nop_endif -#endif - /* * Registers that may be useful after this macro is invoked: * diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f418ebc65f95..f983795b5eda 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -253,6 +253,9 @@ void mte_thread_switch(struct task_struct *next) mte_update_sctlr_user(next); mte_update_gcr_excl(next); + /* TCO may not have been disabled on exception entry for the current task. */ + mte_disable_tco_entry(next); + /* * Check if an async tag exception occurred at EL1. * -- cgit v1.2.3 From a8a733b20109fc85a5b2e0318cef036b2c818ac3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 17 Feb 2022 10:22:37 +0530 Subject: arm64/hugetlb: Define __hugetlb_valid_size() arch_hugetlb_valid_size() can be just factored out to create another helper to be used in arch_hugetlb_migration_supported() as well. This just defines __hugetlb_valid_size() for that purpose. 
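As a hedged worked example of the shared size set (hugetlb_size_demo() is a made-up helper; the results assume a 4K-granule kernel where pud_sect_supported() is true), both entry points now agree on which sizes are valid:

#include <linux/bug.h>
#include <linux/hugetlb.h>
#include <linux/sizes.h>

static void __init hugetlb_size_demo(void)
{
	WARN_ON(!arch_hugetlb_valid_size(SZ_64K));	/* CONT_PTE_SIZE */
	WARN_ON(!arch_hugetlb_valid_size(SZ_2M));	/* PMD_SIZE */
	WARN_ON(!arch_hugetlb_valid_size(SZ_32M));	/* CONT_PMD_SIZE */
	WARN_ON(!arch_hugetlb_valid_size(SZ_1G));	/* PUD_SIZE */
	WARN_ON(arch_hugetlb_valid_size(SZ_16M));	/* not a supported size */
}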
Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Catalin Marinas Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1645073557-6150-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/hugetlbpage.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index ffb9c229610a..a33aba91ad89 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void) } #endif /* CONFIG_CMA */ -#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION -bool arch_hugetlb_migration_supported(struct hstate *h) +static bool __hugetlb_valid_size(unsigned long size) { - size_t pagesize = huge_page_size(h); - - switch (pagesize) { + switch (size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: return pud_sect_supported(); #endif - case PMD_SIZE: case CONT_PMD_SIZE: + case PMD_SIZE: case CONT_PTE_SIZE: return true; } - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); + return false; } + +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + size_t pagesize = huge_page_size(h); + + if (!__hugetlb_valid_size(pagesize)) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + return false; + } + return true; +} #endif int pmd_huge(pmd_t pmd) @@ -506,16 +515,5 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { - switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - return pud_sect_supported(); -#endif - case CONT_PMD_SIZE: - case PMD_SIZE: - case CONT_PTE_SIZE: - return true; - } - - return false; + return __hugetlb_valid_size(size); } -- cgit v1.2.3 From 3a4f7ef4bed5bdc77a1ac8132f9f0650bbcb3eae Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Fri, 18 Feb 2022 02:37:04 +0000 Subject: arm64: Change elfcore for_each_mte_vma() to use VMA iterator Rework for_each_mte_vma() to use a VMA iterator instead of an explicit linked-list. This will allow easy integration with the maple tree work which removes the VMA list altogether. Signed-off-by: Liam R. 
Howlett Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220218023650.672072-1-Liam.Howlett@oracle.com [will: Folded in fix from Catalin] Link: https://lore.kernel.org/r/YhUcywqIhmHvX6dG@arm.com Signed-off--by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/elfcore.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3455ee4acc04..3ed39c61a510 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,9 +8,16 @@ #include #include -#define for_each_mte_vma(tsk, vma) \ +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ if (system_supports_mte()) \ - for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ + for_each_vma(vmi, vma) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -65,8 +72,9 @@ Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) vma_count++; return vma_count; @@ -75,8 +83,9 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -100,8 +109,9 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) + for_each_mte_vma(vmi, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -110,8 +120,9 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(current, vma) { + for_each_mte_vma(vmi, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; -- cgit v1.2.3 From da844beb6d9f97cb6fe4b443f9610a9fcc534f9d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 12:49:50 +0000 Subject: arm64: cpufeature: Account min_field_value when cheking secondaries for PAuth In case, both boot_val and sec_val have value below min_field_value we would wrongly report that address authentication is supported. It is not a big issue because we enable address authentication based on boot cpu (and check there is correct). 
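A hedged sketch of the problem and the fix (standalone illustration, not kernel code; the function names are made up). With min_field_value == 1, a boot CPU and a secondary that both report the field as 0 used to pass the old check:

static bool secondary_has_auth_old(int boot_val, int sec_val)
{
	return sec_val == boot_val;		/* 0 == 0: wrongly true */
}

static bool secondary_has_auth_new(int boot_val, int sec_val,
				   int min_field_value)
{
	/* only equal *and* at least the minimum counts as supported */
	return (sec_val >= min_field_value) && (sec_val == boot_val);
}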
Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-2-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..3271770b60d6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1829,7 +1829,7 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), entry->field_pos, entry->sign); - return sec_val == boot_val; + return (sec_val >= entry->min_field_value) && (sec_val == boot_val); } static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, -- cgit v1.2.3 From be3256a086afb4048baf18e6a35a3a81482aa2fa Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 12:49:51 +0000 Subject: arm64: cpufeature: Mark existing PAuth architected algorithm as QARMA5 In preparation of supporting PAuth QARMA3 architected algorithm mark existing one as QARMA5, so we can distingwish between two. Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-3-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 20 ++++++++++++-------- arch/arm64/tools/cpucaps | 4 ++-- 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3271770b60d6..1b955bea2856 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1835,15 +1835,19 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || - has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + + return apa || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, int __unused) { - return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + + return gpa || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -2230,8 +2234,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { }, #ifdef CONFIG_ARM64_PTR_AUTH { - .desc = "Address authentication (architected algorithm)", - .capability = ARM64_HAS_ADDRESS_AUTH_ARCH, + .desc = "Address authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, @@ -2255,8 +2259,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_address_auth_metacap, }, { - .desc = "Generic authentication (architected algorithm)", - .capability = ARM64_HAS_GENERIC_AUTH_ARCH, + .desc = "Generic authentication (architected QARMA5 algorithm)", + .capability = 
ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 9c65b1e25a96..4c39247581f6 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,7 +7,7 @@ BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH -HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN HAS_ARMv8_4_TTL @@ -21,7 +21,7 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH -HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING HAS_LDAPR -- cgit v1.2.3 From def8c222f054d18aac1fd065a50b9db5feaefa9d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 12:49:52 +0000 Subject: arm64: Add support of PAuth QARMA3 architected algorithm QARMA3 is relaxed version of the QARMA5 algorithm which expected to reduce the latency of calculation while still delivering a suitable level of security. Support for QARMA3 can be discovered via ID_AA64ISAR2_EL1 APA3, bits [15:12] Indicates whether the QARMA3 algorithm is implemented in the PE for address authentication in AArch64 state. GPA3, bits [11:8] Indicates whether the QARMA3 algorithm is implemented in the PE for generic code authentication in AArch64 state. Signed-off-by: Vladimir Murzin Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20220224124952.119612-4-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm_pointer_auth.h | 3 ++ arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/include/asm/sysreg.h | 12 ++++++++ arch/arm64/kernel/cpufeature.c | 41 ++++++++++++++++++++++++-- arch/arm64/kernel/idreg-override.c | 16 ++++++++-- arch/arm64/kvm/arm.c | 1 + arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 5 ++++ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 14 +++++++++ arch/arm64/kvm/sys_regs.c | 5 ++++ arch/arm64/tools/cpucaps | 2 ++ 11 files changed, 97 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index f1bba5fc61c4..ead62f7dd269 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -60,6 +60,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921..fe7137ff6190 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -854,6 +854,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) extern struct arm64_ftr_override id_aa64mmfr1_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64isar1_override; +extern struct arm64_ftr_override id_aa64isar2_override; u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 462882f356c7..aa7fa2a08f06 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); extern u64 
kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..cbe416462b88 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -773,6 +773,8 @@ #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 /* id_aa64isar2 */ +#define ID_AA64ISAR2_APA3_SHIFT 12 +#define ID_AA64ISAR2_GPA3_SHIFT 8 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -786,6 +788,16 @@ #define ID_AA64ISAR2_WFXT_NI 0x0 #define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 +#define ID_AA64ISAR2_APA3_NI 0x0 +#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 +#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 + +#define ID_AA64ISAR2_GPA3_NI 0x0 +#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1b955bea2856..f6ecad8fc1c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -226,6 +226,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -596,6 +600,7 @@ static const struct arm64_ftr_bits ftr_raz[] = { struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar2_override; static const struct __ftr_reg_entry { u32 sys_id; @@ -644,6 +649,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, + &id_aa64isar2_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -1837,8 +1844,9 @@ static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, { bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); - return apa || api; + return apa || apa3 || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, @@ -1846,8 +1854,9 @@ static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, { bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3); - 
return gpa || gpi; + return gpa || gpa3 || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -2243,6 +2252,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = has_address_auth_cpucap, }, + { + .desc = "Address authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .matches = has_address_auth_cpucap, + }, { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, @@ -2268,6 +2287,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, + { + .desc = "Generic authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .matches = has_cpuid_feature, + }, { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, @@ -2415,6 +2444,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, + FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) + }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) @@ -2427,6 +2460,10 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, + FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) + }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d8e606fe3c21..8a2ceb591686 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -17,7 +17,7 @@ #define FTR_DESC_NAME_LEN 20 #define FTR_DESC_FIELD_LEN 10 #define FTR_ALIAS_NAME_LEN 30 -#define FTR_ALIAS_OPTION_LEN 80 +#define FTR_ALIAS_OPTION_LEN 116 struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; @@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = { }, }; +static const struct ftr_set_desc isar2 __initconst = { + .name = "id_aa64isar2", + .override = &id_aa64isar2_override, + .fields = { + { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, + { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + {} + }, +}; + extern struct arm64_ftr_override kaslr_feature_override; static const struct ftr_set_desc kaslr __initconst = { @@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, &pfr1, &isar1, + &isar2, &kaslr, }; @@ -100,7 +111,8 @@ static const struct { { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " - "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + "id_aa64isar1.api=0 id_aa64isar1.apa=0 " + "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; 
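For orientation before the KVM side of the diff, a hedged sketch (system_has_qarma3_address_auth() is a made-up helper, not part of the patch) of how the new ID_AA64ISAR2_EL1 fields can be probed:

#include <asm/cpufeature.h>
#include <asm/sysreg.h>

static bool system_has_qarma3_address_auth(void)
{
	u64 isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);

	/* APA3 >= "architected" means QARMA3 address authentication */
	return cpuid_feature_extract_unsigned_field(isar2,
						    ID_AA64ISAR2_APA3_SHIFT) >=
	       ID_AA64ISAR2_APA3_ARCHITECTED;
}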
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index ecc5958e27fe..f3bfc0ddeb0b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1870,6 +1870,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); + kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index eea1f6a53723..5ad626527d41 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -192,6 +192,11 @@ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ ) +#define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ) + u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 792cf6e6ac92..33f5181af330 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64isar2_el1_sys_val; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; @@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } +static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + + return id_aa64isar2_el1_sys_val & allow_mask; +} + static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) return get_pvm_id_aa64isar0(vcpu); case SYS_ID_AA64ISAR1_EL1: return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64ISAR2_EL1: + return get_pvm_id_aa64isar2(vcpu); case SYS_ID_AA64MMFR0_EL1: return get_pvm_id_aa64mmfr0(vcpu); case SYS_ID_AA64MMFR1_EL1: diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4dc2fba316ff..baa65292bbc2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; + case SYS_ID_AA64ISAR2_EL1: + if (!vcpu_has_ptrauth(vcpu)) + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 4c39247581f6..162bc2443217 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,6 +7,7 @@ 
BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH_QARMA3 HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN @@ -21,6 +22,7 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH_QARMA3 HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING -- cgit v1.2.3 From 032e6c33790dc51836a40ef702de4c9e0941145f Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Thu, 24 Feb 2022 16:47:39 +0000 Subject: arm64: cpufeature: Remove cpu_has_fwb() check cpu_has_fwb() is supposed to warn user is following architectural requirement is not valid: LoUU, bits [29:27] - Level of Unification Uniprocessor for the cache hierarchy. Note When FEAT_S2FWB is implemented, the architecture requires that this field is zero so that no levels of data cache need to be cleaned in order to manage coherency with instruction fetches. LoUIS, bits [23:21] - Level of Unification Inner Shareable for the cache hierarchy. Note When FEAT_S2FWB is implemented, the architecture requires that this field is zero so that no levels of data cache need to be cleaned in order to manage coherency with instruction fetches. It is not really clear what user have to do if assertion fires. Having assertions about the CPU design like this inspire even more assertions to be added and the kernel definitely is not the right place for that, so let's remove cpu_has_fwb() altogether. Signed-off-by: Vladimir Murzin Link: https://lore.kernel.org/r/20220224164739.119168-1-vladimir.murzin@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..6d1da359f804 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1775,14 +1775,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) -{ - u64 val = read_sysreg_s(SYS_CLIDR_EL1); - - /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ - WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val)); -} - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -2144,7 +2136,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64MMFR2_FWB_SHIFT, .min_field_value = 1, .matches = has_cpuid_feature, - .cpu_enable = cpu_has_fwb, }, { .desc = "ARMv8.4 Translation Table Level", -- cgit v1.2.3 From 4013e26670c590944abdab56c4fa797527b74325 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 18 Feb 2022 00:12:09 -0800 Subject: arm64: module: remove (NOLOAD) from linker script On ELF, (NOLOAD) sets the section type to SHT_NOBITS[1]. It is conceptually inappropriate for .plt and .text.* sections which are always SHT_PROGBITS. In GNU ld, if PLT entries are needed, .plt will be SHT_PROGBITS anyway and (NOLOAD) will be essentially ignored. In ld.lld, since https://reviews.llvm.org/D118840 ("[ELF] Support (TYPE=) to customize the output section type"), ld.lld will report a `section type mismatch` error. Just remove (NOLOAD) to fix the error. [1] https://lld.llvm.org/ELF/linker_script.html As of today, "The section should be marked as not loadable" on https://sourceware.org/binutils/docs/ld/Output-Section-Type.html is outdated for ELF. 
Tested-by: Nathan Chancellor Reported-by: Nathan Chancellor Signed-off-by: Fangrui Song Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20220218081209.354383-1-maskray@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.lds.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d2..094701ec5500 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS -- cgit v1.2.3 From 879358fc670dbc8dc3b0e3e4975ff39e38847707 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:30 +0000 Subject: arm64: Define CPACR_EL1_FPEN similarly to other floating point controls The base floating point, SVE and SME all have enable controls for EL0 and EL1 in CPACR_EL1 which have a similar layout and function. Currently the basic floating point enable FPEN is defined differently to the SVE control, specified as a single define in kvm_arm.h rather than in sysreg.h. Move the define to sysreg.h and provide separate EL0 and EL1 control bits so code managing the different floating point enables can look consistent. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 1 - arch/arm64/include/asm/sysreg.h | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 01d47c5886dc..eec790842fe2 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -355,7 +355,6 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) -#define CPACR_EL1_FPEN (3 << 20) #define CPACR_EL1_TTA (1 << 28) #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 898bee0004ae..1da4c43d597d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1097,6 +1097,10 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ +#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ +#define CPACR_EL1_FPEN (CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ #define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) -- cgit v1.2.3 From 3bb72d86d80eb9296d43f9e807b6f9ff58049552 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:31 +0000 Subject: arm64: Always use individual bits in CPACR floating point enables CPACR_EL1 has several bitfields for controlling traps for floating point features to EL1, each of which has a separate bits for EL0 and EL1. Marc Zyngier noted that we are not consistent in our use of defines to manipulate these, sometimes using a define covering the whole field and sometimes using defines for the individual bits. 
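To make the two styles concrete, a hedged sketch (the wrapper function is made up) of the form this change settles on, spelling out the EL0 and EL1 enable bits instead of relying on a whole-field define:

#include <asm/sysreg.h>

static void cpacr_enable_fp(void)
{
	/* was: sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_FPEN); */
	sysreg_clear_set(cpacr_el1, 0,
			 CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
}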
Make this consistent by expanding the whole field defines where they are used (currently only in the KVM code) and deleting them so that no further uses can be introduced. Suggested-by: Marc Zyngier Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 3 ++- arch/arm64/include/asm/sysreg.h | 2 -- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index eec790842fe2..1767ded83888 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -356,6 +356,7 @@ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) #define CPACR_EL1_TTA (1 << 28) -#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) +#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ + CPACR_EL1_ZEN_EL1EN) #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 1da4c43d597d..e66dd9ebc337 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1099,11 +1099,9 @@ #define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ #define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_FPEN (CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) /* TCR EL1 Bit Definitions */ #define SYS_TCR_EL1_TCMA1 (BIT(58)) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 701cfb964905..6379a1e3e6e5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ if (has_vhe()) { - reg = CPACR_EL1_FPEN; + reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; if (sve_guest) - reg |= CPACR_EL1_ZEN; + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; sysreg_clear_set(cpacr_el1, 0, reg); } else { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 11d053fdd604..619353b06e38 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -38,7 +38,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -53,9 +53,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) if (update_fp_enabled(vcpu)) { if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; + val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { - val &= ~CPACR_EL1_FPEN; + val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); __activate_traps_fpsimd32(vcpu); } -- cgit v1.2.3 From 0a2eec83c2c23cf609e781732b338a9a4f18e00c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Feb 2022 15:20:32 +0000 Subject: arm64: cpufeature: Always specify and use a field width for capabilities Since all the fields in the main ID registers are 4 bits wide we have up until now not bothered specifying the width in the code. 
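(A hedged sketch, with illustrative values, of the width-explicit extraction that the new table member feeds; cpu_has_lse_atomics() is a made-up name:)

#include <asm/cpufeature.h>
#include <asm/sysreg.h>

static bool cpu_has_lse_atomics(u64 isar0)
{
	/* 4-bit unsigned ATOMICS field; a value >= 2 means LSE atomics */
	return cpuid_feature_extract_field_width(isar0,
						 ID_AA64ISAR0_ATOMICS_SHIFT,
						 4, FTR_UNSIGNED) >= 2;
}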
Since we now wish to use this mechanism to enumerate features from the floating point feature registers which do not follow this pattern add a width to the table. This means updating all the existing table entries but makes it less likely that we run into issues in future due to implicitly assuming a 4 bit width. Signed-off-by: Mark Brown Cc: Suzuki K Poulose Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220207152109.197566-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 167 ++++++++++++++++++++++-------------- 2 files changed, 102 insertions(+), 66 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index ef6be92b1921..2728abd9cae4 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -356,6 +356,7 @@ struct arm64_cpu_capabilities { struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; + u8 field_width; u8 min_field_value; u8 hwcap_type; bool sign; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..64a748c2b351 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1307,7 +1307,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); + int val = cpuid_feature_extract_field_width(reg, entry->field_pos, + entry->field_width, + entry->sign); return val >= entry->min_field_value; } @@ -1955,6 +1957,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR0_EL1, .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1966,6 +1969,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, .cpu_enable = cpu_enable_pan, @@ -1979,6 +1983,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 3, }, @@ -1991,6 +1996,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, }, @@ -2015,6 +2021,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM @@ -2026,6 +2033,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { @@ -2046,6 +2054,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { */ .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, @@ -2065,6 +2074,7 @@ static const struct 
arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2075,6 +2085,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 2, }, #endif @@ -2086,6 +2097,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, @@ -2100,6 +2112,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_RAS_V1, .cpu_enable = cpu_clear_disr, }, @@ -2118,6 +2131,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_AMU_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_AMU, .cpu_enable = cpu_amu_enable, }, @@ -2142,6 +2156,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, .cpu_enable = cpu_has_fwb, @@ -2153,6 +2168,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, }, @@ -2163,6 +2179,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64ISAR0_TLB_RANGE, }, @@ -2181,6 +2198,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .field_width = 4, .min_field_value = 2, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, @@ -2193,6 +2211,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2202,6 +2221,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, }, @@ -2214,6 +2234,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, }, @@ -2225,6 +2246,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2236,6 +2258,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_width = 4, 
.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = has_address_auth_cpucap, }, @@ -2246,6 +2269,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, .matches = has_address_auth_cpucap, }, @@ -2261,6 +2285,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, @@ -2271,6 +2296,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, .matches = has_cpuid_feature, }, @@ -2291,6 +2317,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = can_use_gic_priorities, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2302,6 +2329,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, + .field_width = 4, .field_pos = ID_AA64MMFR2_E0PD_SHIFT, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2316,6 +2344,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2333,6 +2362,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_BT_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_BT_BTI, .sign = FTR_UNSIGNED, }, @@ -2345,6 +2375,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, @@ -2356,6 +2387,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE_ASYMM, .sign = FTR_UNSIGNED, }, @@ -2367,16 +2399,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, }, {}, }; -#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ +#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .field_width = width, \ .sign = s, \ .min_field_value = min_value, @@ -2386,10 +2420,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap_type = cap_type, \ .hwcap = cap, \ -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ +#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ - HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ } #define HWCAP_MULTI_CAP(list, cap_type, cap) \ @@ -2409,11 +2443,12 @@ static 
const struct arm64_cpu_capabilities arm64_features[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) + 4, FTR_UNSIGNED, + ID_AA64ISAR1_APA_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) }, {}, }; @@ -2421,77 +2456,77 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) }, {}, }; #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 
FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + 
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, 
ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ - HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), - HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), {}, }; @@ -2520,15 +2555,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), - HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. 
So, piggy back on this for the presence of VFP support */ - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; -- cgit v1.2.3 From cb627397e02bc65e44912daebfe0bbe6b0ecd384 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:22 +0000 Subject: arm64/mte: Add a little bit of documentation for mte_update_sctlr_user() The code isn't that obscure but it probably won't hurt to have a little bit more documentation for anyone trying to find out where everything actually takes effect. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/mte.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f983795b5eda..b9a2d13e85f6 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void) } #endif +/* + * This is where we actually resolve the system and process MTE mode + * configuration into an actual value in SCTLR_EL1 that affects + * userspace. + */ static void mte_update_sctlr_user(struct task_struct *task) { /* @@ -199,8 +204,17 @@ static void mte_update_sctlr_user(struct task_struct *task) unsigned long pref, resolved_mte_tcf; pref = __this_cpu_read(mte_tcf_preferred); + /* + * If there is no overlap between the system preferred and + * program requested values go with what was requested. + */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; sctlr &= ~SCTLR_EL1_TCF0_MASK; + /* + * Pick an actual setting. The order in which we check for + * set bits and map into register values determines our + * default order. 
+ */ if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) -- cgit v1.2.3 From d082a0255fcb8fcb4bd8257df111f2caa67086bc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 2022 17:32:23 +0000 Subject: arm64/mte: Add hwcap for asymmetric mode Allow userspace to detect support for asymmetric mode by providing a hwcap for it, using the official feature name FEAT_MTE3. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-4-broonie@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.rst | 5 +++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 9 insertions(+) (limited to 'arch') diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index b72ff17d600a..a8f30963e550 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -259,6 +259,11 @@ HWCAP2_RPRES Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. +HWCAP2_MTE3 + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described + by Documentation/arm64/memory-tagging-extension.rst. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f68fbb207473..8db5ec0089db 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -108,6 +108,7 @@ #define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f03731847d9d..99cb5d383048 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -78,5 +78,6 @@ #define HWCAP2_ECV (1 << 19) #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) +#define HWCAP2_MTE3 (1 << 22) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e5f23dab1c8d..5809d5d59258 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2488,6 +2488,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 591c18a889a5..330b92ea863a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -97,6 +97,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_ECV] = "ecv", [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_MTE3] = "mte3", }; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 766121ba5de38a6f67980ec24a6af76c55def100 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 16 Feb 
2022 17:32:24 +0000 Subject: arm64/mte: Add userspace interface for enabling asymmetric mode The architecture provides an asymmetric mode for MTE where tag mismatches are checked asynchronously for stores but synchronously for loads. Allow userspace processes to select this and make it available as a default mode via the existing per-CPU sysfs interface. Since there PR_MTE_TCF_ values are a bitmask (allowing the kernel to choose between the multiple modes) and there are no free bits adjacent to the existing PR_MTE_TCF_ bits the set of bits used to specify the mode becomes disjoint. Programs using the new interface should be aware of this and programs that do not use it will not see any change in behaviour. When userspace requests two possible modes but the system default for the CPU is the third mode (eg, default is synchronous but userspace requests either asynchronous or asymmetric) the preference order is: ASYMM > ASYNC > SYNC This situation is not currently possible since there are only two modes and it is mandatory to have a system default so there could be no ambiguity and there is no ABI change. The chosen order is basically arbitrary as we do not have a clear metric for what is better here. If userspace requests specifically asymmetric mode via the prctl() and the system does not support it then we will return an error, this mirrors how we handle the case where userspace enables MTE on a system that does not support MTE at all and the behaviour that will be seen if running on an older kernel that does not support userspace use of asymmetric mode. Attempts to set asymmetric mode as the default mode will result in an error if the system does not support it. Signed-off-by: Mark Brown Reviewed-by: Catalin Marinas Reviewed-by: Vincenzo Frascino Tested-by: Branislav Rankov Link: https://lore.kernel.org/r/20220216173224.2342152-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 1 + arch/arm64/kernel/mte.c | 12 +++++++++++- arch/arm64/kernel/process.c | 5 ++++- include/uapi/linux/prctl.h | 4 +++- 4 files changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6f41b65f9962..73e38d9a540c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,7 @@ #define MTE_CTRL_TCF_SYNC (1UL << 16) #define MTE_CTRL_TCF_ASYNC (1UL << 17) +#define MTE_CTRL_TCF_ASYMM (1UL << 18) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b9a2d13e85f6..cbbd8d93fc50 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -215,7 +215,9 @@ static void mte_update_sctlr_user(struct task_struct *task) * set bits and map into register values determines our * default order. 
*/ - if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) + sctlr |= SCTLR_EL1_TCF0_ASYMM; + else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SCTLR_EL1_TCF0_SYNC; @@ -309,6 +311,8 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) mte_ctrl |= MTE_CTRL_TCF_ASYNC; if (arg & PR_MTE_TCF_SYNC) mte_ctrl |= MTE_CTRL_TCF_SYNC; + if (arg & PR_MTE_TCF_ASYMM) + mte_ctrl |= MTE_CTRL_TCF_ASYMM; task->thread.mte_ctrl = mte_ctrl; if (task == current) { @@ -337,6 +341,8 @@ long get_mte_ctrl(struct task_struct *task) ret |= PR_MTE_TCF_ASYNC; if (mte_ctrl & MTE_CTRL_TCF_SYNC) ret |= PR_MTE_TCF_SYNC; + if (mte_ctrl & MTE_CTRL_TCF_ASYMM) + ret |= PR_MTE_TCF_ASYMM; return ret; } @@ -484,6 +490,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev, return sysfs_emit(buf, "async\n"); case MTE_CTRL_TCF_SYNC: return sysfs_emit(buf, "sync\n"); + case MTE_CTRL_TCF_ASYMM: + return sysfs_emit(buf, "asymm\n"); default: return sysfs_emit(buf, "???\n"); } @@ -499,6 +507,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev, tcf = MTE_CTRL_TCF_ASYNC; else if (sysfs_streq(buf, "sync")) tcf = MTE_CTRL_TCF_SYNC; + else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm")) + tcf = MTE_CTRL_TCF_ASYMM; else return -EINVAL; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5369e649fa79..941cfa7117b9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -635,7 +635,10 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ + | PR_MTE_TAG_MASK; + if (cpus_have_cap(ARM64_MTE_ASYMM)) + valid_mask |= PR_MTE_TCF_ASYMM; if (arg & ~valid_mask) return -EINVAL; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index e998764f0262..4ae2b21e4066 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -238,7 +238,9 @@ struct prctl_mm_map { # define PR_MTE_TCF_NONE 0UL # define PR_MTE_TCF_SYNC (1UL << 1) # define PR_MTE_TCF_ASYNC (1UL << 2) -# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) +# define PR_MTE_TCF_ASYMM (1UL << 19) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \ + PR_MTE_TCF_ASYMM) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) -- cgit v1.2.3 From 2369f171d5c5550b85ce96fd35d4438cf2e6b09e Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Wed, 9 Feb 2022 09:26:42 +0000 Subject: arm64: crash_core: Export MODULES, VMALLOC, and VMEMMAP ranges The following interrelated ranges are needed by the kdump crash tool: MODULES_VADDR ~ MODULES_END, VMALLOC_START ~ VMALLOC_END, VMEMMAP_START ~ VMEMMAP_END Since these values change from time to time, it is preferable to export them via vmcoreinfo than to change the crash's code frequently. 
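For context, the exported values appear in the vmcoreinfo note as plain "NUMBER(name)=0x..." lines, so a dump consumer only needs simple text parsing to recover the ranges. Below is a rough standalone C sketch of that consumer side; it is not part of this patch, and the parse_vmcoreinfo_number() helper name and the sample values are purely illustrative (assuming an LP64 userspace).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper: pull "NUMBER(<name>)=0x..." out of a vmcoreinfo
 * text blob, the format produced by vmcoreinfo_append_str() above. */
static int parse_vmcoreinfo_number(const char *blob, const char *name,
                                   unsigned long *val)
{
        char key[128];
        const char *p;

        snprintf(key, sizeof(key), "NUMBER(%s)=", name);
        p = strstr(blob, key);
        if (!p)
                return -1;                      /* symbol not exported */
        *val = strtoul(p + strlen(key), NULL, 16);
        return 0;
}

int main(void)
{
        /* Made-up example blob in the vmcoreinfo line format. */
        const char *blob =
                "NUMBER(VA_BITS)=48\n"
                "NUMBER(MODULES_VADDR)=0xffff800008000000\n"
                "NUMBER(MODULES_END)=0xffff800010000000\n";
        unsigned long start, end;

        if (!parse_vmcoreinfo_number(blob, "MODULES_VADDR", &start) &&
            !parse_vmcoreinfo_number(blob, "MODULES_END", &end))
                printf("module space: %#lx ~ %#lx\n", start, end - 1);
        return 0;
}

A crash/kdump tool would do the same lookup against the real note rather than a hard-coded blob.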
Signed-off-by: Huang Shijie Link: https://lore.kernel.org/r/20220209092642.9181-1-shijie@os.amperecomputing.com Signed-off-by: Will Deacon --- Documentation/admin-guide/kdump/vmcoreinfo.rst | 8 ++++++++ arch/arm64/kernel/crash_core.c | 6 ++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 3861a25faae1..a339af45a22e 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual addresses in the higher VA range (refer to ARMv8 ARM document for more details). +MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END +------------- + +Used to get the correct ranges: + MODULES_VADDR ~ MODULES_END-1 : Kernel module space. + VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space. + VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array. + arm === diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 314391a156ee..2b65aae332ce 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(VA_BITS); /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", -- cgit v1.2.3 From 614c0b9fee711dd89b1dd65c88ba83612a373fdc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 24 Feb 2022 18:10:28 +0000 Subject: arm64: prevent instrumentation of bp hardening callbacks We may call arm64_apply_bp_hardening() early during entry (e.g. in el0_ia()) before it is safe to run instrumented code. Unfortunately this may result in running instrumented code in two cases: * The hardening callbacks called by arm64_apply_bp_hardening() are not marked as `noinstr`, and have been observed to be instrumented when compiled with either GCC or LLVM. * Since arm64_apply_bp_hardening() itself is only marked as `inline` rather than `__always_inline`, it is possible that the compiler decides to place it out-of-line, whereupon it may be instrumented. For example, with defconfig built with clang 13.0.0, call_hvc_arch_workaround_1() is compiled as: | : | d503233f paciasp | f81f0ffe str x30, [sp, #-16]! | 320183e0 mov w0, #0x80008000 | d503201f nop | d4000002 hvc #0x0 | f84107fe ldr x30, [sp], #16 | d50323bf autiasp | d65f03c0 ret ... but when CONFIG_FTRACE=y and CONFIG_KCOV=y this is compiled as: | : | d503245f bti c | d503201f nop | d503201f nop | d503233f paciasp | a9bf7bfd stp x29, x30, [sp, #-16]! | 910003fd mov x29, sp | 94000000 bl 0 <__sanitizer_cov_trace_pc> | 320183e0 mov w0, #0x80008000 | d503201f nop | d4000002 hvc #0x0 | a8c17bfd ldp x29, x30, [sp], #16 | d50323bf autiasp | d65f03c0 ret ... with a patchable function entry registered with ftrace, and a direct call to __sanitizer_cov_trace_pc(). Neither of these are safe early during entry sequences. 
This patch avoids the unsafe instrumentation by marking arm64_apply_bp_hardening() as `__always_inline` and by marking the hardening functions as `noinstr`. This avoids the potential for instrumentation, and causes clang to consistently generate the function as with the defconfig sample. Note: in the defconfig compilation, when CONFIG_SVE=y, x30 is spilled to the stack without being placed in a frame record, which will result in a missing entry if call_hvc_arch_workaround_1() is backtraced. Similar is true of qcom_link_stack_sanitisation(), where inline asm spills the LR to a GPR prior to corrupting it. This is not a significant issue presently as we will only backtrace here if an exception is taken, and in such cases we may omit entries for other reasons today. The relevant hardening functions were introduced in commits: ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor") b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support") ... and these were subsequently moved in commit: d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code") The arm64_apply_bp_hardening() function was introduced in commit: 0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks") ... and was subsequently moved and reworked in commit: 6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h") Fixes: ec82b567a74fbdff ("arm64: Implement branch predictor hardening for Falkor") Fixes: b092201e00206141 ("arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support") Fixes: d4647f0a2ad71110 ("arm64: Rewrite Spectre-v2 mitigation code") Fixes: 0f15adbb2861ce6f ("arm64: Add skeleton to harden the branch predictor against aliasing attacks") Fixes: 6279017e807708a0 ("KVM: arm64: Move BP hardening helpers into spectre.h") Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Marc Zyngier Cc: Mark Brown Cc: Will Deacon Acked-by: Marc Zyngier Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20220224181028.512873-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/spectre.h | 3 ++- arch/arm64/kernel/proton-pack.c | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f62ca39da6c5..d476c06757c5 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 902e4084c477..ea78f0b3d847 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -193,17 +193,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -static void call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void 
qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; -- cgit v1.2.3 From 0a32c88ddb9af30e8a16d41d7b9b824c27d29459 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Fri, 25 Feb 2022 11:40:08 +0100 Subject: arm64: signal: nofpsimd: Do not allocate fp/simd context when not available Commit 6d502b6ba1b2 ("arm64: signal: nofpsimd: Handle fp/simd context for signal frames") introduced saving the fp/simd context for signal handling only when support is available. But setup_sigframe_layout() always reserves memory for fp/simd context. The additional memory is not touched because preserve_fpsimd_context() is not called and thus the magic is invalid. This may lead to an error when parse_user_sigframe() checks the fp/simd area and does not find a valid magic number. Signed-off-by: David Engraf Reviwed-by: Mark Brown Fixes: 6d502b6ba1b267b3 ("arm64: signal: nofpsimd: Handle fp/simd context for signal frames") Cc: # 5.6.x Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220225104008.820289-1-david.engraf@sysgo.com Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..3d66fba69016 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -577,10 +577,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, { int err; - err = sigframe_alloc(user, &user->fpsimd_offset, - sizeof(struct fpsimd_context)); - if (err) - return err; + if (system_supports_fpsimd()) { + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + } /* fault information, if valid */ if (add_all || current->thread.fault_code) { -- cgit v1.2.3 From b8fc780137b4b2d7b84d75488b429fd882e0dd3b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 2 Mar 2022 13:42:25 +0000 Subject: arm64: cpufeature: Add missing .field_width for GIC system registers This was missed when making specification of a field standard. Fixes: 0a2eec83c2c23cf6 ("arm64: cpufeature: Always specify and use a field width for capabilities") Reported-by: Qian Cai Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220302134225.159217-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 64a748c2b351..499e37a30156 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1947,6 +1947,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, -- cgit v1.2.3 From f2c281204b47309534f26dc63cee2a130c2b497b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 7 Mar 2022 18:08:59 +0000 Subject: arm64: cpufeature: Warn if we attempt to read a zero width field Add a WARN_ON_ONCE() when extracting a field if no width is specified. This should never happen outside of development since it will be triggered with or without the feature so long as the relevant ID register is present. If the warning triggers hope that the field was the standard 4 bits wide and soldier on. 
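For reference, the extraction itself is just a shift-and-mask on the 64-bit ID register value. The standalone sketch below approximates the signed/unsigned helpers, including the fall-back-to-4-bits behaviour this patch adds; the function names and the sample register value are illustrative, not the kernel's.

#include <stdint.h>
#include <stdio.h>

/* Sketch of extracting an ID register field given a shift and an
 * explicit width, in the spirit of the cpufeature helpers. */
static int64_t extract_signed(uint64_t features, int field, int width)
{
        return (int64_t)(features << (64 - width - field)) >> (64 - width);
}

static uint64_t extract_unsigned(uint64_t features, int field, int width)
{
        return (features >> field) & ((1ULL << width) - 1);
}

static int64_t extract_field(uint64_t features, int field, int width, int sign)
{
        if (!width)             /* zero width is a bug; assume the usual 4 */
                width = 4;
        return sign ? extract_signed(features, field, width)
                    : (int64_t)extract_unsigned(features, field, width);
}

int main(void)
{
        uint64_t id_aa64isar0 = 0x20;   /* made-up register value */

        /* AES field lives at bits [7:4]; a value of 2 advertises PMULL. */
        printf("AES = %lld\n", (long long)extract_field(id_aa64isar0, 4, 4, 0));
        return 0;
}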
Suggested-by: Marc Zyngier Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220307180900.3045812-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 2728abd9cae4..2b407fbccdfb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -577,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) static inline int __attribute_const__ cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign) { + if (WARN_ON_ONCE(!width)) + width = 4; return (sign) ? cpuid_feature_extract_signed_field_width(features, field, width) : cpuid_feature_extract_unsigned_field_width(features, field, width); -- cgit v1.2.3 From cf5a501d985ba1b6ace9b18c64346441819bffea Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Wed, 2 Mar 2022 16:46:23 +0800 Subject: arm64: avoid flushing icache multiple times on contiguous HugeTLB When a contiguous HugeTLB page is mapped, set_pte_at() will be called CONT_PTES/CONT_PMDS times. Therefore, __sync_icache_dcache() will flush cache multiple times if the page is executable (to ensure the I-D cache coherency). However, the first flushing cache already covers subsequent cache flush operations. So only flusing cache for the head page if it is a HugeTLB page to avoid redundant cache flushing. In the next patch, it is also depends on this change since the tail vmemmap pages of HugeTLB is mapped with read-only meanning only head page struct can be modified. Signed-off-by: Muchun Song Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20220302084624.33340-1-songmuchun@bytedance.com Signed-off-by: Will Deacon --- arch/arm64/mm/flush.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 2aaf950b906c..a06c6ac770d4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases((unsigned long)page_address(page), (unsigned long)page_address(page) + -- cgit v1.2.3 From 1310222c276b7946e440a7ab49c1e1508561f5fd Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 16 Feb 2022 10:36:52 +0530 Subject: arm64/mm: Drop use_1G_block() pud_sect_supported() already checks for PUD level block mapping support i.e on ARM64_4K_PAGES config. Hence pud_sect_supported(), along with some other required alignment checks can help completely drop use_1G_block(). 
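The replacement open-codes the same test: a 1GB block can only be used when both ends of the virtual range and the physical address are PUD-aligned. A quick standalone sketch of that check is below, with PUD_SIZE hard-coded to the 1GB value it takes with a 4K granule and the pud_sect_supported()/NO_BLOCK_MAPPINGS gating omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PUD_SIZE        (1UL << 30)             /* 1GB with 4K pages */
#define PUD_MASK        (~(PUD_SIZE - 1))

/* Mirrors the condition kept in alloc_init_pud() below. */
static bool can_use_1g_block(uint64_t addr, uint64_t next, uint64_t phys)
{
        return ((addr | next | phys) & ~PUD_MASK) == 0;
}

int main(void)
{
        /* All three 1GB aligned -> 1; misaligned phys -> 0. */
        printf("%d\n", can_use_1g_block(0x40000000, 0x80000000, 0xc0000000));
        printf("%d\n", can_use_1g_block(0x40000000, 0x80000000, 0xc0200000));
        return 0;
}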
Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/1644988012-25455-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1681430ecab7..12feac701d8e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -295,18 +295,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, } while (addr = next, addr != end); } -static inline bool use_1G_block(unsigned long addr, unsigned long next, - unsigned long phys) -{ - if (PAGE_SHIFT != 12) - return false; - - if (((addr | next | phys) & ~PUD_MASK) != 0) - return false; - - return true; -} - static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), @@ -345,7 +333,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && + if (pud_sect_supported() && + ((addr | next | phys) & ~PUD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pudp, phys, prot); -- cgit v1.2.3 From 24a147bcef8ca039cb75d6d4b68c7cc339b11178 Mon Sep 17 00:00:00 2001 From: Linu Cherian Date: Mon, 7 Mar 2022 20:00:14 +0530 Subject: irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR When a IAR register read races with a GIC interrupt RELEASE event, GIC-CPU interface could wrongly return a valid INTID to the CPU for an interrupt that is already released(non activated) instead of 0x3ff. As a side effect, an interrupt handler could run twice, once with interrupt priority and then with idle priority. As a workaround, gic_read_iar is updated so that it will return a valid interrupt ID only if there is a change in the active priority list after the IAR read on all the affected Silicons. Since there are silicon variants where both 23154 and 38545 are applicable, workaround for erratum 23154 has been extended to address both of them. Signed-off-by: Linu Cherian Reviewed-by: Marc Zyngier Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220307143014.22758-1-lcherian@marvell.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 2 +- arch/arm64/Kconfig | 8 ++++++-- arch/arm64/include/asm/arch_gicv3.h | 23 +++++++++++++++++++++-- arch/arm64/include/asm/cputype.h | 13 +++++++++++++ arch/arm64/kernel/cpu_errata.c | 20 +++++++++++++++++--- 5 files changed, 58 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index ea281dd75517..466cb9e89047 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -136,7 +136,7 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | +----------------+-----------------+-----------------+-----------------------------+ -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 | +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX GICv3 | #38539 | N/A | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cbcd42decb2a..b154aa98c43a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -890,13 +890,17 @@ config CAVIUM_ERRATUM_23144 If unsure, say Y. config CAVIUM_ERRATUM_23154 - bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" + bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation" default y help - The gicv3 of ThunderX requires a modified version for + The ThunderX GICv3 implementation requires a modified version for reading the IAR status to ensure data synchronization (access to icc_iar1_el1 is not sync'ed before and after). + It also suffers from erratum 38545 (also present on Marvell's + OcteonTX and OcteonTX2), resulting in deactivated interrupts being + spuriously presented to the CPU interface. + If unsure, say Y. config CAVIUM_ERRATUM_27456 diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4ad22c3135db..8bd5afc7b692 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void) * The gicv3 of ThunderX requires a modified version for reading the * IAR status to ensure data synchronization (access to icc_iar1_el1 * is not sync'ed before and after). + * + * Erratum 38545 + * + * When a IAR register read races with a GIC interrupt RELEASE event, + * GIC-CPU interface could wrongly return a valid INTID to the CPU + * for an interrupt that is already released(non activated) instead of 0x3ff. + * + * To workaround this, return a valid interrupt ID only if there is a change + * in the active priority list after the IAR read. + * + * Common function used for both the workarounds since, + * 1. On Thunderx 88xx 1.x both erratas are applicable. + * 2. Having extra nops doesn't add any side effects for Silicons where + * erratum 23154 is not applicable. 
*/ static inline u64 gic_read_iar_cavium_thunderx(void) { - u64 irqstat; + u64 irqstat, apr; + apr = read_sysreg_s(SYS_ICC_AP1R0_EL1); nops(8); irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); - return irqstat; + /* Max priority groups implemented is only 32 */ + if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1))) + return irqstat; + + return 0x3ff; } static inline void gic_write_ctlr(u32 val) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 999b9149f856..4596e7ca29a3 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -84,6 +84,13 @@ #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 #define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 @@ -124,6 +131,12 @@ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b217941713a8..510f47055b91 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -214,6 +214,20 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { }; #endif +#ifdef CONFIG_CAVIUM_ERRATUM_23154 +const struct midr_range cavium_erratum_23154_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_THUNDERX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO), +}; +#endif + #ifdef CONFIG_CAVIUM_ERRATUM_27456 const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ @@ -425,10 +439,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 { - /* Cavium ThunderX, pass 1.x */ - .desc = "Cavium erratum 23154", + .desc = "Cavium errata 23154 and 38545", .capability = ARM64_WORKAROUND_CAVIUM_23154, - ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -- cgit v1.2.3 
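Whether a CPU needs the workaround above is decided purely from MIDR_EL1, as with the other Cavium errata. The standalone sketch below shows the shape of that match (implementer plus part number, ignoring the variant/revision ranges that MIDR_ALL_VERSIONS covers); the sample MIDR value and the abbreviated part list are illustrative only.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* MIDR_EL1 layout per the Arm ARM: implementer [31:24], part [15:4]. */
#define MIDR_IMPLEMENTER(m)     (((m) >> 24) & 0xff)
#define MIDR_PARTNUM(m)         (((m) >> 4) & 0xfff)

#define ARM_CPU_IMP_CAVIUM      0x43

static const uint16_t affected_parts[] = {
        0x0a1,  /* ThunderX 88xx */
        0x0b2,  /* OcteonTX2 96xx */
        0x0b3,  /* OcteonTX2 95xx */
};

static bool midr_is_affected(uint32_t midr)
{
        if (MIDR_IMPLEMENTER(midr) != ARM_CPU_IMP_CAVIUM)
                return false;
        for (size_t i = 0; i < sizeof(affected_parts) / sizeof(affected_parts[0]); i++)
                if (MIDR_PARTNUM(midr) == affected_parts[i])
                        return true;
        return false;
}

int main(void)
{
        uint32_t midr = 0x431f0b21;     /* made-up: Cavium, part 0x0b2 */

        printf("affected: %s\n", midr_is_affected(midr) ? "yes" : "no");
        return 0;
}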
From 507f788d05e7fba6cf478ffa1c99f5c2b0020f63 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 10:14:33 +0000 Subject: arm64: lib: Import latest version of Arm Optimized Routines' strcmp Import the latest version of the Arm Optimized Routines strcmp function based on the upstream code of string/aarch64/strcmp.S at commit 189dfefe37d5 from: https://github.com/ARM-software/optimized-routines This latest version includes MTE support. Note that for simplicity Arm have chosen to contribute this code to Linux under GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license. Arm is the sole copyright holder for this code. Signed-off-by: Joey Gouly Cc: Robin Murphy Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220301101435.19327-2-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/strcmp.S | 238 +++++++++++++++++++++++++----------------------- 1 file changed, 126 insertions(+), 112 deletions(-) (limited to 'arch') diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec97..758de77afd2f 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2012-2021, Arm Limited. + * Copyright (c) 2012-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S */ #include @@ -11,161 +11,175 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 -/* Parameters and result. */ #define src1 x0 #define src2 x1 #define result x0 -/* Internal variables. */ #define data1 x2 #define data1w w2 #define data2 x3 #define data2w w3 #define has_nul x4 #define diff x5 +#define off1 x5 #define syndrome x6 -#define tmp1 x7 -#define tmp2 x8 -#define tmp3 x9 -#define zeroones x10 -#define pos x11 - - /* Start of performance-critical section -- one 64B cache line. */ - .align 6 +#define tmp x6 +#define data3 x7 +#define zeroones x8 +#define shift x9 +#define off2 x10 + +/* On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. */ +#ifdef __AARCH64EB__ +# define LS_FW lsl +#else +# define LS_FW lsr +#endif + +/* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. + Since carry propagation makes 0x1 bytes before a NUL byte appear + NUL too in big-endian, byte-reverse the data before the NUL check. */ + + SYM_FUNC_START_WEAK_PI(strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 + sub off2, src2, src1 + mov zeroones, REP8_01 + and tmp, src1, 7 + tst off2, 7 b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. 
*/ + cbnz tmp, L(mutual_align) + + .p2align 4 + L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 + ldr data2, [src1, off2] + ldr data1, [src1], 8 L(start_realigned): - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev tmp, data1 + sub has_nul, tmp, zeroones + orr tmp, tmp, REP8_7f +#else + sub has_nul, data1, zeroones + orr tmp, data1, REP8_7f +#endif + bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */ + ccmp data1, data2, 0, eq + b.eq L(loop_aligned) +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. */ - L(end): -#ifndef __AARCH64EB__ +#ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret -1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. +#endif + clz shift, syndrome + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ - lsl data1, data1, pos - lsl data2, data2, pos + lsl data1, data1, shift + lsl data2, data2, shift /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 + lsr data1, data1, 56 + sub result, data1, data2, lsr 56 ret -#endif + + .p2align 4 L(mutual_align): /* Sources are mutually aligned, but are not currently at an alignment boundary. Round down the addresses and then mask off - the bytes that preceed the start point. */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#endif - orr data1, data1, tmp2 - orr data2, data2, tmp2 + the bytes that precede the start point. 
*/ + bic src1, src1, 7 + ldr data2, [src1, off2] + ldr data1, [src1], 8 + neg shift, src2, lsl 3 /* Bits to alignment -64. */ + mov tmp, -1 + LS_FW tmp, tmp, shift + orr data1, data1, tmp + orr data2, data2, tmp b L(start_realigned) L(misaligned8): /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always - checking to make sure that we don't access beyond page boundary in - SRC2. */ - tst src1, #7 - b.eq L(loop_misaligned) + checking to make sure that we don't access beyond the end of SRC2. */ + cbz tmp, L(src1_aligned) L(do_misaligned): - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - cmp data1w, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + cmp data1w, 0 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ b.ne L(done) - tst src1, #7 + tst src1, 7 b.ne L(do_misaligned) -L(loop_misaligned): - /* Test if we are within the last dword of the end of a 4K page. If - yes then jump back to the misaligned loop to copy a byte at a time. */ - and tmp1, src2, #0xff8 - eor tmp1, tmp1, #0xff8 - cbz tmp1, L(do_misaligned) - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +L(src1_aligned): + neg shift, src2, lsl 3 + bic src2, src2, 7 + ldr data3, [src2], 8 +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + lsr tmp, zeroones, shift + orr data3, data3, tmp + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + bics has_nul, has_nul, tmp + b.ne L(tail) + + sub off1, src2, src1 + + .p2align 4 + +L(loop_unaligned): + ldr data3, [src1, off1] + ldr data2, [src1, off2] +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + ldr data1, [src1], 8 + bics has_nul, has_nul, tmp + ccmp data1, data2, 0, eq + b.eq L(loop_unaligned) + + lsl tmp, has_nul, shift +#ifdef __AARCH64EB__ + rev tmp, tmp +#endif + eor diff, data1, data2 + orr syndrome, diff, tmp + cbnz syndrome, L(end) +L(tail): + ldr data1, [src1] + neg shift, shift + lsr data2, data3, shift + lsr has_nul, has_nul, shift +#ifdef __AARCH64EB__ + rev data2, data2 + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_misaligned) b L(end) L(done): -- cgit v1.2.3 From 387d828adffcf1eb949f3141079c479793c59aac Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 10:14:34 +0000 Subject: arm64: lib: Import latest version of Arm Optimized Routines' strncmp Import the latest version of the Arm Optimized Routines strncmp function based on the upstream code of string/aarch64/strncmp.S at commit 189dfefe37d5 from: https://github.com/ARM-software/optimized-routines This latest version includes MTE support. Note that for simplicity Arm have chosen to contribute this code to Linux under GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license. Arm is the sole copyright holder for this code. 
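Both this routine and strcmp above lean on the same word-at-a-time NUL detector: (x - 0x0101..01) & ~x & 0x8080..80 is non-zero exactly when some byte of x is zero (the assembly writes ~x & 0x80.. as ~(x | 0x7f..)). A small standalone demonstration, with made-up input words:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_80 0x8080808080808080ULL

/* Non-zero iff at least one byte of x is zero. */
static int word_has_nul(uint64_t x)
{
        return ((x - REP8_01) & ~x & REP8_80) != 0;
}

int main(void)
{
        uint64_t a, b;

        memcpy(&a, "abcdefgh", 8);      /* no NUL in these 8 bytes */
        memcpy(&b, "abc\0defg", 8);     /* NUL in byte 3 */
        printf("%d %d\n", word_has_nul(a), word_has_nul(b));    /* 0 1 */
        return 0;
}

Borrow propagation can additionally flag 0x01 bytes adjacent to a real NUL, which is why the big-endian paths byte-reverse the data before running the check, as the comments in the routine note; presence detection itself is unaffected.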
Signed-off-by: Joey Gouly Cc: Robin Murphy Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220301101435.19327-3-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/strncmp.S | 234 ++++++++++++++++++++++++++++------------------- 1 file changed, 141 insertions(+), 93 deletions(-) (limited to 'arch') diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6..a4884b97e9a8 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2013-2021, Arm Limited. + * Copyright (c) 2013-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S */ #include @@ -11,14 +11,14 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 /* Parameters and result. */ #define src1 x0 @@ -39,10 +39,24 @@ #define tmp3 x10 #define zeroones x11 #define pos x12 -#define limit_wd x13 -#define mask x14 -#define endloop x15 +#define mask x13 +#define endloop x14 #define count mask +#define offset pos +#define neg_offset x15 + +/* Define endian dependent shift operations. + On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. + LS_BK means shifting towards later bytes. + */ +#ifdef __AARCH64EB__ +#define LS_FW lsl +#define LS_BK lsr +#else +#define LS_FW lsr +#define LS_BK lsl +#endif SYM_FUNC_START_WEAK_PI(strncmp) cbz limit, L(ret0) @@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp) and count, src1, #7 b.ne L(misaligned8) cbnz count, L(mutual_align) - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and @@ -64,56 +75,52 @@ L(loop_aligned): ldr data1, [src1], #8 ldr data2, [src2], #8 L(start_realigned): - subs limit_wd, limit_wd, #1 + subs limit, limit, #8 sub tmp1, data1, zeroones orr tmp2, data1, #REP8_7f eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences. */ + csinv endloop, diff, xzr, hi /* Last Dword or differences. */ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ ccmp endloop, #0, #0, eq b.eq L(loop_aligned) /* End of main loop */ - /* Not reached the limit, must have found the end or a diff. */ - tbz limit_wd, #63, L(not_limit) - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq L(not_limit) - - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -#ifdef __AARCH64EB__ - lsr mask, mask, limit -#else - lsl mask, mask, limit -#endif - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -L(not_limit): +L(full_check): +#ifndef __AARCH64EB__ orr syndrome, diff, has_nul - -#ifndef __AARCH64EB__ + add limit, limit, 8 /* Rewind limit to before last subs. */ +L(syndrome_check): + /* Limit was reached. 
Check if the NUL byte or the difference + is before the limit. */ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ clz pos, syndrome rev data2, data2 lsl data1, data1, pos + cmp limit, pos, lsr #3 lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ lsr data1, data1, #56 sub result, data1, data2, lsr #56 + csel result, result, xzr, hi ret #else + /* Not reached the limit, must have found the end or a diff. */ + tbz limit, #63, L(not_limit) + add tmp1, limit, 8 + cbz limit, L(not_limit) + + lsl limit, tmp1, #3 /* Bits -> bytes. */ + mov mask, #~0 + lsr mask, mask, limit + bic data1, data1, mask + bic data2, data2, mask + + /* Make sure that the NUL byte is marked in the syndrome. */ + orr has_nul, has_nul, mask + +L(not_limit): /* For big-endian we cannot use the trick with the syndrome value as carry-propagation can corrupt the upper bits if the trailing bytes in the string contain 0x01. */ @@ -134,10 +141,11 @@ L(not_limit): rev has_nul, has_nul orr syndrome, diff, has_nul clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ +L(end_quick): lsl data1, data1, pos lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then @@ -159,22 +167,12 @@ L(mutual_align): neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ ldr data2, [src2], #8 mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#endif - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ - add limit, limit, count - add tmp3, tmp3, count + LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */ + /* Adjust the limit and ensure it doesn't overflow. */ + adds limit, limit, count + csinv limit, limit, xzr, lo orr data1, data1, tmp2 orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 b L(start_realigned) .p2align 4 @@ -197,13 +195,11 @@ L(done): /* Align the SRC1 to a dword by doing a bytewise compare and then do the dword loop. */ L(try_misaligned_words): - lsr limit_wd, limit, #3 - cbz count, L(do_misaligned) + cbz count, L(src1_aligned) neg count, count and count, count, #7 sub limit, limit, count - lsr limit_wd, limit, #3 L(page_end_loop): ldrb data1w, [src1], #1 @@ -214,48 +210,100 @@ L(page_end_loop): subs count, count, #1 b.hi L(page_end_loop) -L(do_misaligned): - /* Prepare ourselves for the next page crossing. Unlike the aligned - loop, we fetch 1 less dword because we risk crossing bounds on - SRC2. */ - mov count, #8 - subs limit_wd, limit_wd, #1 - b.lo L(done_loop) -L(loop_misaligned): - and tmp2, src2, #0xff8 - eor tmp2, tmp2, #0xff8 - cbz tmp2, L(page_end_loop) + /* The following diagram explains the comparison of misaligned strings. + The bytes are shown in natural order. 
For little-endian, it is + reversed in the registers. The "x" bytes are before the string. + The "|" separates data that is loaded at one time. + src1 | a a a a a a a a | b b b c c c c c | . . . + src2 | x x x x x a a a a a a a a b b b | c c c c c . . . + + After shifting in each step, the data looks like this: + STEP_A STEP_B STEP_C + data1 a a a a a a a a b b b c c c c c b b b c c c c c + data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c + The bytes with "0" are eliminated from the syndrome via mask. + + Align SRC2 down to 16 bytes. This way we can read 16 bytes at a + time from SRC2. The comparison happens in 3 steps. After each step + the loop can exit, or read from SRC1 or SRC2. */ +L(src1_aligned): + /* Calculate offset from 8 byte alignment to string start in bits. No + need to mask offset since shifts are ignoring upper bits. */ + lsl offset, src2, #3 + bic src2, src2, #0xf + mov mask, -1 + neg neg_offset, offset ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) - subs limit_wd, limit_wd, #1 - b.pl L(loop_misaligned) + ldp tmp1, tmp2, [src2], #16 + LS_BK mask, mask, neg_offset + and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */ + /* Skip the first compare if data in tmp1 is irrelevant. */ + tbnz offset, 6, L(misaligned_mid_loop) -L(done_loop): - /* We found a difference or a NULL before the limit was reached. */ - and limit, limit, #7 - cbz limit, L(not_limit) - /* Read the last word. */ - sub src1, src1, 8 - sub src2, src2, 8 - ldr data1, [src1, limit] - ldr data2, [src2, limit] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f +L(loop_misaligned): + /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/ + LS_FW data2, tmp1, offset + LS_BK tmp1, tmp2, neg_offset + subs limit, limit, #8 + orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/ + sub has_nul, data1, zeroones eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) + orr tmp3, data1, #REP8_7f + csinv endloop, diff, xzr, hi /* If limit, set to all ones. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */ + orr tmp3, endloop, has_nul + cbnz tmp3, L(full_check) + + ldr data1, [src1], #8 +L(misaligned_mid_loop): + /* STEP_B: Compare first part of data1 to second part of tmp2. */ + LS_FW data2, tmp2, offset +#ifdef __AARCH64EB__ + /* For big-endian we do a byte reverse to avoid carry-propagation + problem described above. This way we can reuse the has_nul in the + next step and also use syndrome value trick at the end. */ + rev tmp3, data1 + #define data1_fixed tmp3 +#else + #define data1_fixed data1 +#endif + sub has_nul, data1_fixed, zeroones + orr tmp3, data1_fixed, #REP8_7f + eor diff, data2, data1 /* Non-zero if differences found. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + cmp limit, neg_offset, lsr #3 + orr syndrome, diff, has_nul + bic syndrome, syndrome, mask /* Ignore later bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + /* STEP_C: Compare second part of data1 to first part of tmp1. 
*/ + ldp tmp1, tmp2, [src2], #16 + cmp limit, #8 + LS_BK data2, tmp1, neg_offset + eor diff, data2, data1 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + and syndrome, syndrome, mask /* Ignore earlier bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + ldr data1, [src1], #8 + sub limit, limit, #8 + b L(loop_misaligned) + +#ifdef __AARCH64EB__ +L(syndrome_check): + clz pos, syndrome + cmp pos, limit, lsl #3 + b.lo L(end_quick) +#endif L(ret0): mov result, #0 ret - SYM_FUNC_END_PI(strncmp) EXPORT_SYMBOL_NOHWKASAN(strncmp) -- cgit v1.2.3 From e33c89256e66ba64ce5190c7f2c2741e619c6321 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 1 Mar 2022 10:14:35 +0000 Subject: Revert "arm64: Mitigate MTE issues with str{n}cmp()" This reverts commit 59a68d4138086c015ab8241c3267eec5550fbd44. Now that the str{n}cmp functions have been updated to handle MTE properly, the workaround to use the generic functions is no longer needed. Signed-off-by: Joey Gouly Cc: Robin Murphy Cc: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Acked-by: Mark Rutland Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20220301101435.19327-4-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 5 ----- arch/arm64/include/asm/string.h | 2 -- arch/arm64/lib/strcmp.S | 2 +- arch/arm64/lib/strncmp.S | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e8bd0af0141c..8df412178efb 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -535,11 +535,6 @@ alternative_endif #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) #endif -#ifdef CONFIG_KASAN_HW_TAGS -#define EXPORT_SYMBOL_NOHWKASAN(name) -#else -#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name) -#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 95f7686b728d..3a3264ff47b9 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); -#ifndef CONFIG_KASAN_HW_TAGS #define __HAVE_ARCH_STRCMP extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); -#endif #define __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 758de77afd2f..e6815a3dd265 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -187,4 +187,4 @@ L(done): ret SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOHWKASAN(strcmp) +EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index a4884b97e9a8..bc195cb86693 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -306,4 +306,4 @@ L(ret0): mov result, #0 ret SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOHWKASAN(strncmp) +EXPORT_SYMBOL_NOKASAN(strncmp) -- cgit v1.2.3 From 031495635b4668f94e964e037ca93d0d38bfde58 Mon Sep 17 00:00:00 2001 From: Vijay Balakrishna Date: Wed, 2 Mar 2022 09:38:09 -0800 Subject: arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones The following patches resulted in deferring 
crash kernel reservation to mem_init(), mainly aimed at platforms with DMA memory zones (no IOMMU), in particular Raspberry Pi 4. commit 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") commit 8424ecdde7df ("arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges") commit 0a30c53573b0 ("arm64: mm: Move reserve_crashkernel() into mem_init()") commit 2687275a5843 ("arm64: Force NO_BLOCK_MAPPINGS if crashkernel reservation is required") Above changes introduced boot slowdown due to linear map creation for all the memory banks with NO_BLOCK_MAPPINGS, see discussion[1]. The proposed changes restore crash kernel reservation to earlier behavior thus avoids slow boot, particularly for platforms with IOMMU (no DMA memory zones). Tested changes to confirm no ~150ms boot slowdown on our SoC with IOMMU and 8GB memory. Also tested with ZONE_DMA and/or ZONE_DMA32 configs to confirm no regression to deferring scheme of crash kernel memory reservation. In both cases successfully collected kernel crash dump. [1] https://lore.kernel.org/all/9436d033-579b-55fa-9b00-6f4b661c2dd7@linux.microsoft.com/ Signed-off-by: Vijay Balakrishna Cc: stable@vger.kernel.org Reviewed-by: Pasha Tatashin Link: https://lore.kernel.org/r/1646242689-20744-1-git-send-email-vijayb@linux.microsoft.com [will: Add #ifdef CONFIG_KEXEC_CORE guards to fix 'crashk_res' references in allnoconfig build] Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 36 ++++++++++++++++++++++++++++++++---- arch/arm64/mm/mmu.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771..919be440494f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. + * + * Memory reservation for crash kernel either done early or deferred + * depending on DMA memory zones configs (ZONE_DMA) -- + * + * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized + * here instead of max_zone_phys(). This lets early reservation of + * crash kernel memory which has a dependency on arm64_dma_phys_limit. + * Reserving memory early for crash kernel allows linear creation of block + * mappings (greater than page-granularity) for all the memory bank rangs. + * In this scheme a comparatively quicker boot is observed. + * + * If ZONE_DMA configs are defined, crash kernel memory reservation + * is delayed until DMA zone memory range size initilazation performed in + * zone_sizes_init(). The defer is necessary to steer clear of DMA zone + * memory range to avoid overlap allocation. So crash kernel memory boundaries + * are not known when mapping all bank memory ranges, which otherwise means + * not possible to exclude crash kernel range from creating block mappings + * so page-granularity mappings are created for the entire memory range. + * Hence a slightly slower boot is observed. + * + * Note: Page-granularity mapppings are necessary for crash kernel memory + * range for shrinking its size via /sys/kernel/kexec_crash_size interface. 
*/ -phys_addr_t arm64_dma_phys_limit __ro_after_init; +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) +phys_addr_t __ro_after_init arm64_dma_phys_limit; +#else +const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1; +#endif #ifdef CONFIG_KEXEC_CORE /* @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - if (!arm64_dma_phys_limit) - arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -315,6 +339,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -361,7 +388,8 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index acfae9b41cc8..ed21bf83d0b7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -517,7 +517,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -528,6 +528,17 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) { + if (IS_ENABLED(CONFIG_ZONE_DMA) || + IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -554,6 +565,25 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map && + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } + } +#endif } void mark_rodata_ro(void) -- cgit v1.2.3 From ee94b5a061b121305cae8ff46b7fa8325aa84b12 Mon Sep 17 00:00:00 2001 From: Sagar Patel Date: Mon, 7 Mar 2022 17:24:13 -0500 Subject: arm64: drop unused includes of Drop several includes of which are not used. git-blame indicates they were used at some point, but they're not needed anymore. 
Signed-off-by: Sagar Patel Link: https://lore.kernel.org/r/20220307222412.146506-1-sagarmp@cs.unc.edu Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 1 - arch/arm64/kernel/sys_compat.c | 1 - arch/arm64/kernel/traps.c | 1 - 3 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..fe83a42dac8f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index db5159a3055f..12c6864e51e1 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 70fc42470f13..bb878f52ca0a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 819a47d24b61b5e2d660d19c8798f0f9b4498b73 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 27 Feb 2022 17:52:32 +0900 Subject: arm64: clean up tools Makefile Remove unused gen-y. Remove redundant $(shell ...) because 'mkdir' is done in cmd_gen_cpucaps. Replace $(filter-out $(PHONY), $^) with the $(real-prereqs) shorthand. The '&&' in cmd_gen_cpucaps should be replaced with ';' because it is run under 'set -e' environment. Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20220227085232.206529-1-masahiroy@kernel.org Signed-off-by: Will Deacon --- arch/arm64/tools/Makefile | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile index 932b4fe5c768..cf1307188150 100644 --- a/arch/arm64/tools/Makefile +++ b/arch/arm64/tools/Makefile @@ -5,18 +5,14 @@ kapi := $(gen)/asm kapi-hdrs-y := $(kapi)/cpucaps.h -targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) +targets += $(addprefix ../../../, $(kapi-hdrs-y)) PHONY += kapi -kapi: $(kapi-hdrs-y) $(gen-y) - -# Create output directory if not already present -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +kapi: $(kapi-hdrs-y) quiet_cmd_gen_cpucaps = GEN $@ - cmd_gen_cpucaps = mkdir -p $(dir $@) && \ - $(AWK) -f $(filter-out $(PHONY),$^) > $@ + cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@ $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE $(call if_changed,gen_cpucaps) -- cgit v1.2.3 From 83f83cc0c1379413fb1199a78f91ab441a7e76fd Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 3 Mar 2022 16:54:19 +0800 Subject: arm64: perf: Expose some Armv9 common events under sysfs Armv9[1] has introduced some common architectural events (0x400C-0x400F) and common microarchitectural events (0x4010-0x401B), which can be detected by PMCEID0_EL0 from bit44 to bit59, so expose these common events under sysfs. 
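As a rough sketch of the detection described above: a common event in the 0x4000 range maps to bit 32 + (id - 0x4000) of PMCEID0_EL0, which is how IDs 0x400C..0x401B land on bits 44..59. The helper below is illustrative only and not part of the patch; the armv8 PMU driver already filters its event attributes against the PMCEID bitmap, so the new sysfs entries appear only where the hardware advertises them.

/*
 * Illustrative helper, not from the patch: test whether PMCEID0_EL0
 * advertises a given architectural/common event. IDs 0x0000-0x001F sit
 * in the low word, IDs 0x4000-0x401F in bits 63:32.
 */
static bool pmceid0_has_event(u64 pmceid0, unsigned int id)
{
	if (id <= 0x001f)
		return pmceid0 & BIT_ULL(id);
	if (id >= 0x4000 && id <= 0x401f)
		return pmceid0 & BIT_ULL(32 + (id - 0x4000));
	return false;
}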
[1] https://developer.arm.com/documentation/ddi0608/ba Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/20220303085419.64085-1-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 14 ++++++++++++++ arch/arm64/kernel/perf_event.c | 10 ++++++++++ 2 files changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 4ef6f19331f9..b93cafd8313a 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -96,6 +96,20 @@ #define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A #define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + /* additional latency from alignment events */ #define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 #define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cab678ed6618..cb69ff1e6138 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), + ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), + ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), + ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), + ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), + ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), + ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), + ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), + ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), + ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), -- cgit v1.2.3 From f00f3674873bb4ca4984aa3c440f3b0087a3677e Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 3 Mar 2022 18:07:10 +0800 Subject: arm64: perf: Consistently make all event numbers as 16-bits Arm ARM documents PMU event numbers as 16-bits in the table and more 0x4XXX events have been added in the header file, so use 16-bits for all event numbers and make them consistent. No functional change intended. 
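Since the widened literals are numerically identical to the old ones, the "no functional change" claim can be made concrete with a compile-time check (illustrative only, not part of the patch):

/* Zero-padding the constants to four hex digits does not alter their values. */
_Static_assert(0x11 == 0x0011, "CPU_CYCLES encoding unchanged");
_Static_assert(0x3F == 0x003F, "STALL_SLOT encoding unchanged");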
Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/20220303100710.2238-1-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 298 ++++++++++++++++++------------------ 1 file changed, 149 insertions(+), 149 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index b93cafd8313a..3eaf462f5752 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -15,70 +15,70 @@ /* * Common architectural and microarchitectural event numbers. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B -#define ARMV8_PMUV3_PERFCTR_STALL 0x3C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D 
-#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -121,91 +121,91 @@ #define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 /* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 -#define 
ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3 - -#define 
ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F 
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 /* * Per-CPU PMCR: config reg -- cgit v1.2.3 From a639027a1be1d68437e1c2cac6ed16306c84ab3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Feb 2022 18:56:04 +0000 Subject: drivers/perf: Add Apple icestorm/firestorm CPU PMU driver Add a new, weird and wonderful driver for the equally weird Apple PMU HW. Although the PMU itself is functional, we don't know much about the events yet, so this can be considered as yet another random number generator... Nonetheless, it can reliably count at least cycles and instructions in the usually wonky big-little way. For anything else, it of course supports raw event numbers. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/apple_m1_pmu.h | 45 +++ drivers/perf/Kconfig | 7 + drivers/perf/Makefile | 1 + drivers/perf/apple_m1_cpu_pmu.c | 584 ++++++++++++++++++++++++++++++++++ 4 files changed, 637 insertions(+) create mode 100644 drivers/perf/apple_m1_cpu_pmu.c (limited to 'arch') diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h index b848af7faadc..99483b19b99f 100644 --- a/arch/arm64/include/asm/apple_m1_pmu.h +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -6,8 +6,21 @@ #include #include +/* Counters */ +#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0) +#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0) +#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0) +#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0) +#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0) +#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0) +#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0) +#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0) +#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0) +#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0) + /* Core PMC control register */ #define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0) #define PMCR0_IMODE GENMASK(10, 8) #define PMCR0_IMODE_OFF 0 #define PMCR0_IMODE_PMI 1 @@ -15,5 +28,37 @@ #define PMCR0_IMODE_HALT 3 #define PMCR0_IMODE_FIQ 4 #define PMCR0_IACT BIT(11) +#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12) +#define PMCR0_STOP_CNT_ON_PMI BIT(20) +#define PMCR0_CNT_GLOB_L2C_EVT BIT(21) +#define PMCR0_DEFER_PMI_TO_ERET BIT(22) +#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30) +#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32) +#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) + +#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) +#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) +#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) +#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) +#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48) + +#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0) +#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0) +#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0) + +#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0) +#define PMESR0_EVT_CNT_2 GENMASK(7, 0) +#define PMESR0_EVT_CNT_3 GENMASK(15, 8) +#define PMESR0_EVT_CNT_4 GENMASK(23, 16) +#define PMESR0_EVT_CNT_5 GENMASK(31, 24) + +#define SYS_IMP_APL_PMESR1_EL1 
sys_reg(3, 1, 15, 6, 0) +#define PMESR1_EVT_CNT_6 GENMASK(7, 0) +#define PMESR1_EVT_CNT_7 GENMASK(15, 8) +#define PMESR1_EVT_CNT_8 GENMASK(23, 16) +#define PMESR1_EVT_CNT_9 GENMASK(31, 24) + +#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0) +#define PMSR_OVERFLOW GENMASK(9, 0) #endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc686..d4fa0dabb05f 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -146,6 +146,13 @@ config MARVELL_CN10K_TAD_PMU Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family silicons. +config APPLE_M1_CPU_PMU + bool "Apple M1 CPU PMU support" + depends on ARM_PMU && ARCH_APPLE + help + Provides support for the non-architectural CPU PMUs present on + the Apple M1 SoCs and derivatives. + source "drivers/perf/hisilicon/Kconfig" endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2db5418d5b0a..21ad0832e3d4 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o +obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c new file mode 100644 index 000000000000..979a7c2b4f56 --- /dev/null +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CPU PMU driver for the Apple M1 and derivatives + * + * Copyright (C) 2021 Google LLC + * + * Author: Marc Zyngier + * + * Most of the information used in this driver was provided by the + * Asahi Linux project. The rest was experimentally discovered. + */ + +#include +#include +#include + +#include +#include +#include + +#define M1_PMU_NR_COUNTERS 10 + +#define M1_PMU_CFG_EVENT GENMASK(7, 0) + +#define ANY_BUT_0_1 GENMASK(9, 2) +#define ONLY_2_TO_7 GENMASK(7, 2) +#define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6)) +#define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7)) + +/* + * Description of the events we actually know about, as well as those with + * a specific counter affinity. Yes, this is a grand total of two known + * counters, and the rest is anybody's guess. + * + * Not all counters can count all events. Counters #0 and #1 are wired to + * count cycles and instructions respectively, and some events have + * bizarre mappings (every other counter, or even *one* counter). These + * restrictions equally apply to both P and E cores. + * + * It is worth noting that the PMUs attached to P and E cores are likely + * to be different because the underlying uarches are different. At the + * moment, we don't really need to distinguish between the two because we + * know next to nothing about the events themselves, and we already have + * per cpu-type PMU abstractions. + * + * If we eventually find out that the events are different across + * implementations, we'll have to introduce per cpu-type tables. 
+ */ +enum m1_pmu_events { + M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, + M1_PMU_PERFCTR_CPU_CYCLES = 0x02, + M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, + M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, + M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, + M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, + M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, + M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, + M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, + M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, + M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, + M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, + M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, + M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, + M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, + M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, + M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, + M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, + M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, + M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, + M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, + M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, + M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, + M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, + M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + + /* + * From this point onwards, these are not actual HW events, + * but attributes that get stored in hw->config_base. + */ + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), +}; + +/* + * Per-event affinity table. Most events can be installed on counter + * 2-9, but there are a number of exceptions. Note that this table + * has been created experimentally, and I wouldn't be surprised if more + * counters had strange affinities. + */ +static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), + [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, +}; + +static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, + /* No idea about the rest yet */ +}; + +/* sysfs definitions */ +static ssize_t m1_pmu_events_sysfs_show(struct device *dev, + 
struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +#define M1_PMU_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) + +static struct attribute *m1_pmu_event_attrs[] = { + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + NULL, +}; + +static const struct attribute_group m1_pmu_events_attr_group = { + .name = "events", + .attrs = m1_pmu_event_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *m1_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static const struct attribute_group m1_pmu_format_attr_group = { + .name = "format", + .attrs = m1_pmu_format_attrs, +}; + +/* Low level accessors. No synchronisation. */ +#define PMU_READ_COUNTER(_idx) \ + case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1) + +#define PMU_WRITE_COUNTER(_val, _idx) \ + case _idx: \ + write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \ + return + +static u64 m1_pmu_read_hw_counter(unsigned int index) +{ + switch (index) { + PMU_READ_COUNTER(0); + PMU_READ_COUNTER(1); + PMU_READ_COUNTER(2); + PMU_READ_COUNTER(3); + PMU_READ_COUNTER(4); + PMU_READ_COUNTER(5); + PMU_READ_COUNTER(6); + PMU_READ_COUNTER(7); + PMU_READ_COUNTER(8); + PMU_READ_COUNTER(9); + } + + BUG(); +} + +static void m1_pmu_write_hw_counter(u64 val, unsigned int index) +{ + switch (index) { + PMU_WRITE_COUNTER(val, 0); + PMU_WRITE_COUNTER(val, 1); + PMU_WRITE_COUNTER(val, 2); + PMU_WRITE_COUNTER(val, 3); + PMU_WRITE_COUNTER(val, 4); + PMU_WRITE_COUNTER(val, 5); + PMU_WRITE_COUNTER(val, 6); + PMU_WRITE_COUNTER(val, 7); + PMU_WRITE_COUNTER(val, 8); + PMU_WRITE_COUNTER(val, 9); + } + + BUG(); +} + +#define get_bit_offset(index, mask) (__ffs(mask) + (index)) + +static void __m1_pmu_enable_counter(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, true); +} + +static void m1_pmu_disable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, false); +} + +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, true); +} + +static void m1_pmu_disable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, false); +} + +static void m1_pmu_configure_counter(unsigned int index, u8 event, + bool user, bool kernel) +{ + u64 val, user_bit, kernel_bit; + int shift; + + switch (index) { + case 0 ... 
7: + user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7)); + kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7)); + break; + case 8 ... 9: + user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9)); + kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1); + + if (user) + val |= user_bit; + else + val &= ~user_bit; + + if (kernel) + val |= kernel_bit; + else + val &= ~kernel_bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1); + + /* + * Counters 0 and 1 have fixed events. For anything else, + * place the event at the expected location in the relevant + * register (PMESR0 holds the event configuration for counters + * 2-5, resp. PMESR1 for counters 6-9). + */ + switch (index) { + case 0 ... 1: + break; + case 2 ... 5: + shift = (index - 2) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1); + break; + case 6 ... 9: + shift = (index - 6) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1); + break; + } +} + +/* arm_pmu backend */ +static void m1_pmu_enable_event(struct perf_event *event) +{ + bool user, kernel; + u8 evt; + + evt = event->hw.config_base & M1_PMU_CFG_EVENT; + user = event->hw.config_base & M1_PMU_CFG_COUNT_USER; + kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL; + + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); + + m1_pmu_configure_counter(event->hw.idx, evt, user, kernel); + m1_pmu_enable_counter(event->hw.idx); + m1_pmu_enable_counter_interrupt(event->hw.idx); + isb(); +} + +static void m1_pmu_disable_event(struct perf_event *event) +{ + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); +} + +static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + u64 overflow, state; + int idx; + + overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1); + if (!overflow) { + /* Spurious interrupt? */ + state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + state &= ~PMCR0_IACT; + write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1); + isb(); + return IRQ_NONE; + } + + cpu_pmu->stop(cpu_pmu); + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct perf_sample_data data; + + if (!event) + continue; + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + m1_pmu_disable_event(event); + } + + cpu_pmu->start(cpu_pmu); + + return IRQ_HANDLED; +} + +static u64 m1_pmu_read_counter(struct perf_event *event) +{ + return m1_pmu_read_hw_counter(event->hw.idx); +} + +static void m1_pmu_write_counter(struct perf_event *event, u64 value) +{ + m1_pmu_write_hw_counter(value, event->hw.idx); + isb(); +} + +static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT; + unsigned long affinity = m1_pmu_event_affinity[evtype]; + int idx; + + /* + * Place the event on the first free counter that can count + * this event. 
+	 *
+	 * We could do a better job if we had a view of all the events
+	 * counting on the PMU at any given time, and by placing the
+	 * most constraining events first.
+	 */
+	for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+
+	return -EAGAIN;
+}
+
+static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	clear_bit(event->hw.idx, cpuc->used_mask);
+}
+
+static void __m1_pmu_set_mode(u8 mode)
+{
+	u64 val;
+
+	val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
+	val &= ~(PMCR0_IMODE | PMCR0_IACT);
+	val |= FIELD_PREP(PMCR0_IMODE, mode);
+	write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
+	isb();
+}
+
+static void m1_pmu_start(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_FIQ);
+}
+
+static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
+{
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+}
+
+static int m1_pmu_map_event(struct perf_event *event)
+{
+	/*
+	 * Although the counters are 48bit wide, bit 47 is what
+	 * triggers the overflow interrupt. Advertise the counters
+	 * being 47bit wide to mimic the behaviour of the ARM PMU.
+	 */
+	event->hw.flags |= ARMPMU_EVT_47BIT;
+	return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
+}
+
+static void m1_pmu_reset(void *info)
+{
+	int i;
+
+	__m1_pmu_set_mode(PMCR0_IMODE_OFF);
+
+	for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
+		m1_pmu_disable_counter(i);
+		m1_pmu_disable_counter_interrupt(i);
+		m1_pmu_write_hw_counter(0, i);
+	}
+
+	isb();
+}
+
+static int m1_pmu_set_event_filter(struct hw_perf_event *event,
+				   struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+
+	if (!attr->exclude_guest)
+		return -EINVAL;
+	if (!attr->exclude_kernel)
+		config_base |= M1_PMU_CFG_COUNT_KERNEL;
+	if (!attr->exclude_user)
+		config_base |= M1_PMU_CFG_COUNT_USER;
+
+	event->config_base = config_base;
+
+	return 0;
+}
+
+static int m1_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq = m1_pmu_handle_irq;
+	cpu_pmu->enable = m1_pmu_enable_event;
+	cpu_pmu->disable = m1_pmu_disable_event;
+	cpu_pmu->read_counter = m1_pmu_read_counter;
+	cpu_pmu->write_counter = m1_pmu_write_counter;
+	cpu_pmu->get_event_idx = m1_pmu_get_event_idx;
+	cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx;
+	cpu_pmu->start = m1_pmu_start;
+	cpu_pmu->stop = m1_pmu_stop;
+	cpu_pmu->map_event = m1_pmu_map_event;
+	cpu_pmu->reset = m1_pmu_reset;
+	cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
+
+	cpu_pmu->num_events = M1_PMU_NR_COUNTERS;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
+	return 0;
+}
+
+/* Device driver gunk */
+static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_icestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->name = "apple_firestorm_pmu";
+	return m1_pmu_init(cpu_pmu);
+}
+
+static const struct of_device_id m1_pmu_of_device_ids[] = {
+	{ .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, },
+	{ .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
+
+static int m1_pmu_device_probe(struct platform_device *pdev)
+{
+	return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
+}
+
+static struct platform_driver m1_pmu_driver = {
+	.driver = {
+		.name = "apple-m1-cpu-pmu",
+		.of_match_table = m1_pmu_of_device_ids,
+		.suppress_bind_attrs = true,
+	},
+	.probe = m1_pmu_device_probe,
+};
+
+module_platform_driver(m1_pmu_driver);
+MODULE_LICENSE("GPL v2");
-- cgit v1.2.3

From 770093459b9b333380aa71f2c31c60b14895c1df Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 9 Mar 2022 12:21:37 +0000
Subject: arm64: mm: Drop 'const' from conditional arm64_dma_phys_limit definition

Commit 031495635b46 ("arm64: Do not defer reserve_crashkernel() for
platforms with no DMA memory zones") introduced different definitions
for 'arm64_dma_phys_limit' depending on CONFIG_ZONE_DMA{,32} based on
a late suggestion from Pasha. Sadly, this results in a build error
when passing W=1:

| arch/arm64/mm/init.c:90:19: error: conflicting type qualifiers for 'arm64_dma_phys_limit'

Drop the 'const' for now and use '__ro_after_init' consistently.

Link: https://lore.kernel.org/r/202203090241.aj7paWeX-lkp@intel.com
Link: https://lore.kernel.org/r/CA+CK2bDbbx=8R=UthkMesWOST8eJMtOGJdfMRTFSwVmo0Vn0EA@mail.gmail.com
Fixes: 031495635b46 ("arm64: Do not defer reserve_crashkernel() for platforms with no DMA memory zones")
Signed-off-by: Will Deacon
---
 arch/arm64/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 919be440494f..9e26ec80d317 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL(memstart_addr);
 #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)
 phys_addr_t __ro_after_init arm64_dma_phys_limit;
 #else
-const phys_addr_t arm64_dma_phys_limit = PHYS_MASK + 1;
+phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1;
 #endif

 #ifdef CONFIG_KEXEC_CORE
-- cgit v1.2.3

From f90205b95368ee2b56fc523abda6c4d514901d9b Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Wed, 9 Mar 2022 18:06:00 +0000
Subject: arm64: Add cavium_erratum_23154_cpus missing sentinel

Qian Cai reported that playing with CPU hotplug resulted in an
out-of-bounds access due to cavium_erratum_23154_cpus missing a
sentinel indicating the end of the array. Add it in order to restore
peace and harmony in the world of broken HW.

Reported-by: Qian Cai
Signed-off-by: Marc Zyngier
Fixes: 24a147bcef8c ("irqchip/gic-v3: Workaround Marvell erratum 38545 when reading IAR")
Link: https://lore.kernel.org/r/YijmkXp1VG7e8lDx@qian
Cc: Linu Cherian
Cc: Will Deacon
Link: https://lore.kernel.org/r/20220309180600.3990874-1-maz@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/cpu_errata.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 510f47055b91..6485d8e54cca 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -225,6 +225,7 @@ const struct midr_range cavium_erratum_23154_cpus[] = {
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN),
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM),
 	MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO),
+	{},
 };
 #endif
-- cgit v1.2.3

From cf220ad6749b8305ba11bdf601c55a17ad2a715d Mon Sep 17 00:00:00 2001
From: Mark Brown
Date: Wed, 9 Mar 2022 13:12:00 +0000
Subject: arm64/mte: Remove asymmetric mode from the prctl() interface

As pointed out by Evgenii Stepanov, one potential issue with the new
ABI for enabling asymmetric mode is that if there are multiple places
where MTE is configured in a process, some of which were compiled with
the old prctl.h and some of which were compiled with the new prctl.h,
there may be problems keeping track of which MTE modes are requested.
For example, some code may disable only the sync and async modes,
leaving asymmetric mode enabled when it intended to fully disable MTE.

In order to avoid such mishaps, remove asymmetric mode from the
prctl() interface and instead allow it implicitly when both sync and
async modes are requested. This should not disrupt userspace, since a
process requesting both may already see a mix of sync and async modes
due to differing defaults between CPUs, or to the default changing
while the process is running. It does mean, however, that userspace is
unable to explicitly request asymmetric mode without changing the
system default for CPUs.

Reported-by: Evgenii Stepanov
Signed-off-by: Mark Brown
Reviewed-by: Catalin Marinas
Reviewed-by: Evgenii Stepanov
Cc: Peter Collingbourne
Cc: Joey Gouly
Cc: Branislav Rankov
Link: https://lore.kernel.org/r/20220309131200.112637-1-broonie@kernel.org
Signed-off-by: Will Deacon
---
 Documentation/arm64/memory-tagging-extension.rst | 15 ++++++++-------
 arch/arm64/kernel/mte.c                          | 13 ++++++++++---
 arch/arm64/kernel/process.c                      |  2 --
 include/uapi/linux/prctl.h                       |  4 +---
 4 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'arch')

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index 42677d48a114..7e812a51e506 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -88,7 +88,6 @@ bit-field:
   (ignored if combined with other options)
 - ``PR_MTE_TCF_SYNC`` - *Synchronous* tag check fault mode
 - ``PR_MTE_TCF_ASYNC`` - *Asynchronous* tag check fault mode
-- ``PR_MTE_TCF_ASYMM`` - *Asymmetric* tag check fault mode

 If no modes are specified, tag check faults are ignored. If a single
 mode is specified, the program will run in that mode. If multiple
@@ -149,17 +148,19 @@ default preferred mode for each CPU is ``async``.
 To allow a program to potentially run in the CPU's preferred tag
 checking mode, the user program may set multiple tag check fault mode
 bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
-flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
-mode is in the task's set of provided tag checking modes, that
-mode will be selected. Otherwise, one of the modes in the task's mode
-selected by the kernel using the preference order:
+flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
+modes are requested then asymmetric mode may also be selected by the
+kernel. If the CPU's preferred tag checking mode is in the task's set
+of provided tag checking modes, that mode will be selected. Otherwise,
+one of the modes in the task's mode will be selected by the kernel
+from the task's mode set using the preference order:

 1. Asynchronous
 2. Asymmetric
 3. Synchronous

-If asymmetric mode is specified by the program but not supported by
-either the system or the kernel then an error will be returned.
+Note that there is no way for userspace to request multiple modes and
+also disable asymmetric mode.

 Initial process state
 ---------------------
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index cbbd8d93fc50..78b3e0f8e997 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -311,7 +311,16 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 	if (arg & PR_MTE_TCF_SYNC)
 		mte_ctrl |= MTE_CTRL_TCF_SYNC;
-	if (arg & PR_MTE_TCF_ASYMM)
+
+	/*
+	 * If the system supports it and both sync and async modes are
+	 * specified then implicitly enable asymmetric mode.
+	 * Userspace could see a mix of both sync and async anyway due
+	 * to differing or changing defaults on CPUs.
+	 */
+	if (cpus_have_cap(ARM64_MTE_ASYMM) &&
+	    (arg & PR_MTE_TCF_ASYNC) &&
+	    (arg & PR_MTE_TCF_SYNC))
 		mte_ctrl |= MTE_CTRL_TCF_ASYMM;

 	task->thread.mte_ctrl = mte_ctrl;
@@ -341,8 +350,6 @@ long get_mte_ctrl(struct task_struct *task)
 		ret |= PR_MTE_TCF_ASYNC;
 	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
 		ret |= PR_MTE_TCF_SYNC;
-	if (mte_ctrl & MTE_CTRL_TCF_ASYMM)
-		ret |= PR_MTE_TCF_ASYMM;

 	return ret;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 941cfa7117b9..7fa97df55e3a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -637,8 +637,6 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 	if (system_supports_mte())
 		valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
 			| PR_MTE_TAG_MASK;
-	if (cpus_have_cap(ARM64_MTE_ASYMM))
-		valid_mask |= PR_MTE_TCF_ASYMM;

 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 4ae2b21e4066..e998764f0262 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -238,9 +238,7 @@ struct prctl_mm_map {
 # define PR_MTE_TCF_NONE	0UL
 # define PR_MTE_TCF_SYNC	(1UL << 1)
 # define PR_MTE_TCF_ASYNC	(1UL << 2)
-# define PR_MTE_TCF_ASYMM	(1UL << 19)
-# define PR_MTE_TCF_MASK	(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | \
-				 PR_MTE_TCF_ASYMM)
+# define PR_MTE_TCF_MASK	(PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC)
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT	3
 # define PR_MTE_TAG_MASK	(0xffffUL << PR_MTE_TAG_SHIFT)
-- cgit v1.2.3
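With asymmetric mode gone from the uapi bits, a process that wants to run in
whatever tag checking mode the CPU prefers simply requests both remaining
modes and lets the kernel decide, possibly upgrading to asymmetric mode as in
the set_mte_ctrl() hunk above. The code below is a minimal userspace sketch of
that pattern, not part of the patches themselves; it assumes a kernel and
<linux/prctl.h> carrying this change, and the 0xfffe tag-exclusion mask is
just an arbitrary example value.

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/*
	 * Enable the tagged address ABI and request both sync and async
	 * tag check fault modes; on hardware with ARM64_MTE_ASYMM the
	 * kernel may transparently run the task in asymmetric mode.
	 * The tag mask excludes tag 0 from random tag generation (an
	 * arbitrary choice for this example).
	 */
	unsigned long flags = PR_TAGGED_ADDR_ENABLE |
			      PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC |
			      (0xfffeUL << PR_MTE_TAG_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)) {
		perror("PR_SET_TAGGED_ADDR_CTRL");
		return 1;
	}

	/*
	 * Reading the configuration back with PR_GET_TAGGED_ADDR_CTRL
	 * now only ever reports the SYNC/ASYNC bits, since get_mte_ctrl()
	 * no longer advertises an asymmetric-mode flag.
	 */
	return 0;
}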
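Looking back at the Apple M1 PMU driver registered earlier in this log: once
probed, each PMU is exposed to perf as a dynamic event source under the name
set in m1_pmu_ice_init()/m1_pmu_fire_init(). The sketch below shows one
plausible way to count an event on the Icestorm PMU through
perf_event_open(); it is illustrative only, and the raw event number 0x02 is
an assumption; real event encodings should be taken from the PMU's events
directory in sysfs.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int type, fd;
	FILE *f;

	/* Dynamic PMUs advertise their perf type id in sysfs. */
	f = fopen("/sys/bus/event_source/devices/apple_icestorm_pmu/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x02;		/* assumed raw event number */
	attr.disabled = 1;
	attr.exclude_kernel = 1;	/* honoured via m1_pmu_set_event_filter() */
	attr.exclude_guest = 1;		/* the event filter insists on this */

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload to be measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("count: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}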