From d233ab3c5c5ed4b3d2201bddb71dab5a2946c31b Mon Sep 17 00:00:00 2001 From: Heinrich Schuchardt Date: Sun, 25 Sep 2022 02:47:57 +0200 Subject: riscv/vdso: typo therefor The adverbs 'therefor' and 'therefore' have different meaning. As the meaning here is 'consequently' the spelling should be 'therefore'. Signed-off-by: Heinrich Schuchardt Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20220925004757.9089-1-heinrich.schuchardt@canonical.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/vdso.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index af981426fe0f..a7644f46d0e5 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ /* * All systems with an MMU have a VDSO, but systems without an MMU don't - * support shared libraries and therefor don't have one. + * support shared libraries and therefore don't have one. */ #ifdef CONFIG_MMU -- cgit v1.2.3 From 5e9c68ea777594a2d63fa44c0509782e90821707 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 12 Oct 2022 01:18:40 +0200 Subject: RISC-V: Cache SBI vendor values sbi_get_mvendorid(), sbi_get_marchid() and sbi_get_mimpid() might get called multiple times, though the values of these CSRs should not change during the runtime of a specific machine. Though the values can be different depending on which hart of the system they get called. So hook into the newly introduced cpuinfo struct to allow retrieving these cached values via new functions. Also use arch_initcall for the cpuinfo setup instead, as that now clearly is "architecture specific initialization" and also makes these information available slightly earlier. [caching vendor ids] Suggested-by: Atish Patra [using cpuinfo struct as cache] Suggested-by: Anup Patel Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/ Signed-off-by: Heiko Stuebner Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 5 +++++ arch/riscv/kernel/cpu.c | 30 +++++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2a0ef738695e..4ca7fbacff42 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -327,4 +327,9 @@ int sbi_err_map_linux_errno(int err); static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ + +unsigned long riscv_cached_mvendorid(unsigned int cpu_id); +unsigned long riscv_cached_marchid(unsigned int cpu_id); +unsigned long riscv_cached_mimpid(unsigned int cpu_id); + #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index fa427bdcf773..bf9dd6764bad 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -70,8 +70,6 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } -#ifdef CONFIG_PROC_FS - struct riscv_cpuinfo { unsigned long mvendorid; unsigned long marchid; @@ -79,6 +77,30 @@ struct riscv_cpuinfo { }; static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +unsigned long riscv_cached_mvendorid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->mvendorid; +} +EXPORT_SYMBOL(riscv_cached_mvendorid); + +unsigned long riscv_cached_marchid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->marchid; +} +EXPORT_SYMBOL(riscv_cached_marchid); + +unsigned long riscv_cached_mimpid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->mimpid; +} +EXPORT_SYMBOL(riscv_cached_mimpid); + static int riscv_cpuinfo_starting(unsigned int cpu) { struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); @@ -113,7 +135,9 @@ static int __init riscv_cpuinfo_init(void) return 0; } -device_initcall(riscv_cpuinfo_init); +arch_initcall(riscv_cpuinfo_init); + +#ifdef CONFIG_PROC_FS #define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ { \ -- cgit v1.2.3 From 65e9fb081877a18c432c6ff344937b7277c044b5 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 12 Oct 2022 01:18:41 +0200 Subject: drivers/perf: riscv_pmu_sbi: add support for PMU variant on T-Head C9xx cores With the T-HEAD C9XX cores being designed before or during the ratification to the SSCOFPMF extension, it implements functionality very similar but not equal to it. It implements overflow handling and also some privilege-mode filtering. While SSCOFPMF supports this for all modes, the C9XX only implements the filtering for M-mode and S-mode but not user-mode. So add some adaptions to allow the C9XX to still handle its PMU through the regular SBI PMU interface instead of defining new interfaces or drivers. To work properly, this requires a matching change in SBI, though the actual interface between kernel and SBI does not change. The main differences are a the overflow CSR and irq number. As the reading of the overflow-csr is in the hot-path during irq handling, use an errata and alternatives to not introduce new conditionals there. Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/ Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.erratas | 13 +++++++++++++ arch/riscv/errata/thead/errata.c | 19 +++++++++++++++++++ arch/riscv/include/asm/errata_list.h | 16 +++++++++++++++- drivers/perf/riscv_pmu_sbi.c | 34 ++++++++++++++++++++++++---------- 4 files changed, 71 insertions(+), 11 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index f3623df23b5f..69621ae6d647 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -66,4 +66,17 @@ config ERRATA_THEAD_CMO If you don't know what to do here, say "Y". +config ERRATA_THEAD_PMU + bool "Apply T-Head PMU errata" + depends on ERRATA_THEAD && RISCV_PMU_SBI + default y + help + The T-Head C9xx cores implement a PMU overflow extension very + similar to the core SSCOFPMF extension. + + This will apply the overflow errata to handle the non-standard + behaviour via the regular SBI PMU driver and interface. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 21546937db39..fac5742d1c1e 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -47,6 +47,22 @@ static bool errata_probe_cmo(unsigned int stage, return true; } +static bool errata_probe_pmu(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PMU)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -58,6 +74,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_cmo(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + if (errata_probe_pmu(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 19a771085781..4180312d2a70 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -6,6 +6,7 @@ #define ASM_ERRATA_LIST_H #include +#include #include #ifdef CONFIG_ERRATA_SIFIVE @@ -17,7 +18,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_PMU 2 +#define ERRATA_THEAD_NUMBER 3 #endif #define CPUFEATURE_SVPBMT 0 @@ -142,6 +144,18 @@ asm volatile(ALTERNATIVE_2( \ "r"((unsigned long)(_start) + (_size)) \ : "a0") +#define THEAD_C9XX_RV_IRQ_PMU 17 +#define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 + +#define ALT_SBI_PMU_OVERFLOW(__ovl) \ +asm volatile(ALTERNATIVE( \ + "csrr %0, " __stringify(CSR_SSCOUNTOVF), \ + "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \ + THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ + CONFIG_ERRATA_THEAD_PMU) \ + : "=r" (__ovl) : \ + : "memory") + #endif /* __ASSEMBLY__ */ #endif diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 3852c18362f5..f6507efe2a58 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -47,6 +48,8 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = { * per_cpu in case of harts with different pmu counters */ static union sbi_pmu_ctr_info *pmu_ctr_list; +static bool riscv_pmu_use_irq; +static unsigned int riscv_pmu_irq_num; static unsigned int riscv_pmu_irq; struct sbi_pmu_event_data { @@ -580,7 +583,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); event = cpu_hw_evt->events[fidx]; if (!event) { - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); return IRQ_NONE; } @@ -588,13 +591,13 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) pmu_sbi_stop_hw_ctrs(pmu); /* Overflow status register should only be read after counter are stopped */ - overflow = csr_read(CSR_SSCOUNTOVF); + ALT_SBI_PMU_OVERFLOW(overflow); /* * Overflow interrupt pending bit should only be cleared after stopping * all the counters to avoid any race condition. */ - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); /* No overflow bit is set */ if (!overflow) @@ -661,10 +664,10 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) /* Stop all the counters so that they can be enabled from perf */ pmu_sbi_stop_all(pmu); - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { cpu_hw_evt->irq = riscv_pmu_irq; - csr_clear(CSR_IP, BIT(RV_IRQ_PMU)); - csr_set(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IP, BIT(riscv_pmu_irq_num)); + csr_set(CSR_IE, BIT(riscv_pmu_irq_num)); enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); } @@ -673,9 +676,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) { - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { disable_percpu_irq(riscv_pmu_irq); - csr_clear(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IE, BIT(riscv_pmu_irq_num)); } /* Disable all counters access for user mode now */ @@ -691,7 +694,18 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde struct device_node *cpu, *child; struct irq_domain *domain = NULL; - if (!riscv_isa_extension_available(NULL, SSCOFPMF)) + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + riscv_pmu_irq_num = RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) && + riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && + riscv_cached_marchid(0) == 0 && + riscv_cached_mimpid(0) == 0) { + riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } + + if (!riscv_pmu_use_irq) return -EOPNOTSUPP; for_each_of_cpu_node(cpu) { @@ -713,7 +727,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return -ENODEV; } - riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU); + riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num); if (!riscv_pmu_irq) { pr_err("Failed to map PMU interrupt for node\n"); return -ENODEV; -- cgit v1.2.3 From 1b52861f0e04da43013f88dd56464b5719a974e3 Mon Sep 17 00:00:00 2001 From: Jinyu Tang Date: Sun, 9 Oct 2022 21:45:03 +0800 Subject: riscv: support update_mmu_tlb() Add macro definition to support update_mmu_tlb() for riscv, this function is from commit:7df676974359 ("mm/memory.c:Update local TLB if PTE entry exists"). update_mmu_tlb() is used when a thread notice that other cpu thread has handled the fault and changed the PTE. For MIPS, it's worth to do that,this cpu thread will trap in tlb fault again otherwise. For RISCV, it's also better to flush local tlb than do nothing in update_mmu_tlb(). There are two kinds of page fault that have update_mmu_tlb() inside: 1.page fault which PTE is NOT none, only protection check error, like write protection fault. If updata_mmu_tlb() is empty, after finsh page fault this time and re-execute, cpu will find address but protection checked error in tlb again. So this will cause another page fault. PTE in memory is good now,so update_mmu_cache() in handle_pte_fault() will be executed. If updata_mmu_tlb() is not empty flush local tlb, cpu won't find this address in tlb next time, and get entry in physical memory, so it won't cause another page fault. 2.page fault which PTE is none or swapped. For this case, this cpu thread won't cause another page fault,cpu will have tlb miss when re-execute, and get entry in memory directly. But "set pte in phycial memory and flush local tlb" is pratice in Linux, it's better to flush local tlb if it find entry in phycial memory has changed. Maybe it's same for other ARCH which can't detect PTE changed and update it in local tlb automatically. Signed-off-by: Jinyu Tang Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20221009134503.18783-1-tjytimi@163.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..c61ae83aadee 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -418,6 +418,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, local_flush_tlb_page(address); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { -- cgit v1.2.3 From 3558927fc2b2fd0af309648f4071035e08719866 Mon Sep 17 00:00:00 2001 From: Cleo John Date: Mon, 10 Oct 2022 20:28:48 +0200 Subject: riscv: fix styling in ucontext header Change the two comments in ucontext.h by getting them up to the coding style proposed by torvalds. Signed-off-by: Cleo John Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221010182848.GA28029@watet-ms7b87 Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/ucontext.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h index 44eb993950e5..516bd0bb0da5 100644 --- a/arch/riscv/include/uapi/asm/ucontext.h +++ b/arch/riscv/include/uapi/asm/ucontext.h @@ -15,19 +15,23 @@ struct ucontext { struct ucontext *uc_link; stack_t uc_stack; sigset_t uc_sigmask; - /* There's some padding here to allow sigset_t to be expanded in the + /* + * There's some padding here to allow sigset_t to be expanded in the * future. Though this is unlikely, other architectures put uc_sigmask * at the end of this structure and explicitly state it can be - * expanded, so we didn't want to box ourselves in here. */ + * expanded, so we didn't want to box ourselves in here. + */ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; - /* We can't put uc_sigmask at the end of this structure because we need + /* + * We can't put uc_sigmask at the end of this structure because we need * to be able to expand sigcontext in the future. For example, the * vector ISA extension will almost certainly add ISA state. We want * to ensure all user-visible ISA state can be saved and restored via a * ucontext, so we're putting this at the end in order to allow for * infinite extensibility. Since we know this will be extended and we * assume sigset_t won't be extended an extreme amount, we're - * prioritizing this. */ + * prioritizing this. + */ struct sigcontext uc_mcontext; }; -- cgit v1.2.3 From 310f541a027b1d5dc68f44f176cde618e6ee9691 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 12 Oct 2022 20:00:37 +0800 Subject: riscv: Enable HAVE_ARCH_HUGE_VMAP for 64BIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required page table functions. With this feature, ioremap area will be mapped with huge page granularity according to its actual size. This feature can be disabled by kernel parameter "nohugeiomap". Signed-off-by: Liu Shixin Reviewed-by: Björn Töpel Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20221012120038.1034354-2-liushixin2@huawei.com [Palmer: minor formatting] Signed-off-by: Palmer Dabbelt --- .../features/vm/huge-vmap/arch-support.txt | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/vmalloc.h | 18 +++++ arch/riscv/mm/Makefile | 1 + arch/riscv/mm/pgtable.c | 83 ++++++++++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/mm/pgtable.c (limited to 'arch/riscv') diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index 13b4940e0c3a..7274a4b15bcc 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -21,7 +21,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | TODO | diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..db2082dd456d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index ff9abc00d139..48da5371f1e9 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -1,4 +1,22 @@ #ifndef _ASM_RISCV_VMALLOC_H #define _ASM_RISCV_VMALLOC_H +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#define IOREMAP_MAX_ORDER (PUD_SHIFT) + +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return true; +} + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return true; +} + +#endif + #endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d76aabf4b94d..ce7f121ad2dc 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,6 +13,7 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o +obj-y += pgtable.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c new file mode 100644 index 000000000000..6645ead1a7c1 --- /dev/null +++ b/arch/riscv/mm/pgtable.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} + +void p4d_clear_huge(p4d_t *p4d) +{ +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), prot); + + set_pud(pud, new_pud); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_leaf(READ_ONCE(*pud))) + return 0; + pud_clear(pud); + return 1; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd = pud_pgtable(*pud); + int i; + + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(NULL, pte); + } + } + + pmd_free(NULL, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), prot); + + set_pmd(pmd, new_pmd); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_leaf(READ_ONCE(*pmd))) + return 0; + pmd_clear(pmd); + return 1; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + pte_free_kernel(NULL, pte); + return 1; +} + +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ -- cgit v1.2.3 From be79afc740b5a1b2048cd67580cdb9d76d7e6cc2 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 12 Oct 2022 20:00:38 +0800 Subject: riscv: Enable HAVE_ARCH_HUGE_VMALLOC for 64BIT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After we support HAVE_ARCH_HUGE_VMAP, we can now enable HAVE_ARCH_HUGE_VMALLOC too. This feature has been used in kvmalloc and alloc_large_system_hash for now. This feature can be disabled by kernel parameters "nohugevmalloc". Signed-off-by: Liu Shixin Reviewed-by: Björn Töpel Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20221012120038.1034354-3-liushixin2@huawei.com [Palmer: minor formatting] Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index db2082dd456d..7cd981f96f48 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -72,6 +72,7 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL -- cgit v1.2.3 From effae0e3d9e1139d583e9b5d050f4f948825b8a3 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 15 Nov 2022 10:51:33 +0000 Subject: riscv: Kconfig: Enable cpufreq kconfig menu Enable cpufreq kconfig menu for RISC-V. Signed-off-by: Lad Prabhakar Reviewed-by: Conor Dooley Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221115105135.1180490-2-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7cd981f96f48..3b41165a8b10 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -684,6 +684,8 @@ menu "CPU Power Management" source "drivers/cpuidle/Kconfig" +source "drivers/cpufreq/Kconfig" + endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" -- cgit v1.2.3 From 41555cc9e2e9778ddc7c0293a4a2e4995e332643 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 21 Oct 2022 17:00:30 +0100 Subject: RISC-V: enable sparsemem by default for defconfig on an arch level, RISC-V defaults to FLATMEM. On PolarFire SoC, the memory layout is almost always sparse, with a maximum of 1 GiB at 0x8000_0000 & a possible 16 GiB range at 0x10_0000_0000. The Icicle kit, for example, has 2 GiB of DDR - so there's a big hole in the memory map between the two gigs. Prior to v6.1-rc1, boot times from defconfig builds were pretty bad on Icicle but enabling sparsemem would fix those issues. As of v6.1-rc1, the Icicle kit no longer boots from defconfig builds with the in-kernel devicetree. A change to the memory map resulted in a futher "sparse-ification", producing a splat on boot: OF: fdt: Ignoring memory range 0x80000000 - 0x80200000 Machine model: Microchip PolarFire-SoC Icicle Kit earlycon: ns16550a0 at MMIO32 0x0000000020100000 (options '115200n8') printk: bootconsole [ns16550a0] enabled printk: debug: skip boot console de-registration. efi: UEFI not found. Zone ranges: DMA32 [mem 0x0000000080200000-0x00000000ffffffff] Normal [mem 0x0000000100000000-0x000000107fffffff] Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000080200000-0x00000000bfbfffff] node 0: [mem 0x00000000bfc00000-0x00000000bfffffff] node 0: [mem 0x0000001040000000-0x000000107fffffff] Initmem setup node 0 [mem 0x0000000080200000-0x000000107fffffff] Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-dirty #1 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) Call Trace: [] show_stack+0x30/0x3c [] dump_stack_lvl+0x4a/0x66 [] dump_stack+0x18/0x20 [] panic+0x124/0x2c6 [] free_area_init_core+0x0/0x11e [] free_area_init_node+0xc2/0xf6 [] free_area_init+0x222/0x260 [] misc_mem_init+0x62/0x9a [] setup_arch+0xb0/0xea [] start_kernel+0x88/0x4ee ---[ end Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map ]--- With the aim of keeping defconfig builds booting on icicle, enable SPARSEMEM_MANUAL. Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221021160028.4042304-1-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/riscv') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 05fd5fcf24f9..daba5d743862 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -38,6 +38,7 @@ CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_NET=y CONFIG_PACKET=y -- cgit v1.2.3 From 8a6841c439dfbba2067a533b0e8264ea438689f6 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:29 +0000 Subject: RISC-V: use REG_S/REG_L for mcount In preparation for rv32i ftrace support, convert mcount routines to use native sized loads/stores. Reviewed-by: Andrew Jones Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-2-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 6d462681c9c0..9cf0904afd6d 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -15,8 +15,8 @@ .macro SAVE_ABI_STATE addi sp, sp, -16 - sd s0, 0(sp) - sd ra, 8(sp) + REG_S s0, 0(sp) + REG_S ra, 8(sp) addi s0, sp, 16 .endm @@ -26,22 +26,22 @@ */ .macro SAVE_RET_ABI_STATE addi sp, sp, -32 - sd s0, 16(sp) - sd ra, 24(sp) - sd a0, 8(sp) + REG_S s0, 16(sp) + REG_S ra, 24(sp) + REG_S a0, 8(sp) addi s0, sp, 32 .endm .macro RESTORE_ABI_STATE - ld ra, 8(sp) - ld s0, 0(sp) + REG_L ra, 8(sp) + REG_L s0, 0(sp) addi sp, sp, 16 .endm .macro RESTORE_RET_ABI_STATE - ld ra, 24(sp) - ld s0, 16(sp) - ld a0, 8(sp) + REG_L ra, 24(sp) + REG_L s0, 16(sp) + REG_L a0, 8(sp) addi sp, sp, 32 .endm @@ -82,16 +82,16 @@ ENTRY(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return - ld t1, 0(t0) + REG_L t1, 0(t0) bne t1, t4, do_ftrace_graph_caller la t3, ftrace_graph_entry - ld t2, 0(t3) + REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub bne t2, t6, do_ftrace_graph_caller #endif la t3, ftrace_trace_function - ld t5, 0(t3) + REG_L t5, 0(t3) bne t5, t4, do_trace ret @@ -104,7 +104,7 @@ do_ftrace_graph_caller: addi a0, s0, -8 mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - ld a2, -16(s0) + REG_L a2, -16(s0) #endif SAVE_ABI_STATE call prepare_ftrace_return @@ -117,7 +117,7 @@ do_ftrace_graph_caller: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ do_trace: - ld a1, -8(s0) + REG_L a1, -8(s0) mv a0, ra SAVE_ABI_STATE -- cgit v1.2.3 From 3bd7743f8d6d7171db9897a746038eefd52a1fbd Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:30 +0000 Subject: RISC-V: reduce mcount save space on RV32 For RV32 we can reduce the size of the ABI save+restore state by using SZREG so that register stores are packed rather than on an 8 byte boundary. Signed-off-by: Jamie Iles Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20221115200832.706370-3-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 9cf0904afd6d..613bd07c6268 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -15,8 +15,8 @@ .macro SAVE_ABI_STATE addi sp, sp, -16 - REG_S s0, 0(sp) - REG_S ra, 8(sp) + REG_S s0, 0*SZREG(sp) + REG_S ra, 1*SZREG(sp) addi s0, sp, 16 .endm @@ -25,24 +25,24 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -32 - REG_S s0, 16(sp) - REG_S ra, 24(sp) - REG_S a0, 8(sp) - addi s0, sp, 32 + addi sp, sp, -4*SZREG + REG_S s0, 2*SZREG(sp) + REG_S ra, 3*SZREG(sp) + REG_S a0, 1*SZREG(sp) + addi s0, sp, 4*SZREG .endm .macro RESTORE_ABI_STATE - REG_L ra, 8(sp) - REG_L s0, 0(sp) + REG_L ra, 1*SZREG(sp) + REG_L s0, 0*SZREG(sp) addi sp, sp, 16 .endm .macro RESTORE_RET_ABI_STATE - REG_L ra, 24(sp) - REG_L s0, 16(sp) - REG_L a0, 8(sp) - addi sp, sp, 32 + REG_L ra, 3*SZREG(sp) + REG_L s0, 2*SZREG(sp) + REG_L a0, 1*SZREG(sp) + addi sp, sp, 4*SZREG .endm ENTRY(ftrace_stub) @@ -101,10 +101,10 @@ ENTRY(MCOUNT_NAME) * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ do_ftrace_graph_caller: - addi a0, s0, -8 + addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - REG_L a2, -16(s0) + REG_L a2, -2*SZREG(s0) #endif SAVE_ABI_STATE call prepare_ftrace_return @@ -117,7 +117,7 @@ do_ftrace_graph_caller: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ do_trace: - REG_L a1, -8(s0) + REG_L a1, -SZREG(s0) mv a0, ra SAVE_ABI_STATE -- cgit v1.2.3 From dc58a24db8c12ea361e94eaf53adc5d471534694 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:31 +0000 Subject: RISC-V: preserve a1 in mcount The RISC-V ELF psABI states that both a0 and a1 are used for return values so we should preserve them both in return_to_handler. This is especially important for RV32 for functions returning a 64-bit quantity otherwise the return value can be corrupted and undefined behaviour results. Reviewed-by: Andrew Jones Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-4-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/mcount.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 613bd07c6268..30102aadc4d7 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -29,6 +29,7 @@ REG_S s0, 2*SZREG(sp) REG_S ra, 3*SZREG(sp) REG_S a0, 1*SZREG(sp) + REG_S a1, 0*SZREG(sp) addi s0, sp, 4*SZREG .endm @@ -42,6 +43,7 @@ REG_L ra, 3*SZREG(sp) REG_L s0, 2*SZREG(sp) REG_L a0, 1*SZREG(sp) + REG_L a1, 0*SZREG(sp) addi sp, sp, 4*SZREG .endm @@ -71,9 +73,9 @@ ENTRY(return_to_handler) mv a0, t6 #endif call ftrace_return_to_handler - mv a1, a0 + mv a2, a0 RESTORE_RET_ABI_STATE - jalr a1 + jalr a2 ENDPROC(return_to_handler) #endif -- cgit v1.2.3 From f32b4b467ebd8a035f8342b7ea27efc84b10d96b Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Tue, 15 Nov 2022 20:08:32 +0000 Subject: RISC-V: enable dynamic ftrace for RV32I The RISC-V mcount function is now capable of supporting RV32I so make it available in the kernel config. Signed-off-by: Jamie Iles Link: https://lore.kernel.org/r/20221115200832.706370-5-jamie@jamieiles.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..5ae4f7ce2a05 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -129,6 +129,11 @@ config RISCV select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -274,11 +279,6 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice -- cgit v1.2.3 From 6925ba3d9b8ccf1989b4cf13d6f0d7e341899481 Mon Sep 17 00:00:00 2001 From: Hal Feng Date: Fri, 18 Nov 2022 09:17:14 +0800 Subject: RISC-V: defconfig: Enable CONFIG_SERIAL_8250_DW Add CONFIG_SERIAL_8250_DW=y, which is a necessary option for StarFive JH7110 and JH7100 SoCs to boot with serial ports. Reviewed-by: Conor Dooley Signed-off-by: Hal Feng Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221118011714.70877-9-hal.feng@starfivetech.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/riscv') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index daba5d743862..74ed7037314f 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -123,6 +123,7 @@ CONFIG_MICROSEMI_PHY=y CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y -- cgit v1.2.3 From d33deda095d3637d218e7eed441633b2a01e1413 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 24 Oct 2022 09:47:24 +0000 Subject: riscv/mm: hugepage's PG_dcache_clean flag is only set in head page HugeTLB pages are always fully mapped, so only setting head page's PG_dcache_clean flag is enough. Signed-off-by: Tong Tiangen Link: https://lore.kernel.org/lkml/20220331065640.5777-2-songmuchun@bytedance.com/ Link: https://lore.kernel.org/r/20221024094725.3054311-2-tongtiangen@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/cacheflush.h | 7 +++++++ arch/riscv/mm/cacheflush.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8a5c246b0a21..c172d05de474 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -17,6 +17,13 @@ static inline void local_flush_icache_all(void) static inline void flush_dcache_page(struct page *page) { + /* + * HugeTLB pages are always fully mapped and only head page will be + * set PG_dcache_clean (see comments in flush_icache_pte()). + */ + if (PageHuge(page)) + page = compound_head(page); + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); } diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 6cb7d96ad9c7..062559c04fc3 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -82,6 +82,13 @@ void flush_icache_pte(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) flush_icache_all(); } -- cgit v1.2.3 From d8bf77a1dc3079692f54be3087a5fd16d90027b0 Mon Sep 17 00:00:00 2001 From: Tong Tiangen Date: Mon, 24 Oct 2022 09:47:25 +0000 Subject: riscv/mm: add arch hook arch_clear_hugepage_flags With the PG_arch_1 we keep track if the page's data cache is clean, architecture rely on this property to treat new pages as dirty with respect to the data cache and perform the flushing before mapping the pages into userspace. This patch adds a new architecture hook, arch_clear_hugepage_flags,so that architectures which rely on the page flags being in a particular state for fresh allocations can adjust the flags accordingly when a page is freed into the pool. Fixes: 9e953cda5cdf ("riscv: Introduce huge page support for 32/64bit kernel") Signed-off-by: Tong Tiangen Link: https://lore.kernel.org/r/20221024094725.3054311-3-tongtiangen@huawei.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hugetlb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index a5c2ca1d1cd8..ec19d6afc896 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,4 +5,10 @@ #include #include +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} +#define arch_clear_hugepage_flags arch_clear_hugepage_flags + #endif /* _ASM_RISCV_HUGETLB_H */ -- cgit v1.2.3 From b57c2f124098459a4acc15d5044f87cba31c87f0 Mon Sep 17 00:00:00 2001 From: Binglei Wang Date: Tue, 25 Oct 2022 16:18:32 +0100 Subject: riscv: add riscv rethook implementation Implement the kretprobes on riscv arch by using rethook machenism which abstracts general kretprobe info into a struct rethook_node to be embedded in the struct kretprobe_instance. Acked-by: Masami Hiramatsu (Google) Signed-off-by: Binglei Wang Signed-off-by: Conor Dooley Link: https://lore.kernel.org/r/20221025151831.1097417-1-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/kprobes.h | 2 - arch/riscv/kernel/probes/Makefile | 2 +- arch/riscv/kernel/probes/kprobes.c | 13 ---- arch/riscv/kernel/probes/kprobes_trampoline.S | 93 --------------------------- arch/riscv/kernel/probes/rethook.c | 27 ++++++++ arch/riscv/kernel/probes/rethook.h | 8 +++ arch/riscv/kernel/probes/rethook_trampoline.S | 93 +++++++++++++++++++++++++++ 8 files changed, 130 insertions(+), 109 deletions(-) delete mode 100644 arch/riscv/kernel/probes/kprobes_trampoline.S create mode 100644 arch/riscv/kernel/probes/rethook.c create mode 100644 arch/riscv/kernel/probes/rethook.h create mode 100644 arch/riscv/kernel/probes/rethook_trampoline.S (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8d47562be90c..ef8d66de5f38 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -101,6 +101,7 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index 217ef89f22b9..e7882ccb0fd4 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,8 +40,6 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); -void __kretprobe_trampoline(void); -void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 7f0840dcc31b..c40139e9ca47 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o -obj-$(CONFIG_KPROBES) += kprobes_trampoline.o +obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index e6e950b7cf32..f21592d20306 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -345,19 +345,6 @@ int __init arch_populate_kprobe_blacklist(void) return ret; } -void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) -{ - return (void *)kretprobe_trampoline_handler(regs, NULL); -} - -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->ra; - ri->fp = NULL; - regs->ra = (unsigned long) &__kretprobe_trampoline; -} - int __kprobes arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S deleted file mode 100644 index 7bdb09ded39b..000000000000 --- a/arch/riscv/kernel/probes/kprobes_trampoline.S +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ -/* - * Author: Patrick Stählin - */ -#include - -#include -#include - - .text - .altmacro - - .macro save_all_base_regs - REG_S x1, PT_RA(sp) - REG_S x3, PT_GP(sp) - REG_S x4, PT_TP(sp) - REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) - .endm - - .macro restore_all_base_regs - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - .endm - -ENTRY(__kretprobe_trampoline) - addi sp, sp, -(PT_SIZE_ON_STACK) - save_all_base_regs - - move a0, sp /* pt_regs */ - - call trampoline_probe_handler - - /* use the result as the return-address */ - move ra, a0 - - restore_all_base_regs - addi sp, sp, PT_SIZE_ON_STACK - - ret -ENDPROC(__kretprobe_trampoline) diff --git a/arch/riscv/kernel/probes/rethook.c b/arch/riscv/kernel/probes/rethook.c new file mode 100644 index 000000000000..5c27c1f50989 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic return hook for riscv. + */ + +#include +#include +#include "rethook.h" + +/* This is called from arch_rethook_trampoline() */ +unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->s0); +} + +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount) +{ + rhn->ret_addr = regs->ra; + rhn->frame = regs->s0; + + /* replace return addr with trampoline */ + regs->ra = (unsigned long)arch_rethook_trampoline; +} + +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/riscv/kernel/probes/rethook.h b/arch/riscv/kernel/probes/rethook.h new file mode 100644 index 000000000000..4758f7e3ce88 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __RISCV_RETHOOK_H +#define __RISCV_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount); + +#endif diff --git a/arch/riscv/kernel/probes/rethook_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S new file mode 100644 index 000000000000..21bac92a170a --- /dev/null +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Author: Patrick Stählin + */ +#include + +#include +#include + + .text + .altmacro + + .macro save_all_base_regs + REG_S x1, PT_RA(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + .endm + + .macro restore_all_base_regs + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + .endm + +ENTRY(arch_rethook_trampoline) + addi sp, sp, -(PT_SIZE_ON_STACK) + save_all_base_regs + + move a0, sp /* pt_regs */ + + call arch_rethook_trampoline_callback + + /* use the result as the return-address */ + move ra, a0 + + restore_all_base_regs + addi sp, sp, PT_SIZE_ON_STACK + + ret +ENDPROC(arch_rethook_trampoline) -- cgit v1.2.3 From 649d6b1019a2f243bc3a98cb85902a8ebf74289a Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Wed, 26 Oct 2022 22:42:07 +0800 Subject: RISC-V: Add arch_crash_save_vmcoreinfo support Add arch_crash_save_vmcoreinfo(), which exports VM layout(MODULES, VMALLOC, VMEMMAP ranges and KERNEL_LINK_ADDR), va bits and ram base for vmcore. Default pagetable levels and PAGE_OFFSET aren't same for different kernel version as below. For pagetable levels, it sets sv57 by default and falls back to setting sv48 at boot time if sv57 is not supported by the hardware. For ram base, the default value is 0x80200000 for qemu riscv64 env and, for example, is 0x200000 on the XuanTie 910 CPU. * Linux Kernel 5.18 ~ * PGTABLE_LEVELS = 5 * PAGE_OFFSET = 0xff60000000000000 * Linux Kernel 5.17 ~ * PGTABLE_LEVELS = 4 * PAGE_OFFSET = 0xffffaf8000000000 * Linux Kernel 4.19 ~ * PGTABLE_LEVELS = 3 * PAGE_OFFSET = 0xffffffe000000000 Since these configurations change from time to time and version to version, it is preferable to export them via vmcoreinfo than to change the crash's code frequently, it can simplify the development of crash tool. Signed-off-by: Xianting Tian Tested-by: Deepak Gupta Tested-by: Guo Ren Acked-by: Baoquan He Link: https://lore.kernel.org/r/20221026144208.373504-2-xianting.tian@linux.alibaba.com [Palmer: wrap commit text] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/crash_core.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 arch/riscv/kernel/crash_core.c (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index db6e4b1294ba..4cf303a779ab 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c new file mode 100644 index 000000000000..b351a3c01355 --- /dev/null +++ b/arch/riscv/kernel/crash_core.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + VMCOREINFO_NUMBER(phys_ram_base); + + vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); +#ifdef CONFIG_64BIT + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif + vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +} -- cgit v1.2.3 From 96df59b1ae23f5c11698c3c2159aeb2ecd4944a4 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:57 +0800 Subject: RISC-V: kexec: Fix memory leak of fdt buffer This is reported by kmemleak detector: unreferenced object 0xff60000082864000 (size 9588): comm "kexec", pid 146, jiffies 4294900634 (age 64.788s) hex dump (first 32 bytes): d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40 ...........H...@ 00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00 ...(............ backtrace: [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4 [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6 [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a [<000000002bd497b2>] kvmalloc_node+0x66/0xd6 [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt. While it's not freed back to system when kexec kernel is reloaded or unloaded. Then memory leak is caused. Fix it by introducing riscv specific function arch_kimage_file_post_load_cleanup(), and freeing the buffer there. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Reviewed-by: Liao Chang Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 5 +++++ arch/riscv/kernel/elf_kexec.c | 10 ++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index eee260e8ab30..2b56769cb530 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs, #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + void *fdt; /* For CONFIG_KEXEC_FILE */ unsigned long fdt_addr; }; @@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +struct kimage; +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 0cb94992c15b..ff30fcb43f47 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -21,6 +21,14 @@ #include #include +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + return kexec_image_post_load_cleanup_default(image); +} + static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, unsigned long new_pbase) @@ -298,6 +306,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error add DTB kbuf ret=%d\n", ret); goto out_free_fdt; } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; -- cgit v1.2.3 From cbc32023ddbdf4baa3d9dc513a2184a84080a5a2 Mon Sep 17 00:00:00 2001 From: Li Huafei Date: Fri, 4 Nov 2022 17:56:58 +0800 Subject: RISC-V: kexec: Fix memory leak of elf header buffer This is reported by kmemleak detector: unreferenced object 0xff2000000403d000 (size 4096): comm "kexec", pid 146, jiffies 4294900633 (age 64.792s) hex dump (first 32 bytes): 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 .ELF............ 04 00 f3 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000566ca97c>] kmemleak_vmalloc+0x3c/0xbe [<00000000979283d8>] __vmalloc_node_range+0x3ac/0x560 [<00000000b4b3712a>] __vmalloc_node+0x56/0x62 [<00000000854f75e2>] vzalloc+0x2c/0x34 [<00000000e9a00db9>] crash_prepare_elf64_headers+0x80/0x30c [<0000000067e8bf48>] elf_kexec_load+0x3e8/0x4ec [<0000000036548e09>] kexec_image_load_default+0x40/0x4c [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322 [<0000000040c62c03>] ret_from_syscall+0x0/0x2 In elf_kexec_load(), a buffer is allocated via vzalloc() to store elf headers. While it's not freed back to system when kdump kernel is reloaded or unloaded, or when image->elf_header is successfully set and then fails to load kdump kernel for some reason. Fix it by freeing the buffer in arch_kimage_file_post_load_cleanup(). Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic") Signed-off-by: Li Huafei Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221104095658.141222-2-lihuafei1@huawei.com Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/elf_kexec.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index ff30fcb43f47..5372b708fae2 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -26,6 +26,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) kvfree(image->arch.fdt); image->arch.fdt = NULL; + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + return kexec_image_post_load_cleanup_default(image); } -- cgit v1.2.3 From 5c3022e4a616d800cf5f4c3a981d7992179e44a1 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:36 -0500 Subject: riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument The 'retp' is a pointer to the return address on the stack, so we must pass the current return address pointer as the 'retp' argument to ftrace_push_return_trace(). Not parent function's return address on the stack. Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support") Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 08d11a53f39e..bcfe9eb55f80 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -58,7 +58,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, - (unsigned long *)(fp - 8)); + &frame->ra); } } -- cgit v1.2.3 From 7ecdadf7f8c659524f6b2aebf6be7bf619764d90 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 9 Nov 2022 01:49:37 -0500 Subject: riscv: stacktrace: Make walk_stackframe cross pt_regs frame The current walk_stackframe with FRAME_POINTER would stop unwinding at ret_from_exception: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] ___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc The optimization would help walk_stackframe cross the pt_regs frame and get more backtrace of debug info: BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518 in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192 Call Trace: [] walk_stackframe+0x0/0xee [] show_stack+0x32/0x4a [] dump_stack_lvl+0x72/0x8e [] dump_stack+0x14/0x1c [] ___might_sleep+0x12e/0x138 [] __might_sleep+0x10/0x18 [] down_read+0x22/0xa4 [] do_page_fault+0xb0/0x2fe [] ret_from_exception+0x0/0xc [] riscv_intc_irq+0x1a/0x72 [] ret_from_exception+0x0/0xc [] vma_link+0x54/0x160 [] mmap_region+0x2cc/0x4d0 [] do_mmap+0x2d8/0x3ac [] vm_mmap_pgoff+0x70/0xb8 [] vm_mmap+0x2a/0x36 [] elf_map+0x72/0x84 [] load_elf_binary+0x69a/0xec8 [] bprm_execve+0x246/0x53a [] kernel_execve+0xe8/0x124 [] run_init_process+0xfa/0x10c [] try_to_run_init_process+0x12/0x3c [] kernel_init+0xb4/0xf8 [] ret_from_exception+0x0/0xc Here is the error injection test code for the above output: drivers/irqchip/irq-riscv-intc.c: static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; + u32 tmp; __get_user(tmp, (u32 *)0); Signed-off-by: Guo Ren Signed-off-by: Guo Ren Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org [Palmer: use SYM_CODE_*] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 3 ++- arch/riscv/kernel/stacktrace.c | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..da44fe2d0d82 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -248,7 +248,7 @@ ret_from_syscall_rejected: andi t0, t0, _TIF_SYSCALL_WORK bnez t0, handle_syscall_trace_exit -ret_from_exception: +SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS @@ -262,6 +262,7 @@ ret_from_exception: andi s0, s0, SR_SPP #endif bnez s0, resume_kernel +SYM_CODE_END(ret_from_exception) resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index bcfe9eb55f80..75c8dd64fc48 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,8 @@ #ifdef CONFIG_FRAME_POINTER +extern asmlinkage void ret_from_exception(void); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, &frame->ra); + if (pc == (unsigned long)ret_from_exception) { + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + break; + + pc = ((struct pt_regs *)sp)->epc; + fp = ((struct pt_regs *)sp)->s0; + } } } -- cgit v1.2.3 From 4bd1d80efb5af640f99157f39b50fb11326ce641 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Mon, 29 Aug 2022 23:52:19 +0300 Subject: riscv: mm: notify remote harts about mmu cache updates Current implementation of update_mmu_cache function performs local TLB flush. It does not take into account ASID information. Besides, it does not take into account other harts currently running the same mm context or possible migration of the running context to other harts. Meanwhile TLB flush is not performed for every context switch if ASID support is enabled. Patch [1] proposed to add ASID support to update_mmu_cache to avoid flushing local TLB entirely. This patch takes into account other harts currently running the same mm context as well as possible migration of this context to other harts. For this purpose the approach from flush_icache_mm is reused. Remote harts currently running the same mm context are informed via SBI calls that they need to flush their local TLBs. All the other harts are marked as needing a deferred TLB flush when this mm context runs on them. [1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/ Signed-off-by: Sergey Matyukevich Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/mmu.h | 2 ++ arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/include/asm/tlbflush.h | 18 ++++++++++++++++++ arch/riscv/mm/context.c | 10 ++++++++++ arch/riscv/mm/tlbflush.c | 28 +++++++++++----------------- 5 files changed, 42 insertions(+), 18 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 0099dc116168..5ff1f19fd45c 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* A local tlb flush is needed before user execution can resume. */ + cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index c61ae83aadee..2359f1f9bda9 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -415,7 +415,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. */ - local_flush_tlb_page(address); + flush_tlb_page(vma, address); } #define __HAVE_ARCH_UPDATE_MMU_TLB diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea..907b9efd39a8 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 7acbfbd14557..80ce9caba8d2 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,6 +196,16 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); +#ifdef CONFIG_SMP + else { + cpumask_t *mask = &mm->context.tlb_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + local_flush_tlb_all_asid(cntx & asid_mask); + } + } +#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c..ce7dfc81bb3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,23 +5,7 @@ #include #include #include - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} +#include void flush_tlb_all(void) { @@ -31,6 +15,7 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { + struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); + /* + * TLB will be immediately flushed on harts concurrently + * executing this MM context. TLB flush on other harts + * is deferred until this MM context migrates there. + */ + cpumask_setall(pmask); + cpumask_clear_cpu(cpuid, pmask); + cpumask_andnot(pmask, pmask, cmask); + if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { -- cgit v1.2.3 From b0f4c74eadbf69a3298f38566bfaa2e202541f2f Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Fri, 11 Nov 2022 17:31:08 -0500 Subject: RISC-V: Fix unannoted hardirqs-on in return to userspace slow-path The return to userspace path in entry.S may enable interrupts without the corresponding lockdep annotation, producing a splat[0] when DEBUG_LOCKDEP is enabled. Simply calling __trace_hardirqs_on() here gets a bit messy due to the use of RA to point back to ret_from_exception, so just move the whole slow-path loop into C. It's more readable and it lets us use local_irq_{enable,disable}(), avoiding the need for manual annotations altogether. [0]: ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled()) WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5512 check_flags+0x10a/0x1e0 Modules linked in: CPU: 2 PID: 1 Comm: init Not tainted 6.1.0-rc4-00160-gb56b6e2b4f31 #53 Hardware name: riscv-virtio,qemu (DT) epc : check_flags+0x10a/0x1e0 ra : check_flags+0x10a/0x1e0 status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [] lock_is_held_type+0x78/0x14e [] __might_resched+0x26/0x22c [] __might_sleep+0x3c/0x66 [] get_signal+0x9e/0xa70 [] do_notify_resume+0x6e/0x422 [] ret_from_exception+0x0/0x10 irq event stamp: 44512 hardirqs last enabled at (44511): [] _raw_spin_unlock_irqrestore+0x54/0x62 hardirqs last disabled at (44512): [] __trace_hardirqs_off+0xc/0x14 softirqs last enabled at (44472): [] __do_softirq+0x3de/0x51e softirqs last disabled at (44467): [] irq_exit+0xd6/0x104 ---[ end trace 0000000000000000 ]--- possible reason: unannotated irqs-on. Signed-off-by: Andrew Bresticker Fixes: 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT") Link: https://lore.kernel.org/r/20221111223108.1976562-1-abrestic@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/entry.S | 18 +++++------------- arch/riscv/kernel/signal.c | 34 +++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..58dfa8595e19 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -263,12 +263,11 @@ ret_from_exception: #endif bnez s0, resume_kernel -resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ andi s1, s0, _TIF_WORK_MASK - bnez s1, work_pending - + bnez s1, resume_userspace_slow +resume_userspace: #ifdef CONFIG_CONTEXT_TRACKING_USER call user_enter_callable #endif @@ -368,19 +367,12 @@ resume_kernel: j restore_all #endif -work_pending: +resume_userspace_slow: /* Enter slow path for supplementary processing */ - la ra, ret_from_exception - andi s1, s0, _TIF_NEED_RESCHED - bnez s1, work_resched -work_notifysig: - /* Handle pending signals and notify-resume requests */ - csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */ move a0, sp /* pt_regs */ move a1, s0 /* current_thread_info->flags */ - tail do_notify_resume -work_resched: - tail schedule + call do_work_pending + j resume_userspace /* Slow paths for ptrace. */ handle_syscall_trace_enter: diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5c591123c440..bfb2afa4135f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -313,19 +313,27 @@ static void do_signal(struct pt_regs *regs) } /* - * notification of userspace execution resumption - * - triggered by the _TIF_WORK_MASK flags + * Handle any pending work on the resume-to-userspace path, as indicated by + * _TIF_WORK_MASK. Entered from assembly with IRQs off. */ -asmlinkage __visible void do_notify_resume(struct pt_regs *regs, - unsigned long thread_info_flags) +asmlinkage __visible void do_work_pending(struct pt_regs *regs, + unsigned long thread_info_flags) { - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* Handle pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); + do { + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + /* Handle pending signal delivery */ + if (thread_info_flags & (_TIF_SIGPENDING | + _TIF_NOTIFY_SIGNAL)) + do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) + resume_user_mode_work(regs); + } + local_irq_disable(); + thread_info_flags = read_thread_flags(); + } while (thread_info_flags & _TIF_WORK_MASK); } -- cgit v1.2.3 From b91676fc16cd384a81e3af52c641aa61985cc231 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:34 +0530 Subject: RISC-V: Fix MEMREMAP_WB for systems with Svpbmt Currently, the memremap() called with MEMREMAP_WB maps memory using the generic ioremap() function which breaks on system with Svpbmt because memory mapped using _PAGE_IOREMAP page attributes is treated as strongly-ordered non-cacheable IO memory. To address this, we implement RISC-V specific arch_memremap_wb() which maps memory using _PAGE_KERNEL page attributes resulting in write-back cacheable mapping on systems with Svpbmt. Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support") Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-2-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/io.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 92080a227937..42497d487a17 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -135,4 +135,9 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include +#ifdef CONFIG_MMU +#define arch_memremap_wb(addr, size) \ + ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) +#endif + #endif /* _ASM_RISCV_IO_H */ -- cgit v1.2.3 From a49ab905a1fc8630a94221f9a06ce0dafb266576 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:35 +0530 Subject: RISC-V: Implement arch specific PMEM APIs The NVDIMM PMEM driver expects arch specific APIs for cache maintenance and if arch does not provide these APIs then NVDIMM PMEM driver will always use MEMREMAP_WT to map persistent memory which in-turn maps as UC memory type defined by the RISC-V Svpbmt specification. Now that the Svpbmt and Zicbom support is available in RISC-V kernel, we implement PMEM APIs using ALT_CMO_OP() macros so that the NVDIMM PMEM driver can use MEMREMAP_WB to map persistent memory. Co-developed-by: Mayuresh Chitale Signed-off-by: Mayuresh Chitale Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-3-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/mm/Makefile | 1 + arch/riscv/mm/pmem.c | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 arch/riscv/mm/pmem.c (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..025e2a1b1c60 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -25,6 +25,7 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB + select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d76aabf4b94d..b4f35da889bf 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,6 +13,7 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o +obj-y += pmem.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c new file mode 100644 index 000000000000..089df92ae876 --- /dev/null +++ b/arch/riscv/mm/pmem.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Ventana Micro Systems Inc. + */ + +#include +#include + +#include + +void arch_wb_cache_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +void arch_invalidate_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); -- cgit v1.2.3 From 497bcbe3ce0466123a834f2777a8a762bd5d7aae Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 14 Nov 2022 14:35:36 +0530 Subject: RISC-V: Enable PMEM drivers We now have PMEM arch support available in RISC-V kernel so let us enable relevant drivers in defconfig. Signed-off-by: Anup Patel Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221114090536.1662624-4-apatel@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/riscv') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 05fd5fcf24f9..462da9f7410d 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -159,6 +159,7 @@ CONFIG_VIRTIO_MMIO=y CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y +CONFIG_LIBNVDIMM=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y -- cgit v1.2.3 From bf3d7b1d8499ca46874c7373d2043ecbe252cccc Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Fri, 18 Nov 2022 10:43:01 +0000 Subject: RISC-V: stop selecting SIFIVE_PLIC at the SoC level The SIFIVE_PLIC driver is used by all current RISC-V SoCs & will be, where possible, used for future implementations. Rather than having each driver select the option on a case-by-case basis, do so at the arch level. Signed-off-by: Conor Dooley Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221118104300.85016-4-conor@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/Kconfig.socs | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b48a3ae9843..3ee67dc4e98b 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -123,6 +123,7 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI + select SIFIVE_PLIC select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 69774bb362d6..15e391f38f75 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -3,7 +3,6 @@ menu "SoC selection" config SOC_MICROCHIP_POLARFIRE bool "Microchip PolarFire SoCs" select MCHP_CLK_MPFS - select SIFIVE_PLIC help This enables support for Microchip PolarFire SoC platforms. @@ -13,7 +12,6 @@ config SOC_SIFIVE select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE select CLK_SIFIVE_PRCI - select SIFIVE_PLIC select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. @@ -22,7 +20,6 @@ config SOC_STARFIVE bool "StarFive SoCs" select PINCTRL select RESET_CONTROLLER - select SIFIVE_PLIC help This enables support for StarFive SoC platform hardware. @@ -34,7 +31,6 @@ config SOC_VIRT select POWER_RESET_SYSCON_POWEROFF select GOLDFISH select RTC_DRV_GOLDFISH if RTC_CLASS - select SIFIVE_PLIC select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI @@ -47,7 +43,6 @@ config SOC_CANAAN select CLINT_TIMER if RISCV_M_MODE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY - select SIFIVE_PLIC select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK -- cgit v1.2.3 From de59b6ed0618b909be78f6bc60874a57dd016063 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 23 Nov 2022 23:02:57 +0800 Subject: riscv: boot: add zstd support Support build the zstd compressed Image.zst. Similar as other compressed formats, the Image.zst is not self-decompressing and the bootloader still needs to handle decompression before launching the kernel image. Signed-off-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221123150257.3108-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index d1a49adcb1d7..c72de7232abb 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -56,6 +56,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) + $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) -- cgit v1.2.3 From 0c49688174f5347c3f8012e84c0ffa0d2b2890c8 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 00:09:19 -0600 Subject: riscv: Fix crash during early errata patching The patch function for the T-Head PBMT errata calls __pa_symbol() before relocation. This crashes when CONFIG_DEBUG_VIRTUAL is enabled, because __pa_symbol() forwards to __phys_addr_symbol(), and __phys_addr_symbol() checks against the absolute kernel start/end address. Fix this by checking against the kernel map instead of a symbol address. Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head") Reviewed-by: Heiko Stuebner Tested-by: Heiko Stuebner Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221126060920.65009-1-samuel@sholland.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/physaddr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 19cf25a74ee2..9b18bda74154 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -22,7 +22,7 @@ EXPORT_SYMBOL(__virt_to_phys); phys_addr_t __phys_addr_symbol(unsigned long x) { unsigned long kernel_start = kernel_map.virt_addr; - unsigned long kernel_end = (unsigned long)_end; + unsigned long kernel_end = kernel_start + kernel_map.size; /* * Boundary checking aginst the kernel image mapping. -- cgit v1.2.3 From 583286e2072ed25c31b7db14d69fdf03f1fae7ba Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 00:09:20 -0600 Subject: riscv: Move cast inside kernel_mapping_[pv]a_to_[vp]a Before commit 44c922572952 ("RISC-V: enable XIP"), these macros cast their argument to unsigned long. That commit moved the cast after an assignment to an unsigned long variable, rendering it ineffectual. Move the cast back, so we can remove the cast at each call site. Reviewed-by: Alexandre Ghiti Reviewed-by: Heiko Stuebner Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20221126060920.65009-2-samuel@sholland.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 18 +++++++++--------- arch/riscv/mm/init.c | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index ac70b0fd9a9a..9f432c1b5289 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -123,20 +123,20 @@ extern phys_addr_t phys_ram_base; ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) -#define kernel_mapping_pa_to_va(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ +#define kernel_mapping_pa_to_va(y) ({ \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)(_y + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) #define kernel_mapping_va_to_pa(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ - ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \ - ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ + (_y - kernel_map.va_kernel_xip_pa_offset) : \ + (_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ }) #define __va_to_pa_nodebug(x) ({ \ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b56a0a75533f..7d59516ce6b3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -927,15 +927,15 @@ static void __init pt_ops_set_early(void) */ static void __init pt_ops_set_fixmap(void) { - pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); - pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); + pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap); #ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); - pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); - pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); - pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); - pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); - pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); + pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap); #endif } -- cgit v1.2.3 From 6ff8ca3f93d3cd2a77f051d2d971cf3638d39546 Mon Sep 17 00:00:00 2001 From: Qinglin Pan Date: Mon, 28 Nov 2022 10:36:43 +0800 Subject: riscv: mm: call best_map_size many times during linear-mapping Modify the best_map_size function to give map_size many times instead of only once, so a memory region can be mapped by both PMD_SIZE and PAGE_SIZE. Signed-off-by: Qinglin Pan Reviewed-by: Andrew Jones Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20221128023643.329091-1-panqinglin2020@iscas.ac.cn Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7d59516ce6b3..bb0028c07ef3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -673,10 +673,11 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) { /* Upgrade to PMD_SIZE mappings whenever possible */ - if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1))) - return PAGE_SIZE; + base &= PMD_SIZE - 1; + if (!base && size >= PMD_SIZE) + return PMD_SIZE; - return PMD_SIZE; + return PAGE_SIZE; } #ifdef CONFIG_XIP_KERNEL @@ -1111,9 +1112,9 @@ static void __init setup_vm_final(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); + map_size = best_map_size(pa, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); -- cgit v1.2.3 From 2ba8c7dc71c098935977528747b82ffae43f3f18 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:50 +0100 Subject: riscv: Don't duplicate __ALTERNATIVE_CFG in __ALTERNATIVE_CFG_2 Build __ALTERNATIVE_CFG_2 by adding on to __ALTERNATIVE_CFG rather than duplicating it. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index ec2f3f1b836f..c7036166af2c 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -49,14 +49,7 @@ .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ new_c_2, vendor_id_2, errata_id_2, enable_2 -886 : - .option push - .option norvc - .option norelax - \old_c - .option pop -887 : - ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 + __ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm @@ -116,14 +109,7 @@ enable_1, \ new_c_2, vendor_id_2, errata_id_2, \ enable_2) \ - "886 :\n" \ - ".option push\n" \ - ".option norvc\n" \ - ".option norelax\n" \ - old_c "\n" \ - ".option pop\n" \ - "887 :\n" \ - ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ -- cgit v1.2.3 From 7d52eace1bf5c55704bb0ca5dc8f2489927683ff Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:51 +0100 Subject: riscv: alternatives: Don't name unused macro parameters Without CONFIG_RISCV_ALTERNATIVE only the first parameter of the ALTERNATIVE macros is needed. Use ... for the rest to cut down on clutter. While there, fix a couple space vs. tab issues. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-3-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index c7036166af2c..7cc2b587c5c4 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -130,28 +130,22 @@ \old_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ +#define _ALTERNATIVE_CFG(old_c, ...) \ __ALTERNATIVE_CFG old_c -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG_2(old_c, ...) \ + __ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -#define __ALTERNATIVE_CFG(old_c) \ +#define __ALTERNATIVE_CFG(old_c) \ old_c "\n" -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ +#define _ALTERNATIVE_CFG(old_c, ...) \ __ALTERNATIVE_CFG(old_c) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, ...) \ + __ALTERNATIVE_CFG(old_c) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_RISCV_ALTERNATIVE */ -- cgit v1.2.3 From bb2efcde594628ae08ee6e4be51b2047df9d2d06 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:52 +0100 Subject: riscv: alternatives: Drop the underscores from the assembly macro names The underscores aren't needed because there isn't anything already named without them and the _CFG extension. This is a bit of a cleanup by itself, but the real motivation is for a coming patch which would otherwise need to add two more underscores to these macro names, i.e. ____ALTERNATIVE_CFG, and that'd be gross. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-4-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 7cc2b587c5c4..9ea95331a280 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -33,7 +33,7 @@ .endif .endm -.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable +.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable 886 : .option push .option norvc @@ -45,11 +45,11 @@ .endm #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) + ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) -.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - new_c_2, vendor_id_2, errata_id_2, enable_2 - __ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 +.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2 + ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm @@ -57,9 +57,9 @@ CONFIG_k_1, \ new_c_2, vendor_id_2, errata_id_2, \ CONFIG_k_2) \ - __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ + ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ + new_c_2, vendor_id_2, errata_id_2, \ IS_ENABLED(CONFIG_k_2) #else /* !__ASSEMBLY__ */ @@ -126,15 +126,15 @@ #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ -.macro __ALTERNATIVE_CFG old_c +.macro ALTERNATIVE_CFG old_c \old_c .endm #define _ALTERNATIVE_CFG(old_c, ...) \ - __ALTERNATIVE_CFG old_c + ALTERNATIVE_CFG old_c #define _ALTERNATIVE_CFG_2(old_c, ...) \ - __ALTERNATIVE_CFG old_c + ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 26fb4b90b745a808e94a81dc732d440c285fa74b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 16:00:53 +0100 Subject: riscv: Don't duplicate _ALTERNATIVE_CFG* macros Reduce clutter by only defining the _ALTERNATIVE_CFG* macros once, rather than once for assembly and once for C. To do that, we need to add __ALTERNATIVE_CFG* macros to the assembly side, but those are one-liners. Also take the opportunity to do a bit of reformatting, taking full advantage of the fact checkpatch gives us 100 char lines. Signed-off-by: Andrew Jones Tested-by: Lad Prabhakar Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20221129150053.50464-5-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/alternative-macros.h | 53 +++++++++-------------------- 1 file changed, 17 insertions(+), 36 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 9ea95331a280..7226e2462584 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -44,23 +44,14 @@ ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) - .macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ new_c_2, vendor_id_2, errata_id_2, enable_2 ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2) +#define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__ +#define __ALTERNATIVE_CFG_2(...) ALTERNATIVE_CFG_2 __VA_ARGS__ #else /* !__ASSEMBLY__ */ @@ -102,27 +93,21 @@ "887 :\n" \ ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) - -#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - enable_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - enable_2) \ - __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ +#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2)) - #endif /* __ASSEMBLY__ */ +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) + +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2)) + #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ @@ -173,13 +158,9 @@ * on the following sample code and then replace ALTERNATIVE() with * ALTERNATIVE_2() to append its customized content. */ -#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) \ - _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) +#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) #endif -- cgit v1.2.3 From 726855549cf8d5c6b05795cf74a9c23584f45544 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:45 +0100 Subject: RISC-V: Improve use of isa2hwcap[] Improve isa2hwcap[] by removing it from static storage, as riscv_fill_hwcap() is only called once, and by reducing its size from 256 bytes to 26. The latter improvement is possible because isa2hwcap[] will never be indexed with capital letters and we can precompute the offsets from 'a'. No functional change intended. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 694267d1fe81..4677320d7e31 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -74,15 +74,15 @@ void __init riscv_fill_hwcap(void) const char *isa; char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; - static unsigned long isa2hwcap[256] = {0}; + unsigned long isa2hwcap[26] = {0}; unsigned long hartid; - isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; - isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; - isa2hwcap['a'] = isa2hwcap['A'] = COMPAT_HWCAP_ISA_A; - isa2hwcap['f'] = isa2hwcap['F'] = COMPAT_HWCAP_ISA_F; - isa2hwcap['d'] = isa2hwcap['D'] = COMPAT_HWCAP_ISA_D; - isa2hwcap['c'] = isa2hwcap['C'] = COMPAT_HWCAP_ISA_C; + isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; + isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; + isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A; + isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; + isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; + isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; elf_hwcap = 0; @@ -196,8 +196,10 @@ void __init riscv_fill_hwcap(void) if (unlikely(ext_err)) continue; if (!ext_long) { - this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; - set_bit(*ext - 'a', this_isa); + int nr = *ext - 'a'; + + this_hwcap |= isa2hwcap[nr]; + set_bit(nr, this_isa); } else { SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); -- cgit v1.2.3 From fb0ff0a95d61f69415cb8d8f2d921e1f7eed75af Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:46 +0100 Subject: RISC-V: Introduce riscv_isa_extension_check Currently any isa extension found in the isa string is set in the isa bitmap. An isa extension set in the bitmap indicates that the extension is present and may be used (a.k.a is enabled). However, when an extension cannot be used due to missing dependencies or errata it should not be added to the bitmap. Introduce a function where additional checks may be placed in order to determine if an extension should be enabled or not. Note, the checks may simply indicate an issue with the DT, but, since extensions may be used in early boot, it's not always possible to simply produce an error at the point the issue is determined. It's best to keep the extension disabled and produce an error. No functional change intended, as the function is only introduced and always returns true. A later patch will provide checks for an isa extension. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-3-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 4677320d7e31..220be7222129 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -68,6 +68,11 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static bool riscv_isa_extension_check(int id) +{ + return true; +} + void __init riscv_fill_hwcap(void) { struct device_node *node; @@ -189,7 +194,8 @@ void __init riscv_fill_hwcap(void) #define SET_ISA_EXT_MAP(name, bit) \ do { \ if ((ext_end - ext == sizeof(name) - 1) && \ - !memcmp(ext, name, sizeof(name) - 1)) \ + !memcmp(ext, name, sizeof(name) - 1) && \ + riscv_isa_extension_check(bit)) \ set_bit(bit, this_isa); \ } while (false) \ @@ -198,8 +204,10 @@ void __init riscv_fill_hwcap(void) if (!ext_long) { int nr = *ext - 'a'; - this_hwcap |= isa2hwcap[nr]; - set_bit(nr, this_isa); + if (riscv_isa_extension_check(nr)) { + this_hwcap |= isa2hwcap[nr]; + set_bit(nr, this_isa); + } } else { SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); -- cgit v1.2.3 From 9daaca4a44d6f0741060e67c54a0175c035edb1f Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Tue, 29 Nov 2022 15:34:47 +0100 Subject: RISC-V: Ensure Zicbom has a valid block size When a DT puts zicbom in the isa string, but does not provide a block size, ALT_CMO_OP() will attempt to do cache operations on address zero since the start address will be ANDed with zero. We can't simply BUG() in riscv_init_cbom_blocksize() when we fail to find a block size because the failure will happen before logging works, leaving users to scratch their heads as to why the boot hung. Instead, ensure Zicbom is disabled and output an error which will hopefully alert people that the DT needs to be fixed. While at it, add a check that the block size is a power-of-2 too. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Reviewed-by: Heiko Stuebner Link: https://lore.kernel.org/r/20221129143447.49714-4-ajones@ventanamicro.com [Palmer: base on 5c20a3a9df19 ("RISC-V: Fix compilation without RISCV_ISA_ZICBOM"] Reported-by: kernel test robot Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpufeature.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 220be7222129..93e45560af30 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -70,6 +71,18 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); static bool riscv_isa_extension_check(int id) { + switch (id) { + case RISCV_ISA_EXT_ZICBOM: + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n"); + return false; + } else if (!is_power_of_2(riscv_cbom_block_size)) { + pr_err("cbom-block-size present, but is not a power-of-2\n"); + return false; + } + return true; + } + return true; } -- cgit v1.2.3 From b003b3b77d65133a0011ae3b7b255347438c12f6 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Nov 2022 18:35:14 -0800 Subject: RISC-V: Align the shadow stack The standard RISC-V ABIs all require 16-byte stack alignment. We're only calling that one function on the shadow stack so I doubt it'd result in a real issue, but might as well keep this lined up. Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection") Reviewed-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221130023515.20217-1-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index be54ccea8c47..acdfcacd7e57 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -206,7 +206,7 @@ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used * to get per-cpu overflow stack(get_overflow_stack). */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)]; +long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); asmlinkage unsigned long get_overflow_stack(void) { return (unsigned long)this_cpu_ptr(overflow_stack) + -- cgit v1.2.3 From de57ecc476103179e93fd85091770921f76a19af Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Nov 2022 18:35:15 -0800 Subject: RISC-V: Add some comments about the shadow and overflow stacks It took me a while to page all this back in when trying to review the recent spin_shadow_stack, so I figured I'd just write up some comments. Reviewed-by: Guo Ren Reviewed-by: Jisheng Zhang Link: https://lore.kernel.org/r/20221130023515.20217-2-palmer@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index acdfcacd7e57..336d4aadadb1 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -200,18 +200,18 @@ void __init trap_init(void) } #ifdef CONFIG_VMAP_STACK +/* + * Extra stack space that allows us to provide panic messages when the kernel + * has overflowed its stack. + */ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); /* - * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used - * to get per-cpu overflow stack(get_overflow_stack). + * A temporary stack for use by handle_kernel_stack_overflow. This is used so + * we can call into C code to get the per-hart overflow stack. Usage of this + * stack must be protected by spin_shadow_stack. */ long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} /* * A pseudo spinlock to protect the shadow stack from being used by multiple @@ -222,6 +222,12 @@ asmlinkage unsigned long get_overflow_stack(void) */ unsigned long spin_shadow_stack; +asmlinkage unsigned long get_overflow_stack(void) +{ + return (unsigned long)this_cpu_ptr(overflow_stack) + + OVERFLOW_STACK_SIZE; +} + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; -- cgit v1.2.3 From e923f4625ed3ad7656c3f9f086c898798bafbbc5 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Thu, 1 Dec 2022 12:37:50 +0100 Subject: riscv: Apply a static assert to riscv_isa_ext_id Add a static assert to ensure a RISCV_ISA_EXT_* enum is never created with a value >= RISCV_ISA_EXT_MAX. We can do this by putting RISCV_ISA_EXT_ID_MAX to more work. Before it was redundant with RISCV_ISA_EXT_MAX and hence only used to document the limit. Now it grows with the enum and is used to check the limit. Signed-off-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20221201113750.18021-1-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/hwcap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b22525290073..86328e3acb02 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -59,8 +59,9 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_SSTC, RISCV_ISA_EXT_SVINVAL, - RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, + RISCV_ISA_EXT_ID_MAX }; +static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX); /* * This enum represents the logical ID for each RISC-V ISA extension static -- cgit v1.2.3 From 71fc3621efc38ace9640ee6a0db3300900689592 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 1 Dec 2022 14:51:28 +0100 Subject: riscv: Fix P4D_SHIFT definition for 3-level page table mode RISC-V kernels support 3,4,5-level page tables at runtime by folding upper levels. In case of a 3-level page table, PGDIR is folded into P4D which in turn is folded into PUD: PGDIR_SHIFT value is correctly set to the same value as PUD_SHIFT, but P4D_SHIFT is not, then any use of P4D_SHIFT will access invalid address bits (all set to 1). Fix this by dynamically defining P4D_SHIFT value, like we already do for PGDIR_SHIFT. Fixes: d10efa21a937 ("riscv: mm: Control p4d's folding by pgtable_l5_enabled") Signed-off-by: Alexandre Ghiti Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221201135128.1482189-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable-64.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index dc42375c2357..42a042c0e13e 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -25,7 +25,11 @@ extern bool pgtable_l5_enabled; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* p4d is folded into pgd in case of 4-level page table */ -#define P4D_SHIFT 39 +#define P4D_SHIFT_L3 30 +#define P4D_SHIFT_L4 39 +#define P4D_SHIFT_L5 39 +#define P4D_SHIFT (pgtable_l5_enabled ? P4D_SHIFT_L5 : \ + (pgtable_l4_enabled ? P4D_SHIFT_L4 : P4D_SHIFT_L3)) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) -- cgit v1.2.3 From c528ef0888b75f673f7d48022de8d31d5b451e8c Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Wed, 7 Dec 2022 04:11:12 -0500 Subject: riscv: Fixup compile error with !MMU Current nommu_virt_defconfig can't compile: In file included from arch/riscv/kernel/crash_core.c:3: arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:8:27: error: 'VA_BITS' undeclared (first use in this function) 8 | VMCOREINFO_NUMBER(VA_BITS); | ^~~~~~~ Add MMU dependency for KEXEC_FILE. Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support") Reported-by: Conor Dooley Reported-by: kernel test robot Signed-off-by: Guo Ren Signed-off-by: Guo Ren Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221207091112.2258674-1-guoren@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 67ef08d33d3a..e1a9fa47f012 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -498,7 +498,7 @@ config KEXEC_FILE select KEXEC_CORE select KEXEC_ELF select HAVE_IMA_KEXEC if IMA - depends on 64BIT + depends on 64BIT && MMU help This is new version of kexec system call. This system call is file based and takes file descriptors as system call argument -- cgit v1.2.3