Diffstat (limited to 'arch/riscv/mm')
-rw-r--r--   arch/riscv/mm/Makefile       |    4
-rw-r--r--   arch/riscv/mm/cacheflush.c   |   69
-rw-r--r--   arch/riscv/mm/fault.c        |    6
-rw-r--r--   arch/riscv/mm/hugetlbpage.c  |  301
-rw-r--r--   arch/riscv/mm/init.c         |  272
-rw-r--r--   arch/riscv/mm/kasan_init.c   |  516
-rw-r--r--   arch/riscv/mm/pageattr.c     |    8
-rw-r--r--   arch/riscv/mm/physaddr.c     |   16
-rw-r--r--   arch/riscv/mm/ptdump.c       |   24
-rw-r--r--   arch/riscv/mm/tlbflush.c     |   93
10 files changed, 943 insertions(+), 366 deletions(-)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2ac177c05352..b85e9e82f082 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS_init.o := -mcmodel=medany +ifdef CONFIG_RELOCATABLE +CFLAGS_init.o += -fno-pie +endif + ifdef CONFIG_FTRACE CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index fcd6145fbead..fca532ddf3ec 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -19,7 +19,7 @@ void flush_icache_all(void) { local_flush_icache_all(); - if (IS_ENABLED(CONFIG_RISCV_SBI)) + if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence()) sbi_remote_fence_i(NULL); else on_each_cpu(ipi_remote_fence_i, NULL, 1); @@ -67,7 +67,8 @@ void flush_icache_mm(struct mm_struct *mm, bool local) * with flush_icache_deferred(). */ smp_mb(); - } else if (IS_ENABLED(CONFIG_RISCV_SBI)) { + } else if (IS_ENABLED(CONFIG_RISCV_SBI) && + !riscv_use_ipi_for_rfence()) { sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); @@ -100,36 +101,48 @@ void flush_icache_pte(pte_t pte) unsigned int riscv_cbom_block_size; EXPORT_SYMBOL_GPL(riscv_cbom_block_size); -void riscv_init_cbom_blocksize(void) +unsigned int riscv_cboz_block_size; +EXPORT_SYMBOL_GPL(riscv_cboz_block_size); + +static void cbo_get_block_size(struct device_node *node, + const char *name, u32 *block_size, + unsigned long *first_hartid) { + unsigned long hartid; + u32 val; + + if (riscv_of_processor_hartid(node, &hartid)) + return; + + if (of_property_read_u32(node, name, &val)) + return; + + if (!*block_size) { + *block_size = val; + *first_hartid = hartid; + } else if (*block_size != val) { + pr_warn("%s mismatched between harts %lu and %lu\n", + name, *first_hartid, hartid); + } +} + +void riscv_init_cbo_blocksizes(void) +{ + unsigned long cbom_hartid, cboz_hartid; + u32 cbom_block_size = 0, cboz_block_size = 0; struct device_node *node; - unsigned long cbom_hartid; - u32 val, probed_block_size; - int ret; - probed_block_size = 0; for_each_of_cpu_node(node) { - unsigned long hartid; - - ret = riscv_of_processor_hartid(node, &hartid); - if (ret) - continue; - - /* set block-size for cbom extension if available */ - ret = of_property_read_u32(node, "riscv,cbom-block-size", &val); - if (ret) - continue; - - if (!probed_block_size) { - probed_block_size = val; - cbom_hartid = hartid; - } else { - if (probed_block_size != val) - pr_warn("cbom-block-size mismatched between harts %lu and %lu\n", - cbom_hartid, hartid); - } + /* set block-size for cbom and/or cboz extension if available */ + cbo_get_block_size(node, "riscv,cbom-block-size", + &cbom_block_size, &cbom_hartid); + cbo_get_block_size(node, "riscv,cboz-block-size", + &cboz_block_size, &cboz_hartid); } - if (probed_block_size) - riscv_cbom_block_size = probed_block_size; + if (cbom_block_size) + riscv_cbom_block_size = cbom_block_size; + + if (cboz_block_size) + riscv_cboz_block_size = cboz_block_size; } diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index d5f3e501dffb..8685f85a7474 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/kprobes.h> #include <linux/kfence.h> +#include <linux/entry-common.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> @@ -209,7 +210,7 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) * 
This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. */ -asmlinkage void do_page_fault(struct pt_regs *regs) +void handle_page_fault(struct pt_regs *regs) { struct task_struct *tsk; struct vm_area_struct *vma; @@ -256,7 +257,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) } #endif /* Enable interrupts if they were enabled in the parent context. */ - if (likely(regs->status & SR_PIE)) + if (!regs_irqs_disabled(regs)) local_irq_enable(); /* @@ -361,4 +362,3 @@ good_area: } return; } -NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 932dadfdca54..a163a3e0f0d4 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -2,6 +2,305 @@ #include <linux/hugetlb.h> #include <linux/err.h> +#ifdef CONFIG_RISCV_ISA_SVNAPOT +pte_t *huge_pte_alloc(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + unsigned long sz) +{ + unsigned long order; + pte_t *pte = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return NULL; + + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return NULL; + + if (sz == PUD_SIZE) { + pte = (pte_t *)pud; + goto out; + } + + if (sz == PMD_SIZE) { + if (want_pmd_share(vma, addr) && pud_none(*pud)) + pte = huge_pmd_share(mm, vma, addr, pud); + else + pte = (pte_t *)pmd_alloc(mm, pud, addr); + goto out; + } + + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order)); + break; + } + } + +out: + WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte)); + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, + unsigned long addr, + unsigned long sz) +{ + unsigned long order; + pte_t *pte = NULL; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset(mm, addr); + if (!pgd_present(*pgd)) + return NULL; + + p4d = p4d_offset(pgd, addr); + if (!p4d_present(*p4d)) + return NULL; + + pud = pud_offset(p4d, addr); + if (sz == PUD_SIZE) + /* must be pud huge, non-present or none */ + return (pte_t *)pud; + + if (!pud_present(*pud)) + return NULL; + + pmd = pmd_offset(pud, addr); + if (sz == PMD_SIZE) + /* must be pmd huge, non-present or none */ + return (pte_t *)pmd; + + if (!pmd_present(*pmd)) + return NULL; + + for_each_napot_order(order) { + if (napot_cont_size(order) == sz) { + pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order)); + break; + } + } + return pte; +} + +static pte_t get_clear_contig(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = ptep_get(ptep); + unsigned long i; + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) { + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + + if (pte_dirty(pte)) + orig_pte = pte_mkdirty(orig_pte); + + if (pte_young(pte)) + orig_pte = pte_mkyoung(orig_pte); + } + + return orig_pte; +} + +static pte_t get_clear_contig_flush(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long pte_num) +{ + pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num); + struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); + bool valid = !pte_none(orig_pte); + + if (valid) + flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num)); + + return orig_pte; +} + +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) 
+{ + unsigned long order; + + for_each_napot_order(order) { + if (shift == napot_cont_shift(order)) { + entry = pte_mknapot(entry, order); + break; + } + } + if (order == NAPOT_ORDER_MAX) + entry = pte_mkhuge(entry); + + return entry; +} + +void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + pte_t pte) +{ + int i, pte_num; + + if (!pte_napot(pte)) { + set_pte_at(mm, addr, ptep, pte); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE) + set_pte_at(mm, addr, ptep, pte); +} + +int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep, + pte_t pte, + int dirty) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long order; + pte_t orig_pte; + int i, pte_num; + + if (!pte_napot(pte)) + return ptep_set_access_flags(vma, addr, ptep, pte, dirty); + + order = napot_cont_order(pte); + pte_num = napot_pte_num(order); + ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num); + + if (pte_dirty(orig_pte)) + pte = pte_mkdirty(pte); + + if (pte_young(orig_pte)) + pte = pte_mkyoung(pte); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + set_pte_at(mm, addr, ptep, pte); + + return true; +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + pte_t orig_pte = ptep_get(ptep); + int pte_num; + + if (!pte_napot(orig_pte)) + return ptep_get_and_clear(mm, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(orig_pte)); + + return get_clear_contig(mm, addr, ptep, pte_num); +} + +void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + unsigned long order; + int i, pte_num; + + if (!pte_napot(pte)) { + ptep_set_wrprotect(mm, addr, ptep); + return; + } + + order = napot_cont_order(pte); + pte_num = napot_pte_num(order); + ptep = huge_pte_offset(mm, addr, napot_cont_size(order)); + + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + ptep_set_wrprotect(mm, addr, ptep); +} + +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + int pte_num; + + if (!pte_napot(pte)) + return ptep_clear_flush(vma, addr, ptep); + + pte_num = napot_pte_num(napot_cont_order(pte)); + + return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num); +} + +void huge_pte_clear(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + unsigned long sz) +{ + pte_t pte = READ_ONCE(*ptep); + int i, pte_num; + + if (!pte_napot(pte)) { + pte_clear(mm, addr, ptep); + return; + } + + pte_num = napot_pte_num(napot_cont_order(pte)); + for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) + pte_clear(mm, addr, ptep); +} + +static __init bool is_napot_size(unsigned long size) +{ + unsigned long order; + + if (!has_svnapot()) + return false; + + for_each_napot_order(order) { + if (size == napot_cont_size(order)) + return true; + } + return false; +} + +static __init int napot_hugetlbpages_init(void) +{ + if (has_svnapot()) { + unsigned long order; + + for_each_napot_order(order) + hugetlb_add_hstate(order); + } + return 0; +} +arch_initcall(napot_hugetlbpages_init); + +#else + +static __init bool is_napot_size(unsigned long size) +{ + return false; +} + +#endif /*CONFIG_RISCV_ISA_SVNAPOT*/ + int pud_huge(pud_t pud) { return pud_leaf(pud); @@ -18,6 +317,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) return true; else if 
(IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE) return true; + else if (is_napot_size(size)) + return true; else return false; } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 478d6763a01a..747e5b1ef02d 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -20,6 +20,9 @@ #include <linux/dma-map-ops.h> #include <linux/crash_dump.h> #include <linux/hugetlb.h> +#ifdef CONFIG_RELOCATABLE +#include <linux/elf.h> +#endif #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -57,7 +60,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] EXPORT_SYMBOL(empty_zero_page); extern char _start[]; -#define DTB_EARLY_BASE_VA PGDIR_SIZE void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; @@ -146,7 +148,7 @@ static void __init print_vm_layout(void) print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); #endif - print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR, + print_ml("kernel", (unsigned long)kernel_map.virt_addr, (unsigned long)ADDRESS_SPACE_END); } } @@ -213,6 +215,14 @@ static void __init setup_bootmem(void) phys_ram_end = memblock_end_of_DRAM(); if (!IS_ENABLED(CONFIG_XIP_KERNEL)) phys_ram_base = memblock_start_of_DRAM(); + + /* + * In 64-bit, any use of __va/__pa before this point is wrong as we + * did not know the start of DRAM before. + */ + if (IS_ENABLED(CONFIG_64BIT)) + kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; + /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -236,31 +246,22 @@ static void __init setup_bootmem(void) set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); reserve_initrd_mem(); + + /* + * No allocation should be done before reserving the memory as defined + * in the device tree, otherwise the allocation could end up in a + * reserved region. + */ + early_init_fdt_scan_reserved_mem(); + /* * If DTB is built in, no need to reserve its memblock. * Otherwise, do reserve it but avoid using * early_init_fdt_reserve_self() since __pa() does * not work for DTB pointers that are fixmap addresses */ - if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) { - /* - * In case the DTB is not located in a memory region we won't - * be able to locate it later on via the linear mapping and - * get a segfault when accessing it via __va(dtb_early_pa). - * To avoid this situation copy DTB to a memory region. - * Note that memblock_phys_alloc will also reserve DTB region. 
- */ - if (!memblock_is_memory(dtb_early_pa)) { - size_t fdt_size = fdt_totalsize(dtb_early_va); - phys_addr_t new_dtb_early_pa = memblock_phys_alloc(fdt_size, PAGE_SIZE); - void *new_dtb_early_va = early_memremap(new_dtb_early_pa, fdt_size); - - memcpy(new_dtb_early_va, dtb_early_va, fdt_size); - early_memunmap(new_dtb_early_va, fdt_size); - _dtb_early_pa = new_dtb_early_pa; - } else - memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); - } + if (!IS_ENABLED(CONFIG_BUILTIN_DTB)) + memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); dma_contiguous_reserve(dma32_phys_limit); if (IS_ENABLED(CONFIG_64BIT)) @@ -271,21 +272,14 @@ static void __init setup_bootmem(void) #ifdef CONFIG_MMU struct pt_alloc_ops pt_ops __initdata; -unsigned long riscv_pfn_base __ro_after_init; -EXPORT_SYMBOL(riscv_pfn_base); - pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); -static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); -static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) -#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) @@ -626,9 +620,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define trampoline_pgd_next (pgtable_l5_enabled ? \ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) -#define early_dtb_pgd_next (pgtable_l5_enabled ? \ - (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? 
\ - (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) @@ -636,7 +627,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) #define fixmap_pgd_next ((uintptr_t)fixmap_pte) -#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) #define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) @@ -671,9 +661,16 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) { - /* Upgrade to PMD_SIZE mappings whenever possible */ - base &= PMD_SIZE - 1; - if (!base && size >= PMD_SIZE) + if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE) + return PGDIR_SIZE; + + if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE) + return P4D_SIZE; + + if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE) + return PUD_SIZE; + + if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE) return PMD_SIZE; return PAGE_SIZE; @@ -732,6 +729,8 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) #endif /* CONFIG_STRICT_KERNEL_RWX */ #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) +u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa); + static void __init disable_pgtable_l5(void) { pgtable_l5_enabled = false; @@ -746,17 +745,39 @@ static void __init disable_pgtable_l4(void) satp_mode = SATP_MODE_39; } +static int __init print_no4lvl(char *p) +{ + pr_info("Disabled 4-level and 5-level paging"); + return 0; +} +early_param("no4lvl", print_no4lvl); + +static int __init print_no5lvl(char *p) +{ + pr_info("Disabled 5-level paging"); + return 0; +} +early_param("no5lvl", print_no5lvl); + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode * then read SATP to see if the configuration was taken into account * meaning sv48 is supported. */ -static __init void set_satp_mode(void) +static __init void set_satp_mode(uintptr_t dtb_pa) { u64 identity_satp, hw_satp; uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; - bool check_l4 = false; + u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + + if (satp_mode_cmdline == SATP_MODE_57) { + disable_pgtable_l5(); + } else if (satp_mode_cmdline == SATP_MODE_48) { + disable_pgtable_l5(); + disable_pgtable_l4(); + return; + } create_p4d_mapping(early_p4d, set_satp_mode_pmd, (uintptr_t)early_pud, @@ -775,7 +796,8 @@ static __init void set_satp_mode(void) retry: create_pgd_mapping(early_pg_dir, set_satp_mode_pmd, - check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d, + pgtable_l5_enabled ? + (uintptr_t)early_p4d : (uintptr_t)early_pud, PGDIR_SIZE, PAGE_TABLE); identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; @@ -786,9 +808,8 @@ retry: local_flush_tlb_all(); if (hw_satp != identity_satp) { - if (!check_l4) { + if (pgtable_l5_enabled) { disable_pgtable_l5(); - check_l4 = true; memset(early_pg_dir, 0, PAGE_SIZE); goto retry; } @@ -820,6 +841,44 @@ retry: #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." 
#endif +#ifdef CONFIG_RELOCATABLE +extern unsigned long __rela_dyn_start, __rela_dyn_end; + +static void __init relocate_kernel(void) +{ + Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start; + /* + * This holds the offset between the linked virtual address and the + * relocated virtual address. + */ + uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR; + /* + * This holds the offset between kernel linked virtual address and + * physical address. + */ + uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr; + + for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) { + Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset); + Elf64_Addr relocated_addr = rela->r_addend; + + if (rela->r_info != R_RISCV_RELATIVE) + continue; + + /* + * Make sure to not relocate vdso symbols like rt_sigreturn + * which are linked from the address 0 in vmlinux since + * vdso symbol addresses are actually used as an offset from + * mm->context.vdso in VDSO_OFFSET macro. + */ + if (relocated_addr >= KERNEL_LINK_ADDR) + relocated_addr += reloc_offset; + + *(Elf64_Addr *)addr = relocated_addr; + } +} +#endif /* CONFIG_RELOCATABLE */ + #ifdef CONFIG_XIP_KERNEL static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) @@ -860,32 +919,27 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR * entry. */ -static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) +static void __init create_fdt_early_page_table(uintptr_t fix_fdt_va, + uintptr_t dtb_pa) { -#ifndef CONFIG_BUILTIN_DTB uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); - create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, - PGDIR_SIZE, - IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); - - if (pgtable_l5_enabled) - create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); - - if (pgtable_l4_enabled) - create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, - (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); +#ifndef CONFIG_BUILTIN_DTB + /* Make sure the fdt fixmap address is always aligned on PMD size */ + BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); - if (IS_ENABLED(CONFIG_64BIT)) { - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + /* In 32-bit only, the fdt lies in its own PGD */ + if (!IS_ENABLED(CONFIG_64BIT)) { + create_pgd_mapping(early_pg_dir, fix_fdt_va, + pa, MAX_FDT_SIZE, PAGE_KERNEL); + } else { + create_pmd_mapping(fixmap_pmd, fix_fdt_va, pa, PMD_SIZE, PAGE_KERNEL); - create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE, pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); } - dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); + dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1)); #else /* * For 64-bit kernel, __va can't be used since it would return a linear @@ -979,14 +1033,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #endif #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) - set_satp_mode(); + set_satp_mode(dtb_pa); #endif - kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; + /* + * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem, + * where we have the system memory layout: this allows us to align + * the physical and virtual mappings and then make use of PUD/P4D/PGD + * for the linear mapping. 
This is only possible because the kernel + * mapping lies outside the linear mapping. + * In 32-bit however, as the kernel resides in the linear mapping, + * setup_vm_final can not change the mapping established here, + * otherwise the same kernel addresses would get mapped to different + * physical addresses (if the start of dram is different from the + * kernel physical address start). + */ + kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ? + 0UL : PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; - riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); - /* * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit * kernel, whereas for 64-bit kernel, the end of the virtual address @@ -1007,6 +1072,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif +#ifdef CONFIG_RELOCATABLE + /* + * Early page table uses only one PUD, which makes it possible + * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset + * makes the kernel cross over a PUD_SIZE boundary, raise a bug + * since a part of the kernel would not get mapped. + */ + BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size); + relocate_kernel(); +#endif + apply_early_boot_alternatives(); pt_ops_set_early(); @@ -1055,7 +1131,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_kernel_page_table(early_pg_dir, true); /* Setup early mapping for FDT early scan */ - create_fdt_early_page_table(early_pg_dir, dtb_pa); + create_fdt_early_page_table(__fix_to_virt(FIX_FDT), dtb_pa); /* * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap @@ -1090,16 +1166,36 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pt_ops_set_fixmap(); } -static void __init setup_vm_final(void) +static void __init create_linear_mapping_range(phys_addr_t start, + phys_addr_t end) { + phys_addr_t pa; uintptr_t va, map_size; - phys_addr_t pa, start, end; + + for (pa = start; pa < end; pa += map_size) { + va = (uintptr_t)__va(pa); + map_size = best_map_size(pa, end - pa); + + create_pgd_mapping(swapper_pg_dir, va, pa, map_size, + pgprot_from_va(va)); + } +} + +static void __init create_linear_mapping_page_table(void) +{ + phys_addr_t start, end; u64 i; - /* Setup swapper PGD for fixmap */ - create_pgd_mapping(swapper_pg_dir, FIXADDR_START, - __pa_symbol(fixmap_pgd_next), - PGDIR_SIZE, PAGE_TABLE); +#ifdef CONFIG_STRICT_KERNEL_RWX + phys_addr_t ktext_start = __pa_symbol(_start); + phys_addr_t ktext_size = __init_data_begin - _start; + phys_addr_t krodata_start = __pa_symbol(__start_rodata); + phys_addr_t krodata_size = _data - __start_rodata; + + /* Isolate kernel text and rodata so they don't get mapped with a PUD */ + memblock_mark_nomap(ktext_start, ktext_size); + memblock_mark_nomap(krodata_start, krodata_size); +#endif /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { @@ -1111,15 +1207,39 @@ static void __init setup_vm_final(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - for (pa = start; pa < end; pa += map_size) { - va = (uintptr_t)__va(pa); - map_size = best_map_size(pa, end - pa); - - create_pgd_mapping(swapper_pg_dir, va, pa, map_size, - pgprot_from_va(va)); - } + create_linear_mapping_range(start, end); } +#ifdef CONFIG_STRICT_KERNEL_RWX + create_linear_mapping_range(ktext_start, ktext_start + ktext_size); + create_linear_mapping_range(krodata_start, + 
krodata_start + krodata_size); + + memblock_clear_nomap(ktext_start, ktext_size); + memblock_clear_nomap(krodata_start, krodata_size); +#endif +} + +static void __init setup_vm_final(void) +{ + /* Setup swapper PGD for fixmap */ +#if !defined(CONFIG_64BIT) + /* + * In 32-bit, the device tree lies in a pgd entry, so it must be copied + * directly in swapper_pg_dir in addition to the pgd entry that points + * to fixmap_pte. + */ + unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT)); + + set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]); +#endif + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa_symbol(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); + + /* Map the linear mapping */ + create_linear_mapping_page_table(); + /* Map the kernel */ if (IS_ENABLED(CONFIG_64BIT)) create_kernel_page_table(swapper_pg_dir, false); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index e1226709490f..8fc0efcf905c 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -18,58 +18,48 @@ * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate * the page global directory with kasan_early_shadow_pmd. * - * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping - * must be divided as follows: - * - the first PGD entry, although incomplete, is populated with - * kasan_early_shadow_pud/p4d - * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d - * - the last PGD entry is shared with the kernel mapping so populated at the - * lower levels pud/p4d - * - * In addition, when shallow populating a kasan region (for example vmalloc), - * this region may also not be aligned on PGDIR size, so we must go down to the - * pud level too. + * For sv48 and sv57, the region start is aligned on PGDIR_SIZE whereas the end + * region is not and then we have to go down to the PUD level. 
*/ extern pgd_t early_pg_dir[PTRS_PER_PGD]; +pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss; +p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss; +pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss; static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pte_t *ptep, *base_pte; + pte_t *ptep, *p; - if (pmd_none(*pmd)) - base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); - else - base_pte = (pte_t *)pmd_page_vaddr(*pmd); + if (pmd_none(*pmd)) { + p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); + set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE)); + } - ptep = base_pte + pte_index(vaddr); + ptep = pte_offset_kernel(pmd, vaddr); do { if (pte_none(*ptep)) { phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE); } } while (ptep++, vaddr += PAGE_SIZE, vaddr != end); - - set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); } static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pmd_t *pmdp, *base_pmd; + pmd_t *pmdp, *p; unsigned long next; if (pud_none(*pud)) { - base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); - } else { - base_pmd = (pmd_t *)pud_pgtable(*pud); - if (base_pmd == lm_alias(kasan_early_shadow_pmd)) - base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - pmdp = base_pmd + pmd_index(vaddr); + pmdp = pmd_offset(pud, vaddr); do { next = pmd_addr_end(vaddr, end); @@ -78,157 +68,77 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE); if (phys_addr) { set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PMD_SIZE); continue; } } kasan_populate_pte(pmdp, vaddr, next); } while (pmdp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole PGD to be populated before setting the PGD in - * the page table, otherwise, if we did set the PGD before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); } -static void __init kasan_populate_pud(pgd_t *pgd, - unsigned long vaddr, unsigned long end, - bool early) +static void __init kasan_populate_pud(p4d_t *p4d, + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - pud_t *pudp, *base_pud; + pud_t *pudp, *p; unsigned long next; - if (early) { - /* - * We can't use pgd_page_vaddr here as it would return a linear - * mapping address but it is not mapped yet, but when populating - * early_pg_dir, we need the physical address and when populating - * swapper_pg_dir, we need the kernel virtual address so use - * pt_ops facility. 
- */ - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); - } else if (pgd_none(*pgd)) { - base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); - memcpy(base_pud, (void *)kasan_early_shadow_pud, - sizeof(pud_t) * PTRS_PER_PUD); - } else { - base_pud = (pud_t *)pgd_page_vaddr(*pgd); - if (base_pud == lm_alias(kasan_early_shadow_pud)) { - base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); - memcpy(base_pud, (void *)kasan_early_shadow_pud, - sizeof(pud_t) * PTRS_PER_PUD); - } + if (p4d_none(*p4d)) { + p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - pudp = base_pud + pud_index(vaddr); + pudp = pud_offset(p4d, vaddr); do { next = pud_addr_end(vaddr, end); if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { - if (early) { - phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd)); - set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); + if (phys_addr) { + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PUD_SIZE); continue; - } else { - phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); - if (phys_addr) { - set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } kasan_populate_pmd(pudp, vaddr, next); } while (pudp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole PGD to be populated before setting the PGD in - * the page table, otherwise, if we did set the PGD before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - if (!early) - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); } static void __init kasan_populate_p4d(pgd_t *pgd, - unsigned long vaddr, unsigned long end, - bool early) + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; - p4d_t *p4dp, *base_p4d; + p4d_t *p4dp, *p; unsigned long next; - if (early) { - /* - * We can't use pgd_page_vaddr here as it would return a linear - * mapping address but it is not mapped yet, but when populating - * early_pg_dir, we need the physical address and when populating - * swapper_pg_dir, we need the kernel virtual address so use - * pt_ops facility. 
- */ - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd))); - } else { - base_p4d = (p4d_t *)pgd_page_vaddr(*pgd); - if (base_p4d == lm_alias(kasan_early_shadow_p4d)) { - base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE); - memcpy(base_p4d, (void *)kasan_early_shadow_p4d, - sizeof(p4d_t) * PTRS_PER_P4D); - } + if (pgd_none(*pgd)) { + p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE); + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - p4dp = base_p4d + p4d_index(vaddr); + p4dp = p4d_offset(pgd, vaddr); do { next = p4d_addr_end(vaddr, end); if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { - if (early) { - phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud)); - set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); + if (phys_addr) { + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, P4D_SIZE); continue; - } else { - phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); - if (phys_addr) { - set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } - kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early); + kasan_populate_pud(p4dp, vaddr, next); } while (p4dp++, vaddr = next, vaddr != end); - - /* - * Wait for the whole P4D to be populated before setting the P4D in - * the page table, otherwise, if we did set the P4D before populating - * it entirely, memblock could allocate a page at a physical address - * where KASAN is not populated yet and then we'd get a page fault. - */ - if (!early) - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE)); } -#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \ - (uintptr_t)kasan_early_shadow_p4d : \ - (pgtable_l4_enabled ? \ - (uintptr_t)kasan_early_shadow_pud : \ - (uintptr_t)kasan_early_shadow_pmd)) -#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ - (pgtable_l5_enabled ? \ - kasan_populate_p4d(pgdp, vaddr, next, early) : \ - (pgtable_l4_enabled ? \ - kasan_populate_pud(pgdp, vaddr, next, early) : \ - kasan_populate_pmd((pud_t *)pgdp, vaddr, next))) - static void __init kasan_populate_pgd(pgd_t *pgdp, - unsigned long vaddr, unsigned long end, - bool early) + unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; unsigned long next; @@ -236,29 +146,174 @@ static void __init kasan_populate_pgd(pgd_t *pgdp, do { next = pgd_addr_end(vaddr, end); - if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { - if (early) { - phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next); - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); + if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); + if (phys_addr) { + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + memset(__va(phys_addr), KASAN_SHADOW_INIT, PGDIR_SIZE); continue; - } else if (pgd_page_vaddr(*pgdp) == - (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) { - /* - * pgdp can't be none since kasan_early_init - * initialized all KASAN shadow region with - * kasan_early_shadow_pud: if this is still the - * case, that means we can try to allocate a - * hugepage as a replacement. 
- */ - phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); - if (phys_addr) { - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); - continue; - } } } - kasan_populate_pgd_next(pgdp, vaddr, next, early); + kasan_populate_p4d(pgdp, vaddr, next); + } while (pgdp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_pud(p4d_t *p4dp, + unsigned long vaddr, unsigned long end) +{ + pud_t *pudp, *base_pud; + unsigned long next; + + if (!pgtable_l4_enabled) { + pudp = (pud_t *)p4dp; + } else { + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + pudp = base_pud + pud_index(vaddr); + } + + do { + next = pud_addr_end(vaddr, end); + + if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { + pud_clear(pudp); + continue; + } + + BUG(); + } while (pudp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_p4d(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + p4d_t *p4dp, *base_p4d; + unsigned long next; + + if (!pgtable_l5_enabled) { + p4dp = (p4d_t *)pgdp; + } else { + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + p4dp = base_p4d + p4d_index(vaddr); + } + + do { + next = p4d_addr_end(vaddr, end); + + if (pgtable_l4_enabled && IS_ALIGNED(vaddr, P4D_SIZE) && + (next - vaddr) >= P4D_SIZE) { + p4d_clear(p4dp); + continue; + } + + kasan_early_clear_pud(p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_clear_pgd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end) +{ + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + if (pgtable_l5_enabled && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + pgd_clear(pgdp); + continue; + } + + kasan_early_clear_p4d(pgdp, vaddr, next); + } while (pgdp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_pud(p4d_t *p4dp, + unsigned long vaddr, + unsigned long end) +{ + pud_t *pudp, *base_pud; + phys_addr_t phys_addr; + unsigned long next; + + if (!pgtable_l4_enabled) { + pudp = (pud_t *)p4dp; + } else { + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + pudp = base_pud + pud_index(vaddr); + } + + do { + next = pud_addr_end(vaddr, end); + + if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && + (next - vaddr) >= PUD_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd); + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + BUG(); + } while (pudp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_p4d(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + p4d_t *p4dp, *base_p4d; + phys_addr_t phys_addr; + unsigned long next; + + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. 
+ * Note that this test is then completely equivalent to + * p4dp = p4d_offset(pgdp, vaddr) + */ + if (!pgtable_l5_enabled) { + p4dp = (p4d_t *)pgdp; + } else { + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + p4dp = base_p4d + p4d_index(vaddr); + } + + do { + next = p4d_addr_end(vaddr, end); + + if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && + (next - vaddr) >= P4D_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pud); + set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + kasan_early_populate_pud(p4dp, vaddr, next); + } while (p4dp++, vaddr = next, vaddr != end); +} + +static void __init kasan_early_populate_pgd(pgd_t *pgdp, + unsigned long vaddr, + unsigned long end) +{ + phys_addr_t phys_addr; + unsigned long next; + + do { + next = pgd_addr_end(vaddr, end); + + if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + (next - vaddr) >= PGDIR_SIZE) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d); + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } + + kasan_early_populate_p4d(pgdp, vaddr, next); } while (pgdp++, vaddr = next, vaddr != end); } @@ -295,16 +350,16 @@ asmlinkage void __init kasan_early_init(void) PAGE_TABLE)); } - kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), - KASAN_SHADOW_START, KASAN_SHADOW_END, true); + kasan_early_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); local_flush_tlb_all(); } void __init kasan_swapper_init(void) { - kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), - KASAN_SHADOW_START, KASAN_SHADOW_END, true); + kasan_early_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); local_flush_tlb_all(); } @@ -314,118 +369,65 @@ static void __init kasan_populate(void *start, void *end) unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); - kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false); - - local_flush_tlb_all(); - memset(start, KASAN_SHADOW_INIT, end - start); + kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend); } -static void __init kasan_shallow_populate_pmd(pgd_t *pgdp, +static void __init kasan_shallow_populate_pud(p4d_t *p4d, unsigned long vaddr, unsigned long end) { unsigned long next; - pmd_t *pmdp, *base_pmd; - bool is_kasan_pte; - - base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp); - pmdp = base_pmd + pmd_index(vaddr); - - do { - next = pmd_addr_end(vaddr, end); - is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte)); - - if (is_kasan_pte) - pmd_clear(pmdp); - } while (pmdp++, vaddr = next, vaddr != end); -} - -static void __init kasan_shallow_populate_pud(pgd_t *pgdp, - unsigned long vaddr, unsigned long end) -{ - unsigned long next; - pud_t *pudp, *base_pud; - pmd_t *base_pmd; - bool is_kasan_pmd; - - base_pud = (pud_t *)pgd_page_vaddr(*pgdp); - pudp = base_pud + pud_index(vaddr); + void *p; + pud_t *pud_k = pud_offset(p4d, vaddr); do { next = pud_addr_end(vaddr, end); - is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); - - if (!is_kasan_pmd) - continue; - - base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); - if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) + if (pud_none(*pud_k)) { + p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; + } - memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE); - 
kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next); - } while (pudp++, vaddr = next, vaddr != end); + BUG(); + } while (pud_k++, vaddr = next, vaddr != end); } -static void __init kasan_shallow_populate_p4d(pgd_t *pgdp, +static void __init kasan_shallow_populate_p4d(pgd_t *pgd, unsigned long vaddr, unsigned long end) { unsigned long next; - p4d_t *p4dp, *base_p4d; - pud_t *base_pud; - bool is_kasan_pud; - - base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp); - p4dp = base_p4d + p4d_index(vaddr); + void *p; + p4d_t *p4d_k = p4d_offset(pgd, vaddr); do { next = p4d_addr_end(vaddr, end); - is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud)); - if (!is_kasan_pud) - continue; - - base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); - set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); - - if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) + if (p4d_none(*p4d_k)) { + p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; + } - memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE); - kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next); - } while (p4dp++, vaddr = next, vaddr != end); + kasan_shallow_populate_pud(p4d_k, vaddr, end); + } while (p4d_k++, vaddr = next, vaddr != end); } -#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \ - (pgtable_l5_enabled ? \ - kasan_shallow_populate_p4d(pgdp, vaddr, next) : \ - (pgtable_l4_enabled ? \ - kasan_shallow_populate_pud(pgdp, vaddr, next) : \ - kasan_shallow_populate_pmd(pgdp, vaddr, next))) - static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { unsigned long next; void *p; pgd_t *pgd_k = pgd_offset_k(vaddr); - bool is_kasan_pgd_next; do { next = pgd_addr_end(vaddr, end); - is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) == - (unsigned long)lm_alias(kasan_early_shadow_pgd_next)); - if (is_kasan_pgd_next) { + if (pgd_none(*pgd_k)) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); - } - - if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) continue; + } - memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE); - kasan_shallow_populate_pgd_next(pgd_k, vaddr, next); + kasan_shallow_populate_p4d(pgd_k, vaddr, next); } while (pgd_k++, vaddr = next, vaddr != end); } @@ -435,7 +437,37 @@ static void __init kasan_shallow_populate(void *start, void *end) unsigned long vend = PAGE_ALIGN((unsigned long)end); kasan_shallow_populate_pgd(vaddr, vend); - local_flush_tlb_all(); +} + +static void create_tmp_mapping(void) +{ + void *ptr; + p4d_t *base_p4d; + + /* + * We need to clean the early mapping: this is hard to achieve "in-place", + * so install a temporary mapping like arm64 and x86 do. + */ + memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(pgd_t) * PTRS_PER_PGD); + + /* Copy the last p4d since it is shared with the kernel mapping. */ + if (pgtable_l5_enabled) { + ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); + memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D); + set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)], + pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE)); + base_p4d = tmp_p4d; + } else { + base_p4d = (p4d_t *)tmp_pg_dir; + } + + /* Copy the last pud since it is shared with the kernel mapping. 
*/ + if (pgtable_l4_enabled) { + ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END))); + memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD); + set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)], + pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE)); + } } void __init kasan_init(void) @@ -443,10 +475,27 @@ void __init kasan_init(void) phys_addr_t p_start, p_end; u64 i; - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + create_tmp_mapping(); + csr_write(CSR_SATP, PFN_DOWN(__pa(tmp_pg_dir)) | satp_mode); + + kasan_early_clear_pgd(pgd_offset_k(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)FIXADDR_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_START)); + + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { kasan_shallow_populate( (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + /* Shallow populate modules and BPF which are vmalloc-allocated */ + kasan_shallow_populate( + (void *)kasan_mem_to_shadow((void *)MODULES_VADDR), + (void *)kasan_mem_to_shadow((void *)MODULES_END)); + } else { + kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)VMALLOC_START), + (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + } /* Populate the linear mapping */ for_each_mem_range(i, &p_start, &p_end) { @@ -459,8 +508,8 @@ void __init kasan_init(void) kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); } - /* Populate kernel, BPF, modules mapping */ - kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), + /* Populate kernel */ + kasan_populate(kasan_mem_to_shadow((const void *)MODULES_END), kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); for (i = 0; i < PTRS_PER_PTE; i++) @@ -471,4 +520,7 @@ void __init kasan_init(void) memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE); init_task.kasan_depth = 0; + + csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | satp_mode); + local_flush_tlb_all(); } diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 86c56616e5de..ea3d61de065b 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -217,18 +217,26 @@ bool kernel_page_present(struct page *page) pgd = pgd_offset_k(addr); if (!pgd_present(*pgd)) return false; + if (pgd_leaf(*pgd)) + return true; p4d = p4d_offset(pgd, addr); if (!p4d_present(*p4d)) return false; + if (p4d_leaf(*p4d)) + return true; pud = pud_offset(p4d, addr); if (!pud_present(*pud)) return false; + if (pud_leaf(*pud)) + return true; pmd = pmd_offset(pud, addr); if (!pmd_present(*pmd)) return false; + if (pmd_leaf(*pmd)) + return true; pte = pte_offset_kernel(pmd, addr); return pte_present(*pte); diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 9b18bda74154..18706f457da7 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x) return __va_to_pa_nodebug(x); } EXPORT_SYMBOL(__phys_addr_symbol); + +phys_addr_t linear_mapping_va_to_pa(unsigned long x) +{ + BUG_ON(!kernel_map.va_pa_offset); + + return ((unsigned long)(x) - kernel_map.va_pa_offset); +} +EXPORT_SYMBOL(linear_mapping_va_to_pa); + +void *linear_mapping_pa_to_va(unsigned long x) +{ + BUG_ON(!kernel_map.va_pa_offset); + + return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)); +} +EXPORT_SYMBOL(linear_mapping_pa_to_va); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 830e7de65e3a..20a9f991a6d7 100644 --- a/arch/riscv/mm/ptdump.c +++ 
b/arch/riscv/mm/ptdump.c @@ -59,10 +59,6 @@ struct ptd_mm_info { }; enum address_markers_idx { -#ifdef CONFIG_KASAN - KASAN_SHADOW_START_NR, - KASAN_SHADOW_END_NR, -#endif FIXMAP_START_NR, FIXMAP_END_NR, PCI_IO_START_NR, @@ -74,6 +70,10 @@ enum address_markers_idx { VMALLOC_START_NR, VMALLOC_END_NR, PAGE_OFFSET_NR, +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif #ifdef CONFIG_64BIT MODULES_MAPPING_NR, KERNEL_MAPPING_NR, @@ -82,10 +82,6 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { -#ifdef CONFIG_KASAN - {0, "Kasan shadow start"}, - {0, "Kasan shadow end"}, -#endif {0, "Fixmap start"}, {0, "Fixmap end"}, {0, "PCI I/O start"}, @@ -97,6 +93,10 @@ static struct addr_marker address_markers[] = { {0, "vmalloc() area"}, {0, "vmalloc() end"}, {0, "Linear mapping"}, +#ifdef CONFIG_KASAN + {0, "Kasan shadow start"}, + {0, "Kasan shadow end"}, +#endif #ifdef CONFIG_64BIT {0, "Modules/BPF mapping"}, {0, "Kernel mapping"}, @@ -362,10 +362,6 @@ static int __init ptdump_init(void) { unsigned int i, j; -#ifdef CONFIG_KASAN - address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; - address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; -#endif address_markers[FIXMAP_START_NR].start_address = FIXADDR_START; address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP; address_markers[PCI_IO_START_NR].start_address = PCI_IO_START; @@ -377,6 +373,10 @@ static int __init ptdump_init(void) address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET; +#ifdef CONFIG_KASAN + address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; + address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; +#endif #ifdef CONFIG_64BIT address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR; address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index ef701fa83f36..77be59aadc73 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -23,14 +23,62 @@ static inline void local_flush_tlb_page_asid(unsigned long addr, : "memory"); } +static inline void local_flush_tlb_range(unsigned long start, + unsigned long size, unsigned long stride) +{ + if (size <= stride) + local_flush_tlb_page(start); + else + local_flush_tlb_all(); +} + +static inline void local_flush_tlb_range_asid(unsigned long start, + unsigned long size, unsigned long stride, unsigned long asid) +{ + if (size <= stride) + local_flush_tlb_page_asid(start, asid); + else + local_flush_tlb_all_asid(asid); +} + +static void __ipi_flush_tlb_all(void *info) +{ + local_flush_tlb_all(); +} + void flush_tlb_all(void) { - sbi_remote_sfence_vma(NULL, 0, -1); + if (riscv_use_ipi_for_rfence()) + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); + else + sbi_remote_sfence_vma(NULL, 0, -1); +} + +struct flush_tlb_range_data { + unsigned long asid; + unsigned long start; + unsigned long size; + unsigned long stride; +}; + +static void __ipi_flush_tlb_range_asid(void *info) +{ + struct flush_tlb_range_data *d = info; + + local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); +} + +static void __ipi_flush_tlb_range(void *info) +{ + struct flush_tlb_range_data *d = info; + + local_flush_tlb_range(d->start, d->size, d->stride); } -static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, - unsigned long size, 
unsigned long stride) +static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) { + struct flush_tlb_range_data ftd; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -45,19 +93,34 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask; if (broadcast) { - sbi_remote_sfence_vma_asid(cmask, start, size, asid); - } else if (size <= stride) { - local_flush_tlb_page_asid(start, asid); + if (riscv_use_ipi_for_rfence()) { + ftd.asid = asid; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range_asid, + &ftd, 1); + } else + sbi_remote_sfence_vma_asid(cmask, + start, size, asid); } else { - local_flush_tlb_all_asid(asid); + local_flush_tlb_range_asid(start, size, stride, asid); } } else { if (broadcast) { - sbi_remote_sfence_vma(cmask, start, size); - } else if (size <= stride) { - local_flush_tlb_page(start); + if (riscv_use_ipi_for_rfence()) { + ftd.asid = 0; + ftd.start = start; + ftd.size = size; + ftd.stride = stride; + on_each_cpu_mask(cmask, + __ipi_flush_tlb_range, + &ftd, 1); + } else + sbi_remote_sfence_vma(cmask, start, size); } else { - local_flush_tlb_all(); + local_flush_tlb_range(start, size, stride); } } @@ -66,23 +129,23 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, void flush_tlb_mm(struct mm_struct *mm) { - __sbi_tlb_flush_range(mm, 0, -1, PAGE_SIZE); + __flush_tlb_range(mm, 0, -1, PAGE_SIZE); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - __sbi_tlb_flush_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); + __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); } void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PAGE_SIZE); + __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __sbi_tlb_flush_range(vma->vm_mm, start, end - start, PMD_SIZE); + __flush_tlb_range(vma->vm_mm, start, end - start, PMD_SIZE); } #endif |
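
The tlbflush.c hunks above replace the SBI-only remote fences with a dispatch that prefers IPIs whenever riscv_use_ipi_for_rfence() reports that IPIs can be sent directly. A minimal standalone sketch of that decision is below; the helper names (riscv_use_ipi_for_rfence, __ipi_flush_tlb_range_asid, local_flush_tlb_range_asid, struct flush_tlb_range_data, sbi_remote_sfence_vma_asid) are taken from the diff, while flush_tlb_range_sketch() itself is a hypothetical wrapper and the ASID masking, cpumask setup and preemption handling of the real __flush_tlb_range() are omitted.

	/*
	 * Simplified view of the new __flush_tlb_range() dispatch:
	 * local flush when only this hart has used the mm, IPI-based
	 * remote fences when available, SBI firmware call otherwise.
	 */
	static void flush_tlb_range_sketch(struct cpumask *cmask, bool broadcast,
					   unsigned long start, unsigned long size,
					   unsigned long stride, unsigned long asid)
	{
		if (!broadcast) {
			/* Only the local hart is concerned: flush locally. */
			local_flush_tlb_range_asid(start, size, stride, asid);
		} else if (riscv_use_ipi_for_rfence()) {
			/*
			 * IPIs can be sent directly: run the local flush on
			 * every hart in the mask. ftd may live on the stack
			 * because on_each_cpu_mask() waits for completion.
			 */
			struct flush_tlb_range_data ftd = {
				.asid	= asid,
				.start	= start,
				.size	= size,
				.stride	= stride,
			};

			on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1);
		} else {
			/* Fall back to the SBI remote fence. */
			sbi_remote_sfence_vma_asid(cmask, start, size, asid);
		}
	}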