Diffstat:
-rw-r--r--   7   Documentation/devicetree/bindings/riscv/extensions.yaml
-rw-r--r--  18   arch/riscv/include/asm/cacheflush.h
-rw-r--r--   1   arch/riscv/include/asm/hwcap.h
-rw-r--r--  10   arch/riscv/include/asm/pgtable.h
-rw-r--r--   7   arch/riscv/include/asm/thread_info.h
-rw-r--r--   7   arch/riscv/kernel/asm-offsets.c
-rw-r--r--   1   arch/riscv/kernel/cpufeature.c
-rw-r--r--  87   arch/riscv/kernel/entry.S
-rw-r--r--   2   arch/riscv/mm/init.c
-rw-r--r--  13   arch/riscv/mm/pgtable.c
10 files changed, 152 insertions(+), 1 deletion(-)
diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml
index a06dbc6b4928..2cf2026cff57 100644
--- a/Documentation/devicetree/bindings/riscv/extensions.yaml
+++ b/Documentation/devicetree/bindings/riscv/extensions.yaml
@@ -171,6 +171,13 @@ properties:
memory types as ratified in the 20191213 version of the privileged
ISA specification.
+ - const: svvptc
+ description:
+ The standard Svvptc supervisor-level extension for
+ address-translation cache behaviour with respect to invalid entries
+ as ratified at commit 4a69197e5617 ("Update to ratified state") of
+ riscv-svvptc.
+
- const: zacas
description: |
The Zacas extension for Atomic Compare-and-Swap (CAS) instructions
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index ce79c558a4c8..8de73f91bfa3 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -46,7 +46,23 @@ do { \
} while (0)
#ifdef CONFIG_64BIT
-#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end)
+extern u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1];
+extern char _end[];
+#define flush_cache_vmap flush_cache_vmap
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ if (is_vmalloc_or_module_addr((void *)start)) {
+ int i;
+
+ /*
+ * We don't care if a cpu concurrently resets this value, since the
+ * only place that can happen is in handle_exception(), where an
+ * sfence.vma is emitted.
+ */
+ for (i = 0; i < ARRAY_SIZE(new_vmalloc); ++i)
+ new_vmalloc[i] = -1ULL;
+ }
+}
#define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end)
#endif
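
The hunk above replaces the unconditional flush_tlb_kernel_range() with per-cpu bookkeeping: flush_cache_vmap() sets every cpu's bit in new_vmalloc, and each cpu later clears its own bit the first time it faults on a kernel address (see the entry.S change below). A minimal user-space sketch of that bookkeeping, with hypothetical names (mark_new_vmalloc(), check_and_clear_new_vmalloc() and the fixed NR_CPUS are illustration only, not the kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 128

/* One bit per cpu, sized like the kernel's new_vmalloc[] declaration above. */
static uint64_t new_vmalloc[NR_CPUS / sizeof(uint64_t) + 1];

/* Model of flush_cache_vmap(): after a new vmalloc mapping, mark every cpu. */
static void mark_new_vmalloc(void)
{
	for (size_t i = 0; i < sizeof(new_vmalloc) / sizeof(new_vmalloc[0]); i++)
		new_vmalloc[i] = -1ULL;
}

/* Model of the fault-time check in entry.S: test-and-clear this cpu's bit. */
static bool check_and_clear_new_vmalloc(unsigned int cpu)
{
	uint64_t *word = &new_vmalloc[cpu / 64];
	uint64_t mask = 1ULL << (cpu % 64);

	if (!(*word & mask))
		return false;	/* not a deferred-vmalloc fault for this cpu */
	*word &= ~mask;		/* the kernel does this atomically with amoxor.d */
	return true;		/* retry the access (after sfence.vma if !Svvptc) */
}

int main(void)
{
	mark_new_vmalloc();
	printf("cpu 70, first fault:  %d\n", check_and_clear_new_vmalloc(70));
	printf("cpu 70, second fault: %d\n", check_and_clear_new_vmalloc(70));
	return 0;
}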
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 5a0bd27fd11a..46d9de54179e 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -92,6 +92,7 @@
#define RISCV_ISA_EXT_ZCF 83
#define RISCV_ISA_EXT_ZCMOP 84
#define RISCV_ISA_EXT_ZAWRS 85
+#define RISCV_ISA_EXT_SVVPTC 86
#define RISCV_ISA_EXT_XLINUXENVCFG 127
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ede2f921967c..e79f15293492 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -497,6 +497,9 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
struct vm_area_struct *vma, unsigned long address,
pte_t *ptep, unsigned int nr)
{
+ asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+ : : : : svvptc);
+
/*
* The kernel assumes that TLBs don't cache invalid entries, but
* in RISC-V, SFENCE.VMA specifies an ordering constraint, not a
@@ -506,6 +509,13 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
*/
while (nr--)
local_flush_tlb_page(address + nr * PAGE_SIZE);
+
+svvptc:;
+ /*
+ * Svvptc guarantees that the new valid pte will be visible within
+ * a bounded timeframe, so when the uarch does not cache invalid
+ * entries, we don't have to do anything.
+ */
}
#define update_mmu_cache(vma, addr, ptep) \
update_mmu_cache_range(NULL, vma, addr, ptep, 1)
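
The asm goto above is resolved at boot by the ALTERNATIVE mechanism: without Svvptc it stays a nop and the preventive local_flush_tlb_page() loop runs; with Svvptc it becomes a jump to the svvptc label and the flushes are skipped. A plain-C sketch of the resulting control flow, using a runtime flag (cpu_has_svvptc) in place of code patching, purely for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the boot-time ALTERNATIVE patching keyed on RISCV_ISA_EXT_SVVPTC. */
static bool cpu_has_svvptc;

static void flush_one_page(unsigned long addr)
{
	printf("local_flush_tlb_page(%#lx)\n", addr);
}

static void model_update_mmu_cache_range(unsigned long address, unsigned int nr)
{
	if (cpu_has_svvptc)
		goto svvptc;

	/* Pre-Svvptc behaviour: flush so a cached invalid entry cannot linger. */
	while (nr--)
		flush_one_page(address + nr * 4096UL);
	return;

svvptc:
	/* Svvptc: the new valid PTE becomes visible in bounded time; nothing to do. */
	return;
}

int main(void)
{
	model_update_mmu_cache_range(0x1000, 2);	/* two flushes */
	cpu_has_svvptc = true;
	model_update_mmu_cache_range(0x1000, 2);	/* no flush */
	return 0;
}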
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 5b09dc874151..ebe52f96da34 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -61,6 +61,13 @@ struct thread_info {
void *scs_base;
void *scs_sp;
#endif
+#ifdef CONFIG_64BIT
+ /*
+ * Used in handle_exception() to save a0, a1 and a2 before knowing if we
+ * can access the kernel stack.
+ */
+ unsigned long a0, a1, a2;
+#endif
};
#ifdef CONFIG_SHADOW_CALL_STACK
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index b09ca5f944f7..e94180ba432f 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -36,6 +36,8 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+
+ OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
@@ -43,6 +45,11 @@ void asm_offsets(void)
#ifdef CONFIG_SHADOW_CALL_STACK
OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
#endif
+#ifdef CONFIG_64BIT
+ OFFSET(TASK_TI_A0, task_struct, thread_info.a0);
+ OFFSET(TASK_TI_A1, task_struct, thread_info.a1);
+ OFFSET(TASK_TI_A2, task_struct, thread_info.a2);
+#endif
OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
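
Each OFFSET() line makes a struct offset visible to assembly, so entry.S can address the new thread_info scratch slots as TASK_TI_A0(tp) and friends. A reduced stand-alone illustration of the mechanism (toy struct layout and a printf-based OFFSET(), not the kernel's Kbuild machinery):

#include <stddef.h>
#include <stdio.h>

/* Toy stand-ins for the kernel structures; the real layout and offsets differ. */
struct thread_info {
	unsigned long flags;
	int cpu;
	unsigned long a0, a1, a2;	/* the scratch slots added by this series */
};

struct task_struct {
	struct thread_info thread_info;
	/* ... */
};

/* The real asm-offsets.c emits these through special asm so Kbuild can turn
 * them into an assembler header; printf is enough to show the idea. */
#define OFFSET(sym, str, mem) \
	printf("#define " #sym " %zu\n", offsetof(struct str, mem))

int main(void)
{
	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
	OFFSET(TASK_TI_A0, task_struct, thread_info.a0);
	OFFSET(TASK_TI_A1, task_struct, thread_info.a1);
	OFFSET(TASK_TI_A2, task_struct, thread_info.a2);
	return 0;
}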
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b427188b28fc..3a8eeaa9310c 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -381,6 +381,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC),
};
const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext);
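
Registering svvptc here lets the DT/ACPI ISA parser set the RISCV_ISA_EXT_SVVPTC bit that the ALTERNATIVE sites and asm goto branches key off. As a hedged kernel-context sketch (it assumes the existing riscv_has_extension_unlikely() helper from <asm/cpufeature.h> and is not part of this patch), ordinary C code could branch on the same bit:

#include <asm/cpufeature.h>
#include <asm/hwcap.h>

/* Sketch only: true when the preventive sfence.vma for new PTEs can be skipped. */
static inline bool svvptc_can_skip_preventive_fence(void)
{
	/* Compiles down to a boot-patched branch, like the ALTERNATIVE sites. */
	return riscv_has_extension_unlikely(RISCV_ISA_EXT_SVVPTC);
}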
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index ac2e908d4418..fc41472d512e 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -19,6 +19,79 @@
.section .irqentry.text, "ax"
+.macro new_vmalloc_check
+ REG_S a0, TASK_TI_A0(tp)
+ csrr a0, CSR_CAUSE
+ /* Exclude IRQs */
+ blt a0, zero, _new_vmalloc_restore_context_a0
+
+ REG_S a1, TASK_TI_A1(tp)
+ /* Only check new_vmalloc if we are in a page/protection fault */
+ li a1, EXC_LOAD_PAGE_FAULT
+ beq a0, a1, _new_vmalloc_kernel_address
+ li a1, EXC_STORE_PAGE_FAULT
+ beq a0, a1, _new_vmalloc_kernel_address
+ li a1, EXC_INST_PAGE_FAULT
+ bne a0, a1, _new_vmalloc_restore_context_a1
+
+_new_vmalloc_kernel_address:
+ /* Is it a kernel address? */
+ csrr a0, CSR_TVAL
+ bge a0, zero, _new_vmalloc_restore_context_a1
+
+ /* Check if a new vmalloc mapping appeared that could explain the trap */
+ REG_S a2, TASK_TI_A2(tp)
+ /*
+ * Computes:
+ * a0 = &new_vmalloc[BIT_WORD(cpu)]
+ * a1 = BIT_MASK(cpu)
+ */
+ REG_L a2, TASK_TI_CPU(tp)
+ /*
+ * Compute the new_vmalloc element position:
+ * (cpu / 64) * 8 = (cpu >> 6) << 3
+ */
+ srli a1, a2, 6
+ slli a1, a1, 3
+ la a0, new_vmalloc
+ add a0, a0, a1
+ /*
+ * Compute the bit position in the new_vmalloc element:
+ * bit_pos = cpu % 64 = cpu - (cpu / 64) * 64 = cpu - ((cpu >> 6) << 6)
+ * = cpu - (((cpu >> 6) << 3) << 3)
+ */
+ slli a1, a1, 3
+ sub a1, a2, a1
+ /* Compute the "get mask": 1 << bit_pos */
+ li a2, 1
+ sll a1, a2, a1
+
+ /* Check the value of new_vmalloc for this cpu */
+ REG_L a2, 0(a0)
+ and a2, a2, a1
+ beq a2, zero, _new_vmalloc_restore_context
+
+ /* Atomically reset the current cpu bit in new_vmalloc */
+ amoxor.d a0, a1, (a0)
+
+ /* Only emit a sfence.vma if the uarch caches invalid entries */
+ ALTERNATIVE("sfence.vma", "nop", 0, RISCV_ISA_EXT_SVVPTC, 1)
+
+ REG_L a0, TASK_TI_A0(tp)
+ REG_L a1, TASK_TI_A1(tp)
+ REG_L a2, TASK_TI_A2(tp)
+ csrw CSR_SCRATCH, x0
+ sret
+
+_new_vmalloc_restore_context:
+ REG_L a2, TASK_TI_A2(tp)
+_new_vmalloc_restore_context_a1:
+ REG_L a1, TASK_TI_A1(tp)
+_new_vmalloc_restore_context_a0:
+ REG_L a0, TASK_TI_A0(tp)
+.endm
+
+
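The index/mask arithmetic in the macro above is restricted to shifts because it runs before any stack is usable: a1 first holds the byte offset of the cpu's u64 word ((cpu >> 6) << 3) and is then turned into the bit mask 1 << (cpu % 64). A stand-alone worked check of that arithmetic (assert-based, illustration only):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	for (unsigned long cpu = 0; cpu < 1024; cpu++) {
		/* srli a1, a2, 6 ; slli a1, a1, 3 : byte offset of the u64 word */
		unsigned long byte_off = (cpu >> 6) << 3;
		/* slli a1, a1, 3 ; sub a1, a2, a1 : bit position, i.e. cpu % 64 */
		unsigned long bit_pos = cpu - (byte_off << 3);
		/* li a2, 1 ; sll a1, a2, a1 : the per-cpu "get mask" */
		uint64_t mask = 1ULL << bit_pos;

		assert(byte_off == (cpu / 64) * sizeof(uint64_t));
		assert(bit_pos == cpu % 64);
		assert(mask == 1ULL << (cpu % 64));
	}
	printf("new_vmalloc index/mask arithmetic checks out\n");
	return 0;
}
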
SYM_CODE_START(handle_exception)
/*
* If coming from userspace, preserve the user thread pointer and load
@@ -30,6 +103,20 @@ SYM_CODE_START(handle_exception)
.Lrestore_kernel_tpsp:
csrr tp, CSR_SCRATCH
+
+#ifdef CONFIG_64BIT
+ /*
+ * The RISC-V kernel does not eagerly emit an sfence.vma after each
+ * new vmalloc mapping, which may result in exceptions:
+ * - if the uarch caches invalid entries, the new mapping would not be
+ * observed by the page table walker and an invalidation is needed.
+ * - if the uarch does not cache invalid entries, a reordered access
+ * could "miss" the new mapping and trap: in that case, we only need
+ * to retry the access; no sfence.vma is required.
+ */
+ new_vmalloc_check
+#endif
+
REG_S sp, TASK_TI_KERNEL_SP(tp)
#ifdef CONFIG_VMAP_STACK
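
Putting new_vmalloc_check together: only a page fault on a kernel address whose cpu bit is set in new_vmalloc is resolved early, by clearing the bit, fencing when the uarch caches invalid entries, and retrying with sret; everything else falls through to the normal handle_exception path. A compact, self-contained C model of that decision (the parameters stand in for CSR_CAUSE/CSR_TVAL reads and the per-cpu bitmap; none of these names are kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum cause { LOAD_PAGE_FAULT, STORE_PAGE_FAULT, INST_PAGE_FAULT, OTHER_EXC };

/*
 * Returns true when the trap is resolved early (the macro restores a0-a2 and
 * executes sret so the faulting access is retried); false means "fall through
 * to the ordinary handle_exception path".
 */
static bool model_new_vmalloc_check(bool is_interrupt, enum cause cause,
				    uint64_t tval, unsigned int cpu,
				    bool has_svvptc, uint64_t *bitmap)
{
	if (is_interrupt)
		return false;			/* blt a0, zero, ... */
	if (cause != LOAD_PAGE_FAULT && cause != STORE_PAGE_FAULT &&
	    cause != INST_PAGE_FAULT)
		return false;			/* not a page fault */
	if ((int64_t)tval >= 0)
		return false;			/* not a kernel address */

	uint64_t mask = 1ULL << (cpu % 64);
	if (!(bitmap[cpu / 64] & mask))
		return false;			/* no pending vmalloc mapping */

	bitmap[cpu / 64] &= ~mask;		/* amoxor.d in the macro */
	if (!has_svvptc) {
		/* sfence.vma here; patched to a nop when Svvptc is present */
	}
	return true;
}

int main(void)
{
	uint64_t bitmap[2] = { ~0ULL, ~0ULL };

	/* Kernel-address store fault on cpu 3 with a pending mapping: retry. */
	printf("%d\n", model_new_vmalloc_check(false, STORE_PAGE_FAULT,
					       0xffffffc800000000ULL, 3, true, bitmap));
	/* Same fault again: the bit is now clear, take the normal slow path. */
	printf("%d\n", model_new_vmalloc_check(false, STORE_PAGE_FAULT,
					       0xffffffc800000000ULL, 3, true, bitmap));
	return 0;
}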
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0e56725377a4..206bae58ed6c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -37,6 +37,8 @@
#include "../kernel/head.h"
+u64 new_vmalloc[NR_CPUS / sizeof(u64) + 1];
+
struct kernel_mapping kernel_map __ro_after_init;
EXPORT_SYMBOL(kernel_map);
#ifdef CONFIG_XIP_KERNEL
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
index 533ec9055fa0..4ae67324f992 100644
--- a/arch/riscv/mm/pgtable.c
+++ b/arch/riscv/mm/pgtable.c
@@ -9,6 +9,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
+ asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+ : : : : svvptc);
+
if (!pte_same(ptep_get(ptep), entry))
__set_pte_at(vma->vm_mm, ptep, entry);
/*
@@ -16,6 +19,16 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
* the case that the PTE changed and the spurious fault case.
*/
return true;
+
+svvptc:
+ if (!pte_same(ptep_get(ptep), entry)) {
+ __set_pte_at(vma->vm_mm, ptep, entry);
+ /* Only uarchs without Svadu are impacted here */
+ flush_tlb_page(vma, address);
+ return true;
+ }
+
+ return false;
}
int ptep_test_and_clear_young(struct vm_area_struct *vma,
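
With Svvptc, ptep_set_access_flags() only reports a change when the PTE really did change; a spurious fault on an up-to-date PTE returns false so the generic code skips its flush and the access is simply retried. A small stand-alone model of the two return conventions (model_ptep_set_access_flags() is illustrative only):

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of ptep_set_access_flags(): "pte_changed" stands for
 * !pte_same(ptep_get(ptep), entry).
 */
static bool model_ptep_set_access_flags(bool has_svvptc, bool pte_changed)
{
	if (!has_svvptc) {
		/* Legacy path: (re)write the PTE if needed and always report
		 * true; stale invalid entries are handled by the preventive
		 * flushes elsewhere (see update_mmu_cache_range()). */
		return true;
	}

	if (pte_changed) {
		/* Svvptc path: write the PTE and flush the page; the hunk
		 * above notes that only uarchs without Svadu are impacted. */
		return true;
	}

	/* Spurious fault on an already up-to-date PTE: nothing to do, the
	 * retried access will succeed within a bounded timeframe. */
	return false;
}

int main(void)
{
	printf("svvptc, unchanged PTE -> %d (no flush, just retry)\n",
	       model_ptep_set_access_flags(true, false));
	printf("svvptc, changed PTE   -> %d\n",
	       model_ptep_set_access_flags(true, true));
	printf("legacy, any PTE       -> %d\n",
	       model_ptep_set_access_flags(false, false));
	return 0;
}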