author     Ryan Roberts <ryan.roberts@arm.com>   2025-04-22 09:18:16 +0100
committer  Will Deacon <will@kernel.org>         2025-05-09 13:43:07 +0100
commit     2fba13371fe80b4d0d533a502e460ce0e936d024 (patch)
tree       bdd2a5e1b5ba561cc80ce4593b42c366141a05a8
parent     61ef8ddaa35e341508d8fa891c33029ed5f75779 (diff)
mm/vmalloc: Gracefully unmap huge ptes
Commit f7ee1f13d606 ("mm/vmalloc: enable mapping of huge pages at pte level in vmap") added its support by reusing the set_huge_pte_at() API, which is otherwise only used for user mappings. But when unmapping those huge ptes, it continued to call ptep_get_and_clear(), which is a layering violation. To date, the only arch to implement this support is powerpc and it all happens to work ok for it.

But arm64's implementation of ptep_get_and_clear() can not be safely used to clear a previous set_huge_pte_at(). So let's introduce a new arch opt-in function, arch_vmap_pte_range_unmap_size(), which can provide the size of a (present) pte. Then we can call huge_ptep_get_and_clear() to tear it down properly.

Note that if vunmap_range() is called with a range that starts in the middle of a huge pte-mapped page, we must unmap the entire huge page so the behaviour is consistent with pmd and pud block mappings. In this case emit a warning just like we do for pmd/pud mappings.

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: Luiz Capitulino <luizcap@redhat.com>
Link: https://lore.kernel.org/r/20250422081822.1836315-9-ryan.roberts@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
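For illustration, this is roughly how an architecture could opt in to the new hook. The sketch below is modelled on arm64's contiguous-pte mappings and is not part of this patch; helper names such as pte_valid_cont(), __ptep_get() and CONT_PTE_SIZE follow arm64 conventions and are assumptions here:

#define arch_vmap_pte_range_unmap_size arch_vmap_pte_range_unmap_size
static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
							    pte_t *ptep)
{
	/*
	 * If this pte is part of a contiguous block, report the block size
	 * so the core code tears the whole block down with
	 * huge_ptep_get_and_clear(); otherwise fall back to a single page.
	 */
	if (pte_valid_cont(__ptep_get(ptep)))
		return CONT_PTE_SIZE;

	return PAGE_SIZE;
}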
-rw-r--r--  include/linux/vmalloc.h |  8
-rw-r--r--  mm/vmalloc.c            | 18
2 files changed, 24 insertions(+), 2 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e3..16dd4cba64f2 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -113,6 +113,14 @@ static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, uns
 }
 #endif
 
+#ifndef arch_vmap_pte_range_unmap_size
+static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
+							    pte_t *ptep)
+{
+	return PAGE_SIZE;
+}
+#endif
+
 #ifndef arch_vmap_pte_supported_shift
 static inline int arch_vmap_pte_supported_shift(unsigned long size)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d60d3a29d149..fe2e2cc8da94 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -350,12 +350,26 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     pgtbl_mod_mask *mask)
 {
 	pte_t *pte;
+	pte_t ptent;
+	unsigned long size = PAGE_SIZE;
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
+#ifdef CONFIG_HUGETLB_PAGE
+		size = arch_vmap_pte_range_unmap_size(addr, pte);
+		if (size != PAGE_SIZE) {
+			if (WARN_ON(!IS_ALIGNED(addr, size))) {
+				addr = ALIGN_DOWN(addr, size);
+				pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT));
+			}
+			ptent = huge_ptep_get_and_clear(&init_mm, addr, pte, size);
+			if (WARN_ON(end - addr < size))
+				size = end - addr;
+		} else
+#endif
+			ptent = ptep_get_and_clear(&init_mm, addr, pte);
 
 		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += (size >> PAGE_SHIFT), addr += size, addr != end);
 	*mask |= PGTBL_PTE_MODIFIED;
 }
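As a side note on the alignment fix-up above: when vunmap_range() starts in the middle of a huge pte-mapped block, both addr and the pte pointer are rewound to the start of the block before the huge pte is cleared. A small userspace sketch of that arithmetic, assuming 4 KiB pages, 8-byte pte entries and a 64 KiB contiguous block, with local stand-ins for the kernel macros (illustration only, not kernel code):

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the kernel macros, power-of-two alignments only. */
#define ALIGN_DOWN(x, a)	((x) & ~((uintptr_t)(a) - 1))
#define PTR_ALIGN_DOWN(p, a)	((pte_t *)ALIGN_DOWN((uintptr_t)(p), (a)))

typedef uint64_t pte_t;		/* assume 8-byte pte entries */
#define PAGE_SHIFT	12	/* assume 4 KiB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	unsigned long size = 16 * PAGE_SIZE;	   /* a 64 KiB contiguous block */
	unsigned long addr = 0xffff800008205000UL; /* mid-block, not size-aligned */

	/* A fake last-level page table; page-aligned like a real pte page. */
	static pte_t table[512] __attribute__((aligned(4096)));
	pte_t *pte = &table[5];			   /* pte entry covering addr */

	/* Rewind both the address and the pte pointer to the block start. */
	addr = ALIGN_DOWN(addr, size);
	pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT));

	/* Prints: addr 0xffff800008200000, pte index 0 */
	printf("addr 0x%lx, pte index %ld\n", addr, (long)(pte - table));
	return 0;
}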