summaryrefslogtreecommitdiff
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorAxel Rasmussen <axelrasmussen@google.com>2023-07-07 14:55:33 -0700
committerAndrew Morton <akpm@linux-foundation.org>2023-08-18 10:12:16 -0700
commitaf19487f00f34ff8643921d7909dbb3fedc7e329 (patch)
treebfd5f51fddd0fa74aca542b5fa88137af407580f /mm/hugetlb.c
parent60b1e24ce8c3334d9204d6229356b750632136be (diff)
mm: make PTE_MARKER_SWAPIN_ERROR more general
Patch series "add UFFDIO_POISON to simulate memory poisoning with UFFD", v4. This series adds a new userfaultfd feature, UFFDIO_POISON. See commit 4 for a detailed description of the feature. This patch (of 8): Future patches will reuse PTE_MARKER_SWAPIN_ERROR to implement UFFDIO_POISON, so make some various preparations for that: First, rename it to just PTE_MARKER_POISONED. The "SWAPIN" can be confusing since we're going to re-use it for something not really related to swap. This can be particularly confusing for things like hugetlbfs, which doesn't support swap whatsoever. Also rename some various helper functions. Next, fix pte marker copying for hugetlbfs. Previously, it would WARN on seeing a PTE_MARKER_SWAPIN_ERROR, since hugetlbfs doesn't support swap. But, since we're going to re-use it, we want it to go ahead and copy it just like non-hugetlbfs memory does today. Since the code to do this is more complicated now, pull it out into a helper which can be re-used in both places. While we're at it, also make it slightly more explicit in its handling of e.g. uffd wp markers. For non-hugetlbfs page faults, instead of returning VM_FAULT_SIGBUS for an error entry, return VM_FAULT_HWPOISON. For most cases this change doesn't matter, e.g. a userspace program would receive a SIGBUS either way. But for UFFDIO_POISON, this change will let KVM guests get an MCE out of the box, instead of giving a SIGBUS to the hypervisor and requiring it to somehow inject an MCE. Finally, for hugetlbfs faults, handle PTE_MARKER_POISONED, and return VM_FAULT_HWPOISON_LARGE in such cases. Note that this can't happen today because the lack of swap support means we'll never end up with such a PTE anyway, but this behavior will be needed once such entries *can* show up via UFFDIO_POISON. Link: https://lkml.kernel.org/r/20230707215540.2324998-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20230707215540.2324998-2-axelrasmussen@google.com Signed-off-by: Axel Rasmussen <axelrasmussen@google.com> Acked-by: Peter Xu <peterx@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Brian Geffon <bgeffon@google.com> Cc: Christian Brauner <brauner@kernel.org> Cc: David Hildenbrand <david@redhat.com> Cc: Gaosheng Cui <cuigaosheng1@huawei.com> Cc: Huang, Ying <ying.huang@intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: James Houghton <jthoughton@google.com> Cc: Jan Alexander Steffens (heftig) <heftig@archlinux.org> Cc: Jiaqi Yan <jiaqiyan@google.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Kefeng Wang <wangkefeng.wang@huawei.com> Cc: Liam R. Howlett <Liam.Howlett@oracle.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Muchun Song <muchun.song@linux.dev> Cc: Nadav Amit <namit@vmware.com> Cc: Naoya Horiguchi <naoya.horiguchi@nec.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Suleiman Souhlal <suleiman@google.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: T.J. Alumbaugh <talumbau@google.com> Cc: Yu Zhao <yuzhao@google.com> Cc: ZhangPeng <zhangpeng362@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c32
1 files changed, 21 insertions, 11 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e3839eee4657..ffee2978dfed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@
#include <linux/nospec.h>
#include <linux/delayacct.h>
#include <linux/memory.h>
+#include <linux/mm_inline.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -5101,15 +5102,12 @@ again:
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
- /* No swap on hugetlb */
- WARN_ON_ONCE(
- is_swapin_error_entry(pte_to_swp_entry(entry)));
- /*
- * We copy the pte marker only if the dst vma has
- * uffd-wp enabled.
- */
- if (userfaultfd_wp(dst_vma))
- set_huge_pte_at(dst, addr, dst_pte, entry);
+ pte_marker marker = copy_pte_marker(
+ pte_to_swp_entry(entry), dst_vma);
+
+ if (marker)
+ set_huge_pte_at(dst, addr, dst_pte,
+ make_pte_marker(marker));
} else {
entry = huge_ptep_get(src_pte);
pte_folio = page_folio(pte_page(entry));
@@ -6089,14 +6087,26 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
entry = huge_ptep_get(ptep);
- /* PTE markers should be handled the same way as none pte */
- if (huge_pte_none_mostly(entry))
+ if (huge_pte_none_mostly(entry)) {
+ if (is_pte_marker(entry)) {
+ pte_marker marker =
+ pte_marker_get(pte_to_swp_entry(entry));
+
+ if (marker & PTE_MARKER_POISONED) {
+ ret = VM_FAULT_HWPOISON_LARGE;
+ goto out_mutex;
+ }
+ }
+
/*
+ * Other PTE markers should be handled the same way as none PTE.
+ *
* hugetlb_no_page will drop vma lock and hugetlb fault
* mutex internally, which make us return immediately.
*/
return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
entry, flags);
+ }
ret = 0;