summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/tdp_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_mmu.c')
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c45
1 files changed, 22 insertions, 23 deletions
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 771210ce5181..7c25dbf32ecc 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include "mmu.h"
#include "mmu_internal.h"
@@ -10,23 +11,15 @@
#include <asm/cmpxchg.h>
#include <trace/events/kvm.h>
-static bool __read_mostly tdp_mmu_enabled = true;
-module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
-
/* Initializes the TDP MMU for the VM, if enabled. */
int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
struct workqueue_struct *wq;
- if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
- return 0;
-
wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
if (!wq)
return -ENOMEM;
- /* This should not be changed for the lifetime of the VM. */
- kvm->arch.tdp_mmu_enabled = true;
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
kvm->arch.tdp_mmu_zap_wq = wq;
@@ -47,9 +40,6 @@ static __always_inline bool kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
{
- if (!kvm->arch.tdp_mmu_enabled)
- return;
-
/* Also waits for any queued work items. */
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
@@ -144,7 +134,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
return;
- WARN_ON(!root->tdp_mmu_page);
+ WARN_ON(!is_tdp_mmu_page(root));
/*
* The root now has refcount=0. It is valid, but readers already
@@ -690,8 +680,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
if (ret)
return ret;
- kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
- KVM_PAGES_PER_HPAGE(iter->level));
+ kvm_flush_remote_tlbs_gfn(kvm, iter->gfn, iter->level);
/*
* No other thread can overwrite the removed SPTE as they must either
@@ -1074,7 +1063,9 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
int ret = RET_PF_FIXED;
bool wrprot = false;
- WARN_ON(sp->role.level != fault->goal_level);
+ if (WARN_ON_ONCE(sp->role.level != fault->goal_level))
+ return RET_PF_RETRY;
+
if (unlikely(!fault->slot))
new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL);
else
@@ -1088,8 +1079,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
return RET_PF_RETRY;
else if (is_shadow_present_pte(iter->old_spte) &&
!is_last_spte(iter->old_spte, iter->level))
- kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
- KVM_PAGES_PER_HPAGE(iter->level + 1));
+ kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level);
/*
* If the page fault was caused by a write but the page is write
@@ -1173,9 +1163,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (fault->nx_huge_page_workaround_enabled)
disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
- if (iter.level == fault->goal_level)
- break;
-
/*
* If SPTE has been frozen by another thread, just give up and
* retry, avoiding unnecessary page table allocation and free.
@@ -1183,6 +1170,9 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (is_removed_spte(iter.old_spte))
goto retry;
+ if (iter.level == fault->goal_level)
+ goto map_target_level;
+
/* Step down into the lower level page table if it exists. */
if (is_shadow_present_pte(iter.old_spte) &&
!is_large_pte(iter.old_spte))
@@ -1203,8 +1193,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
r = tdp_mmu_link_sp(kvm, &iter, sp, true);
/*
- * Also force the guest to retry the access if the upper level SPTEs
- * aren't in place.
+ * Force the guest to retry if installing an upper level SPTE
+ * failed, e.g. because a different task modified the SPTE.
*/
if (r) {
tdp_mmu_free_sp(sp);
@@ -1214,11 +1204,20 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
if (fault->huge_page_disallowed &&
fault->req_level >= iter.level) {
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
- track_possible_nx_huge_page(kvm, sp);
+ if (sp->nx_huge_page_disallowed)
+ track_possible_nx_huge_page(kvm, sp);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
}
}
+ /*
+ * The walk aborted before reaching the target level, e.g. because the
+ * iterator detected an upper level SPTE was frozen during traversal.
+ */
+ WARN_ON_ONCE(iter.level == fault->goal_level);
+ goto retry;
+
+map_target_level:
ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
retry: