summaryrefslogtreecommitdiff
path: root/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--virt/kvm/kvm_main.c83
1 files changed, 58 insertions, 25 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8367d88ce39b..383df23514b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -58,6 +58,7 @@
#include "coalesced_mmio.h"
#include "async_pf.h"
+#include "mmu_lock.h"
#include "vfio.h"
#define CREATE_TRACE_POINTS
@@ -459,13 +460,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
int idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+
+ KVM_MMU_LOCK(kvm);
+
kvm->mmu_notifier_seq++;
if (kvm_set_spte_hva(kvm, address, pte))
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -476,20 +479,38 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
int need_tlb_flush = 0, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
+ if (likely(kvm->mmu_notifier_count == 1)) {
+ kvm->mmu_notifier_range_start = range->start;
+ kvm->mmu_notifier_range_end = range->end;
+ } else {
+ /*
+ * Fully tracking multiple concurrent ranges has dimishing
+ * returns. Keep things simple and just find the minimal range
+ * which includes the current and new ranges. As there won't be
+ * enough information to subtract a range after its invalidate
+ * completes, any ranges invalidated concurrently will
+ * accumulate and persist until all outstanding invalidates
+ * complete.
+ */
+ kvm->mmu_notifier_range_start =
+ min(kvm->mmu_notifier_range_start, range->start);
+ kvm->mmu_notifier_range_end =
+ max(kvm->mmu_notifier_range_end, range->end);
+ }
need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
range->flags);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return 0;
@@ -500,7 +521,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* This sequence increase will notify the kvm page fault that
* the page that is going to be mapped in the spte could have
@@ -514,7 +535,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
* in conjunction with the smp_rmb in mmu_notifier_retry().
*/
kvm->mmu_notifier_count--;
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
BUG_ON(kvm->mmu_notifier_count < 0);
}
@@ -528,13 +549,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
young = kvm_age_hva(kvm, start, end);
if (young)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -549,7 +570,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* Even though we do not flush TLB, this will still adversely
* affect performance on pre-Haswell Intel EPT, where there is
@@ -564,7 +585,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* more sophisticated heuristic later.
*/
young = kvm_age_hva(kvm, start, end);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -578,9 +599,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
young = kvm_test_age_hva(kvm, address);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -745,7 +766,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (!kvm)
return ERR_PTR(-ENOMEM);
- spin_lock_init(&kvm->mmu_lock);
+ KVM_MMU_LOCK_INIT(kvm);
mmgrab(current->mm);
kvm->mm = current->mm;
kvm_eventfd_init(kvm);
@@ -1525,7 +1546,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
memset(dirty_bitmap_buffer, 0, n);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
for (i = 0; i < n / sizeof(long); i++) {
unsigned long mask;
gfn_t offset;
@@ -1541,7 +1562,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
}
if (flush)
@@ -1636,7 +1657,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
return -EFAULT;
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
for (offset = log->first_page, i = offset / BITS_PER_LONG,
n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--;
i++, offset += BITS_PER_LONG) {
@@ -1659,7 +1680,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
offset, mask);
}
}
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
@@ -1903,10 +1924,12 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
bool write_fault, bool *writable,
kvm_pfn_t *p_pfn)
{
- unsigned long pfn;
+ kvm_pfn_t pfn;
+ pte_t *ptep;
+ spinlock_t *ptl;
int r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r) {
/*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1921,14 +1944,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r)
return r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r)
return r;
+ }
+ if (write_fault && !pte_write(*ptep)) {
+ pfn = KVM_PFN_ERR_RO_FAULT;
+ goto out;
}
if (writable)
- *writable = true;
+ *writable = pte_write(*ptep);
+ pfn = pte_pfn(*ptep);
/*
* Get a reference here because callers of *hva_to_pfn* and
@@ -1943,6 +1971,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
*/
kvm_get_pfn(pfn);
+out:
+ pte_unmap_unlock(ptep, ptl);
*p_pfn = pfn;
return 0;
}
@@ -2011,10 +2041,13 @@ exit:
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
- bool *writable)
+ bool *writable, hva_t *hva)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+ if (hva)
+ *hva = addr;
+
if (addr == KVM_HVA_ERR_RO_BAD) {
if (writable)
*writable = false;
@@ -2042,19 +2075,19 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
{
return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
- write_fault, writable);
+ write_fault, writable, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);