diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 83 |
1 files changed, 58 insertions, 25 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8367d88ce39b..383df23514b9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -58,6 +58,7 @@ #include "coalesced_mmio.h" #include "async_pf.h" +#include "mmu_lock.h" #include "vfio.h" #define CREATE_TRACE_POINTS @@ -459,13 +460,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, int idx; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + + KVM_MMU_LOCK(kvm); + kvm->mmu_notifier_seq++; if (kvm_set_spte_hva(kvm, address, pte)) kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); } @@ -476,20 +479,38 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, int need_tlb_flush = 0, idx; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); /* * The count increase must become visible at unlock time as no * spte can be established without taking the mmu_lock and * count is also read inside the mmu_lock critical section. */ kvm->mmu_notifier_count++; + if (likely(kvm->mmu_notifier_count == 1)) { + kvm->mmu_notifier_range_start = range->start; + kvm->mmu_notifier_range_end = range->end; + } else { + /* + * Fully tracking multiple concurrent ranges has dimishing + * returns. Keep things simple and just find the minimal range + * which includes the current and new ranges. As there won't be + * enough information to subtract a range after its invalidate + * completes, any ranges invalidated concurrently will + * accumulate and persist until all outstanding invalidates + * complete. + */ + kvm->mmu_notifier_range_start = + min(kvm->mmu_notifier_range_start, range->start); + kvm->mmu_notifier_range_end = + max(kvm->mmu_notifier_range_end, range->end); + } need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end, range->flags); /* we've to flush the tlb before the pages can be freed */ if (need_tlb_flush || kvm->tlbs_dirty) kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); return 0; @@ -500,7 +521,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, { struct kvm *kvm = mmu_notifier_to_kvm(mn); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); /* * This sequence increase will notify the kvm page fault that * the page that is going to be mapped in the spte could have @@ -514,7 +535,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, * in conjunction with the smp_rmb in mmu_notifier_retry(). */ kvm->mmu_notifier_count--; - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); BUG_ON(kvm->mmu_notifier_count < 0); } @@ -528,13 +549,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, int young, idx; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); young = kvm_age_hva(kvm, start, end); if (young) kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); return young; @@ -549,7 +570,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, int young, idx; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); /* * Even though we do not flush TLB, this will still adversely * affect performance on pre-Haswell Intel EPT, where there is @@ -564,7 +585,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn, * more sophisticated heuristic later. */ young = kvm_age_hva(kvm, start, end); - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); return young; @@ -578,9 +599,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn, int young, idx; idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); young = kvm_test_age_hva(kvm, address); - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); srcu_read_unlock(&kvm->srcu, idx); return young; @@ -745,7 +766,7 @@ static struct kvm *kvm_create_vm(unsigned long type) if (!kvm) return ERR_PTR(-ENOMEM); - spin_lock_init(&kvm->mmu_lock); + KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; kvm_eventfd_init(kvm); @@ -1525,7 +1546,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot); memset(dirty_bitmap_buffer, 0, n); - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); for (i = 0; i < n / sizeof(long); i++) { unsigned long mask; gfn_t offset; @@ -1541,7 +1562,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask); } - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); } if (flush) @@ -1636,7 +1657,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n)) return -EFAULT; - spin_lock(&kvm->mmu_lock); + KVM_MMU_LOCK(kvm); for (offset = log->first_page, i = offset / BITS_PER_LONG, n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--; i++, offset += BITS_PER_LONG) { @@ -1659,7 +1680,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, offset, mask); } } - spin_unlock(&kvm->mmu_lock); + KVM_MMU_UNLOCK(kvm); if (flush) kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); @@ -1903,10 +1924,12 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, bool write_fault, bool *writable, kvm_pfn_t *p_pfn) { - unsigned long pfn; + kvm_pfn_t pfn; + pte_t *ptep; + spinlock_t *ptl; int r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) { /* * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does @@ -1921,14 +1944,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, if (r) return r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) return r; + } + if (write_fault && !pte_write(*ptep)) { + pfn = KVM_PFN_ERR_RO_FAULT; + goto out; } if (writable) - *writable = true; + *writable = pte_write(*ptep); + pfn = pte_pfn(*ptep); /* * Get a reference here because callers of *hva_to_pfn* and @@ -1943,6 +1971,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, */ kvm_get_pfn(pfn); +out: + pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; return 0; } @@ -2011,10 +2041,13 @@ exit: kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, bool *async, bool write_fault, - bool *writable) + bool *writable, hva_t *hva) { unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault); + if (hva) + *hva = addr; + if (addr == KVM_HVA_ERR_RO_BAD) { if (writable) *writable = false; @@ -2042,19 +2075,19 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable) { return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL, - write_fault, writable); + write_fault, writable, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_prot); kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { - return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot); kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) { - return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL); + return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL); } EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); |