diff options
author | James Morris <james.l.morris@oracle.com> | 2017-11-29 12:47:41 +1100 |
---|---|---|
committer | James Morris <james.l.morris@oracle.com> | 2017-11-29 12:47:41 +1100 |
commit | cf40a76e7d5874bb25f4404eecc58a2e033af885 (patch) | |
tree | 8fd81cbea03c87b3d41d7ae5b1d11eadd35d6ef5 /virt/kvm/kvm_main.c | |
parent | ab5348c9c23cd253f5902980d2d8fe067dc24c82 (diff) | |
parent | 4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff) |
Merge tag 'v4.15-rc1' into next-seccomp
Linux 4.15-rc1
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 113 |
1 files changed, 29 insertions, 84 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 82987d457b8b..f169ecc4f2e8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -122,7 +122,6 @@ static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); -static void kvm_release_pfn_dirty(kvm_pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm_memory_slot *memslot, gfn_t gfn); __visible bool kvm_rebooting; @@ -322,47 +321,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } -static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address) -{ - struct kvm *kvm = mmu_notifier_to_kvm(mn); - int need_tlb_flush, idx; - - /* - * When ->invalidate_page runs, the linux pte has been zapped - * already but the page is still allocated until - * ->invalidate_page returns. So if we increase the sequence - * here the kvm page fault will notice if the spte can't be - * established because the page is going to be freed. If - * instead the kvm page fault establishes the spte before - * ->invalidate_page runs, kvm_unmap_hva will release it - * before returning. - * - * The sequence increase only need to be seen at spin_unlock - * time, and not at spin_lock time. - * - * Increasing the sequence after the spin_unlock would be - * unsafe because the kvm page fault could then establish the - * pte after kvm_unmap_hva returned, without noticing the page - * is going to be freed. - */ - idx = srcu_read_lock(&kvm->srcu); - spin_lock(&kvm->mmu_lock); - - kvm->mmu_notifier_seq++; - need_tlb_flush = kvm_unmap_hva(kvm, address) | kvm->tlbs_dirty; - /* we've to flush the tlb before the pages can be freed */ - if (need_tlb_flush) - kvm_flush_remote_tlbs(kvm); - - spin_unlock(&kvm->mmu_lock); - - kvm_arch_mmu_notifier_invalidate_page(kvm, address); - - srcu_read_unlock(&kvm->srcu, idx); -} - static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -510,7 +468,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { - .invalidate_page = kvm_mmu_notifier_invalidate_page, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, @@ -716,11 +673,11 @@ out_err_no_irq_srcu: out_err_no_srcu: hardware_disable_all(); out_err_no_disable: + refcount_set(&kvm->users_count, 0); for (i = 0; i < KVM_NR_BUSES; i++) - kfree(rcu_access_pointer(kvm->buses[i])); + kfree(kvm_get_bus(kvm, i)); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -754,9 +711,8 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - struct kvm_io_bus *bus; + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); - bus = rcu_dereference_protected(kvm->buses[i], 1); if (bus) kvm_io_bus_destroy(bus); kvm->buses[i] = NULL; @@ -770,8 +726,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, - rcu_dereference_protected(kvm->memslots[i], 1)); + kvm_free_memslots(kvm, __kvm_memslots(kvm, i)); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -1654,7 +1609,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages) { unsigned long addr; - gfn_t entry; + gfn_t entry = 0; addr = gfn_to_hva_many(slot, gfn, &entry); if (kvm_is_error_hva(addr)) @@ -1723,11 +1678,12 @@ void kvm_release_page_dirty(struct page *page) } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); -static void kvm_release_pfn_dirty(kvm_pfn_t pfn) +void kvm_release_pfn_dirty(kvm_pfn_t pfn) { kvm_set_pfn_dirty(pfn); kvm_release_pfn_clean(pfn); } +EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); void kvm_set_pfn_dirty(kvm_pfn_t pfn) { @@ -1973,6 +1929,7 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, * verify that the entire region is valid here. */ while (start_gfn <= end_gfn) { + nr_pages_avail = 0; ghc->memslot = __gfn_to_memslot(slots, start_gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); @@ -2230,7 +2187,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) struct swait_queue_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (swait_active(wqp)) { + if (swq_has_sleeper(wqp)) { swake_up(wqp); ++vcpu->stat.halt_wakeup; return true; @@ -2320,7 +2277,7 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) #endif } -void kvm_vcpu_on_spin(struct kvm_vcpu *me) +void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode) { struct kvm *kvm = me->kvm; struct kvm_vcpu *vcpu; @@ -2345,12 +2302,14 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me) continue; } else if (pass && i > last_boosted_vcpu) break; - if (!ACCESS_ONCE(vcpu->preempted)) + if (!READ_ONCE(vcpu->preempted)) continue; if (vcpu == me) continue; if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu)) continue; + if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu)) + continue; if (!kvm_vcpu_eligible_for_directed_yield(vcpu)) continue; @@ -2765,7 +2724,6 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, case KVM_SET_SIGNAL_MASK: { struct kvm_signal_mask __user *sigmask_arg = argp; struct kvm_signal_mask kvm_sigmask; - compat_sigset_t csigset; sigset_t sigset; if (argp) { @@ -2774,13 +2732,11 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof(csigset)) + if (kvm_sigmask.len != sizeof(compat_sigset_t)) goto out; r = -EFAULT; - if (copy_from_user(&csigset, sigmask_arg->sigset, - sizeof(csigset))) + if (get_compat_sigset(&sigset, (void *)sigmask_arg->sigset)) goto out; - sigset_from_compat(&sigset, &csigset); r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); } else r = kvm_vcpu_ioctl_set_sigmask(vcpu, NULL); @@ -3883,7 +3839,6 @@ static const struct file_operations *stat_fops[] = { static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) { struct kobj_uevent_env *env; - char *tmp, *pathbuf = NULL; unsigned long long created, active; if (!kvm_dev.this_device || !kvm) @@ -3907,38 +3862,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) add_uevent_var(env, "CREATED=%llu", created); add_uevent_var(env, "COUNT=%llu", active); - if (type == KVM_EVENT_CREATE_VM) + if (type == KVM_EVENT_CREATE_VM) { add_uevent_var(env, "EVENT=create"); - else if (type == KVM_EVENT_DESTROY_VM) + kvm->userspace_pid = task_pid_nr(current); + } else if (type == KVM_EVENT_DESTROY_VM) { add_uevent_var(env, "EVENT=destroy"); + } + add_uevent_var(env, "PID=%d", kvm->userspace_pid); if (kvm->debugfs_dentry) { - char p[ITOA_MAX_LEN]; - - snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); - tmp = strchrnul(p + 1, '-'); - *tmp = '\0'; - add_uevent_var(env, "PID=%s", p); - pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); - if (pathbuf) { - /* sizeof counts the final '\0' */ - int len = sizeof("STATS_PATH=") - 1; - const char *pvar = "STATS_PATH="; - - tmp = dentry_path_raw(kvm->debugfs_dentry, - pathbuf + len, - PATH_MAX - len); - if (!IS_ERR(tmp)) { - memcpy(tmp - len, pvar, len); - env->envp[env->envp_idx++] = tmp - len; - } + char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); + + if (p) { + tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX); + if (!IS_ERR(tmp)) + add_uevent_var(env, "STATS_PATH=%s", tmp); + kfree(p); } } /* no need for checks, since we are adding at most only 5 keys */ env->envp[env->envp_idx++] = NULL; kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); kfree(env); - kfree(pathbuf); } static int kvm_init_debug(void) @@ -4062,7 +4007,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, if (!vcpu_align) vcpu_align = __alignof__(struct kvm_vcpu); kvm_vcpu_cache = kmem_cache_create("kvm_vcpu", vcpu_size, vcpu_align, - 0, NULL); + SLAB_ACCOUNT, NULL); if (!kvm_vcpu_cache) { r = -ENOMEM; goto out_free_3; |