diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 163 |
1 files changed, 83 insertions, 80 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32896c845ffe..26383e63d9dd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -484,6 +484,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) vcpu->ready = false; preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops); vcpu->last_used_slot = NULL; + + /* Fill the stats id string for the vcpu */ + snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", + task_pid_nr(current), id); } static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -698,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, /* * .change_pte() must be surrounded by .invalidate_range_{start,end}(). - * If mmu_notifier_count is zero, then no in-progress invalidations, - * including this one, found a relevant memslot at start(); rechecking - * memslots here is unnecessary. Note, a false positive (count elevated - * by a different invalidation) is sub-optimal but functionally ok. + * If mmu_invalidate_in_progress is zero, then no in-progress + * invalidations, including this one, found a relevant memslot at + * start(); rechecking memslots here is unnecessary. Note, a false + * positive (count elevated by a different invalidation) is sub-optimal + * but functionally ok. */ WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); - if (!READ_ONCE(kvm->mmu_notifier_count)) + if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); } -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * The count increase must become visible at unlock time as no * spte can be established without taking the mmu_lock and * count is also read inside the mmu_lock critical section. */ - kvm->mmu_notifier_count++; - if (likely(kvm->mmu_notifier_count == 1)) { - kvm->mmu_notifier_range_start = start; - kvm->mmu_notifier_range_end = end; + kvm->mmu_invalidate_in_progress++; + if (likely(kvm->mmu_invalidate_in_progress == 1)) { + kvm->mmu_invalidate_range_start = start; + kvm->mmu_invalidate_range_end = end; } else { /* * Fully tracking multiple concurrent ranges has diminishing @@ -732,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, * accumulate and persist until all outstanding invalidates * complete. */ - kvm->mmu_notifier_range_start = - min(kvm->mmu_notifier_range_start, start); - kvm->mmu_notifier_range_end = - max(kvm->mmu_notifier_range_end, end); + kvm->mmu_invalidate_range_start = + min(kvm->mmu_invalidate_range_start, start); + kvm->mmu_invalidate_range_end = + max(kvm->mmu_invalidate_range_end, end); } } @@ -748,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = kvm_unmap_gfn_range, - .on_lock = kvm_inc_notifier_count, + .on_lock = kvm_mmu_invalidate_begin, .on_unlock = kvm_arch_guest_memory_reclaimed, .flush_on_ret = true, .may_block = mmu_notifier_range_blockable(range), @@ -759,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, /* * Prevent memslot modification between range_start() and range_end() * so that conditionally locking provides the same result in both - * functions. Without that guarantee, the mmu_notifier_count + * functions. Without that guarantee, the mmu_invalidate_in_progress * adjustments will be imbalanced. * * Pairs with the decrement in range_end(). @@ -775,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, * any given time, and the caches themselves can check for hva overlap, * i.e. don't need to rely on memslot overlap checks for performance. * Because this runs without holding mmu_lock, the pfn caches must use - * mn_active_invalidate_count (see above) instead of mmu_notifier_count. + * mn_active_invalidate_count (see above) instead of + * mmu_invalidate_in_progress. */ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, hva_range.may_block); @@ -785,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, return 0; } -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, - unsigned long end) +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, + unsigned long end) { /* * This sequence increase will notify the kvm page fault that * the page that is going to be mapped in the spte could have * been freed. */ - kvm->mmu_notifier_seq++; + kvm->mmu_invalidate_seq++; smp_wmb(); /* * The above sequence increase must be visible before the * below count decrease, which is ensured by the smp_wmb above - * in conjunction with the smp_rmb in mmu_notifier_retry(). + * in conjunction with the smp_rmb in mmu_invalidate_retry(). */ - kvm->mmu_notifier_count--; + kvm->mmu_invalidate_in_progress--; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -812,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, .end = range->end, .pte = __pte(0), .handler = (void *)kvm_null_fn, - .on_lock = kvm_dec_notifier_count, + .on_lock = kvm_mmu_invalidate_end, .on_unlock = (void *)kvm_null_fn, .flush_on_ret = false, .may_block = mmu_notifier_range_blockable(range), @@ -833,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, if (wake) rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); - BUG_ON(kvm->mmu_notifier_count < 0); + BUG_ON(kvm->mmu_invalidate_in_progress < 0); } static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, @@ -1017,21 +1023,21 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) } } -static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) +static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname) { static DEFINE_MUTEX(kvm_debugfs_lock); struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; - int i, ret; + int i, ret = -ENOMEM; int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc + kvm_vcpu_stats_header.num_desc; if (!debugfs_initialized()) return 0; - snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); + snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname); mutex_lock(&kvm_debugfs_lock); dent = debugfs_lookup(dir_name, kvm_debugfs_dir); if (dent) { @@ -1050,13 +1056,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); if (!kvm->debugfs_stat_data) - return -ENOMEM; + goto out_err; for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) { pdesc = &kvm_vm_stats_desc[i]; stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) - return -ENOMEM; + goto out_err; stat_data->kvm = kvm; stat_data->desc = pdesc; @@ -1071,7 +1077,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) pdesc = &kvm_vcpu_stats_desc[i]; stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT); if (!stat_data) - return -ENOMEM; + goto out_err; stat_data->kvm = kvm; stat_data->desc = pdesc; @@ -1083,12 +1089,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) } ret = kvm_arch_create_vm_debugfs(kvm); - if (ret) { - kvm_destroy_vm_debugfs(kvm); - return i; - } + if (ret) + goto out_err; return 0; +out_err: + kvm_destroy_vm_debugfs(kvm); + return ret; } /* @@ -1119,7 +1126,7 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm) return 0; } -static struct kvm *kvm_create_vm(unsigned long type) +static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) { struct kvm *kvm = kvm_arch_alloc_vm(); struct kvm_memslots *slots; @@ -1129,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (!kvm) return ERR_PTR(-ENOMEM); + /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ + __module_get(kvm_chardev_ops.owner); + KVM_MMU_LOCK_INIT(kvm); mmgrab(current->mm); kvm->mm = current->mm; @@ -1155,6 +1165,9 @@ static struct kvm *kvm_create_vm(unsigned long type) */ kvm->debugfs_dentry = ERR_PTR(-ENOENT); + snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d", + task_pid_nr(current)); + if (init_srcu_struct(&kvm->srcu)) goto out_err_no_srcu; if (init_srcu_struct(&kvm->irq_srcu)) @@ -1203,6 +1216,14 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err_no_mmu_notifier; + r = kvm_coalesced_mmio_init(kvm); + if (r < 0) + goto out_no_coalesced_mmio; + + r = kvm_create_vm_debugfs(kvm, fdname); + if (r) + goto out_err_no_debugfs; + r = kvm_arch_post_init_vm(kvm); if (r) goto out_err; @@ -1214,19 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type) preempt_notifier_inc(); kvm_init_pm_notifier(kvm); - /* - * When the fd passed to this ioctl() is opened it pins the module, - * but try_module_get() also prevents getting a reference if the module - * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). - */ - if (!try_module_get(kvm_chardev_ops.owner)) { - r = -ENODEV; - goto out_err; - } - return kvm; out_err: + kvm_destroy_vm_debugfs(kvm); +out_err_no_debugfs: + kvm_coalesced_mmio_free(kvm); +out_no_coalesced_mmio: #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) if (kvm->mmu_notifier.ops) mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); @@ -1245,6 +1260,7 @@ out_err_no_irq_srcu: out_err_no_srcu: kvm_arch_free_vm(kvm); mmdrop(current->mm); + module_put(kvm_chardev_ops.owner); return ERR_PTR(r); } @@ -2502,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, { unsigned int flags = FOLL_HWPOISON; struct page *page; - int npages = 0; + int npages; might_sleep(); @@ -3393,10 +3409,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu) int ret = -EINTR; int idx = srcu_read_lock(&vcpu->kvm->srcu); - if (kvm_arch_vcpu_runnable(vcpu)) { - kvm_make_request(KVM_REQ_UNHALT, vcpu); + if (kvm_arch_vcpu_runnable(vcpu)) goto out; - } if (kvm_cpu_has_pending_timer(vcpu)) goto out; if (signal_pending(current)) @@ -3916,10 +3930,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r) goto unlock_vcpu_destroy; - /* Fill the stats id string for the vcpu */ - snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d", - task_pid_nr(current), id); - /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); @@ -4368,7 +4378,7 @@ void kvm_unregister_device_ops(u32 type) static int kvm_ioctl_create_device(struct kvm *kvm, struct kvm_create_device *cd) { - const struct kvm_device_ops *ops = NULL; + const struct kvm_device_ops *ops; struct kvm_device *dev; bool test = cd->flags & KVM_CREATE_DEVICE_TEST; int type; @@ -4886,28 +4896,25 @@ EXPORT_SYMBOL_GPL(file_is_kvm); static int kvm_dev_ioctl_create_vm(unsigned long type) { - int r; + char fdname[ITOA_MAX_LEN + 1]; + int r, fd; struct kvm *kvm; struct file *file; - kvm = kvm_create_vm(type); - if (IS_ERR(kvm)) - return PTR_ERR(kvm); -#ifdef CONFIG_KVM_MMIO - r = kvm_coalesced_mmio_init(kvm); - if (r < 0) - goto put_kvm; -#endif - r = get_unused_fd_flags(O_CLOEXEC); - if (r < 0) - goto put_kvm; + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) + return fd; - snprintf(kvm->stats_id, sizeof(kvm->stats_id), - "kvm-%d", task_pid_nr(current)); + snprintf(fdname, sizeof(fdname), "%d", fd); + + kvm = kvm_create_vm(type, fdname); + if (IS_ERR(kvm)) { + r = PTR_ERR(kvm); + goto put_fd; + } file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (IS_ERR(file)) { - put_unused_fd(r); r = PTR_ERR(file); goto put_kvm; } @@ -4918,18 +4925,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) * cases it will be called by the final fput(file) and will take * care of doing kvm_put_kvm(kvm). */ - if (kvm_create_vm_debugfs(kvm, r) < 0) { - put_unused_fd(r); - fput(file); - return -ENOMEM; - } kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); - fd_install(r, file); - return r; + fd_install(fd, file); + return fd; put_kvm: kvm_put_kvm(kvm); +put_fd: + put_unused_fd(fd); return r; } @@ -5875,7 +5879,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = kvm_async_pf_init(); if (r) - goto out_free_5; + goto out_free_4; kvm_chardev_ops.owner = module; @@ -5899,10 +5903,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, out_unreg: kvm_async_pf_deinit(); -out_free_5: +out_free_4: for_each_possible_cpu(cpu) free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); -out_free_4: kmem_cache_destroy(kvm_vcpu_cache); out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); |