summaryrefslogtreecommitdiff
path: root/virt/kvm/kvm_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r--virt/kvm/kvm_main.c163
1 files changed, 83 insertions, 80 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 32896c845ffe..26383e63d9dd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -484,6 +484,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->ready = false;
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
vcpu->last_used_slot = NULL;
+
+ /* Fill the stats id string for the vcpu */
+ snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+ task_pid_nr(current), id);
}
static void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -698,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
/*
* .change_pte() must be surrounded by .invalidate_range_{start,end}().
- * If mmu_notifier_count is zero, then no in-progress invalidations,
- * including this one, found a relevant memslot at start(); rechecking
- * memslots here is unnecessary. Note, a false positive (count elevated
- * by a different invalidation) is sub-optimal but functionally ok.
+ * If mmu_invalidate_in_progress is zero, then no in-progress
+ * invalidations, including this one, found a relevant memslot at
+ * start(); rechecking memslots here is unnecessary. Note, a false
+ * positive (count elevated by a different invalidation) is sub-optimal
+ * but functionally ok.
*/
WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
- if (!READ_ONCE(kvm->mmu_notifier_count))
+ if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
return;
kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
}
-void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
* count is also read inside the mmu_lock critical section.
*/
- kvm->mmu_notifier_count++;
- if (likely(kvm->mmu_notifier_count == 1)) {
- kvm->mmu_notifier_range_start = start;
- kvm->mmu_notifier_range_end = end;
+ kvm->mmu_invalidate_in_progress++;
+ if (likely(kvm->mmu_invalidate_in_progress == 1)) {
+ kvm->mmu_invalidate_range_start = start;
+ kvm->mmu_invalidate_range_end = end;
} else {
/*
* Fully tracking multiple concurrent ranges has diminishing
@@ -732,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
* accumulate and persist until all outstanding invalidates
* complete.
*/
- kvm->mmu_notifier_range_start =
- min(kvm->mmu_notifier_range_start, start);
- kvm->mmu_notifier_range_end =
- max(kvm->mmu_notifier_range_end, end);
+ kvm->mmu_invalidate_range_start =
+ min(kvm->mmu_invalidate_range_start, start);
+ kvm->mmu_invalidate_range_end =
+ max(kvm->mmu_invalidate_range_end, end);
}
}
@@ -748,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
.end = range->end,
.pte = __pte(0),
.handler = kvm_unmap_gfn_range,
- .on_lock = kvm_inc_notifier_count,
+ .on_lock = kvm_mmu_invalidate_begin,
.on_unlock = kvm_arch_guest_memory_reclaimed,
.flush_on_ret = true,
.may_block = mmu_notifier_range_blockable(range),
@@ -759,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
/*
* Prevent memslot modification between range_start() and range_end()
* so that conditionally locking provides the same result in both
- * functions. Without that guarantee, the mmu_notifier_count
+ * functions. Without that guarantee, the mmu_invalidate_in_progress
* adjustments will be imbalanced.
*
* Pairs with the decrement in range_end().
@@ -775,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* any given time, and the caches themselves can check for hva overlap,
* i.e. don't need to rely on memslot overlap checks for performance.
* Because this runs without holding mmu_lock, the pfn caches must use
- * mn_active_invalidate_count (see above) instead of mmu_notifier_count.
+ * mn_active_invalidate_count (see above) instead of
+ * mmu_invalidate_in_progress.
*/
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
hva_range.may_block);
@@ -785,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
return 0;
}
-void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
/*
* This sequence increase will notify the kvm page fault that
* the page that is going to be mapped in the spte could have
* been freed.
*/
- kvm->mmu_notifier_seq++;
+ kvm->mmu_invalidate_seq++;
smp_wmb();
/*
* The above sequence increase must be visible before the
* below count decrease, which is ensured by the smp_wmb above
- * in conjunction with the smp_rmb in mmu_notifier_retry().
+ * in conjunction with the smp_rmb in mmu_invalidate_retry().
*/
- kvm->mmu_notifier_count--;
+ kvm->mmu_invalidate_in_progress--;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
@@ -812,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
.end = range->end,
.pte = __pte(0),
.handler = (void *)kvm_null_fn,
- .on_lock = kvm_dec_notifier_count,
+ .on_lock = kvm_mmu_invalidate_end,
.on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = mmu_notifier_range_blockable(range),
@@ -833,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (wake)
rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
- BUG_ON(kvm->mmu_notifier_count < 0);
+ BUG_ON(kvm->mmu_invalidate_in_progress < 0);
}
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
@@ -1017,21 +1023,21 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm)
}
}
-static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
+static int kvm_create_vm_debugfs(struct kvm *kvm, const char *fdname)
{
static DEFINE_MUTEX(kvm_debugfs_lock);
struct dentry *dent;
char dir_name[ITOA_MAX_LEN * 2];
struct kvm_stat_data *stat_data;
const struct _kvm_stats_desc *pdesc;
- int i, ret;
+ int i, ret = -ENOMEM;
int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
kvm_vcpu_stats_header.num_desc;
if (!debugfs_initialized())
return 0;
- snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
+ snprintf(dir_name, sizeof(dir_name), "%d-%s", task_pid_nr(current), fdname);
mutex_lock(&kvm_debugfs_lock);
dent = debugfs_lookup(dir_name, kvm_debugfs_dir);
if (dent) {
@@ -1050,13 +1056,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
sizeof(*kvm->debugfs_stat_data),
GFP_KERNEL_ACCOUNT);
if (!kvm->debugfs_stat_data)
- return -ENOMEM;
+ goto out_err;
for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
pdesc = &kvm_vm_stats_desc[i];
stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
if (!stat_data)
- return -ENOMEM;
+ goto out_err;
stat_data->kvm = kvm;
stat_data->desc = pdesc;
@@ -1071,7 +1077,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
pdesc = &kvm_vcpu_stats_desc[i];
stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
if (!stat_data)
- return -ENOMEM;
+ goto out_err;
stat_data->kvm = kvm;
stat_data->desc = pdesc;
@@ -1083,12 +1089,13 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
}
ret = kvm_arch_create_vm_debugfs(kvm);
- if (ret) {
- kvm_destroy_vm_debugfs(kvm);
- return i;
- }
+ if (ret)
+ goto out_err;
return 0;
+out_err:
+ kvm_destroy_vm_debugfs(kvm);
+ return ret;
}
/*
@@ -1119,7 +1126,7 @@ int __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
return 0;
}
-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
{
struct kvm *kvm = kvm_arch_alloc_vm();
struct kvm_memslots *slots;
@@ -1129,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (!kvm)
return ERR_PTR(-ENOMEM);
+ /* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
+ __module_get(kvm_chardev_ops.owner);
+
KVM_MMU_LOCK_INIT(kvm);
mmgrab(current->mm);
kvm->mm = current->mm;
@@ -1155,6 +1165,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
*/
kvm->debugfs_dentry = ERR_PTR(-ENOENT);
+ snprintf(kvm->stats_id, sizeof(kvm->stats_id), "kvm-%d",
+ task_pid_nr(current));
+
if (init_srcu_struct(&kvm->srcu))
goto out_err_no_srcu;
if (init_srcu_struct(&kvm->irq_srcu))
@@ -1203,6 +1216,14 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err_no_mmu_notifier;
+ r = kvm_coalesced_mmio_init(kvm);
+ if (r < 0)
+ goto out_no_coalesced_mmio;
+
+ r = kvm_create_vm_debugfs(kvm, fdname);
+ if (r)
+ goto out_err_no_debugfs;
+
r = kvm_arch_post_init_vm(kvm);
if (r)
goto out_err;
@@ -1214,19 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
preempt_notifier_inc();
kvm_init_pm_notifier(kvm);
- /*
- * When the fd passed to this ioctl() is opened it pins the module,
- * but try_module_get() also prevents getting a reference if the module
- * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait").
- */
- if (!try_module_get(kvm_chardev_ops.owner)) {
- r = -ENODEV;
- goto out_err;
- }
-
return kvm;
out_err:
+ kvm_destroy_vm_debugfs(kvm);
+out_err_no_debugfs:
+ kvm_coalesced_mmio_free(kvm);
+out_no_coalesced_mmio:
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
if (kvm->mmu_notifier.ops)
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
@@ -1245,6 +1260,7 @@ out_err_no_irq_srcu:
out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
+ module_put(kvm_chardev_ops.owner);
return ERR_PTR(r);
}
@@ -2502,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
{
unsigned int flags = FOLL_HWPOISON;
struct page *page;
- int npages = 0;
+ int npages;
might_sleep();
@@ -3393,10 +3409,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
int ret = -EINTR;
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvm_arch_vcpu_runnable(vcpu)) {
- kvm_make_request(KVM_REQ_UNHALT, vcpu);
+ if (kvm_arch_vcpu_runnable(vcpu))
goto out;
- }
if (kvm_cpu_has_pending_timer(vcpu))
goto out;
if (signal_pending(current))
@@ -3916,10 +3930,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (r)
goto unlock_vcpu_destroy;
- /* Fill the stats id string for the vcpu */
- snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
- task_pid_nr(current), id);
-
/* Now it's all set up, let userspace reach it */
kvm_get_kvm(kvm);
r = create_vcpu_fd(vcpu);
@@ -4368,7 +4378,7 @@ void kvm_unregister_device_ops(u32 type)
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
- const struct kvm_device_ops *ops = NULL;
+ const struct kvm_device_ops *ops;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;
@@ -4886,28 +4896,25 @@ EXPORT_SYMBOL_GPL(file_is_kvm);
static int kvm_dev_ioctl_create_vm(unsigned long type)
{
- int r;
+ char fdname[ITOA_MAX_LEN + 1];
+ int r, fd;
struct kvm *kvm;
struct file *file;
- kvm = kvm_create_vm(type);
- if (IS_ERR(kvm))
- return PTR_ERR(kvm);
-#ifdef CONFIG_KVM_MMIO
- r = kvm_coalesced_mmio_init(kvm);
- if (r < 0)
- goto put_kvm;
-#endif
- r = get_unused_fd_flags(O_CLOEXEC);
- if (r < 0)
- goto put_kvm;
+ fd = get_unused_fd_flags(O_CLOEXEC);
+ if (fd < 0)
+ return fd;
- snprintf(kvm->stats_id, sizeof(kvm->stats_id),
- "kvm-%d", task_pid_nr(current));
+ snprintf(fdname, sizeof(fdname), "%d", fd);
+
+ kvm = kvm_create_vm(type, fdname);
+ if (IS_ERR(kvm)) {
+ r = PTR_ERR(kvm);
+ goto put_fd;
+ }
file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (IS_ERR(file)) {
- put_unused_fd(r);
r = PTR_ERR(file);
goto put_kvm;
}
@@ -4918,18 +4925,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
* cases it will be called by the final fput(file) and will take
* care of doing kvm_put_kvm(kvm).
*/
- if (kvm_create_vm_debugfs(kvm, r) < 0) {
- put_unused_fd(r);
- fput(file);
- return -ENOMEM;
- }
kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
- fd_install(r, file);
- return r;
+ fd_install(fd, file);
+ return fd;
put_kvm:
kvm_put_kvm(kvm);
+put_fd:
+ put_unused_fd(fd);
return r;
}
@@ -5875,7 +5879,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
r = kvm_async_pf_init();
if (r)
- goto out_free_5;
+ goto out_free_4;
kvm_chardev_ops.owner = module;
@@ -5899,10 +5903,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
out_unreg:
kvm_async_pf_deinit();
-out_free_5:
+out_free_4:
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
-out_free_4:
kmem_cache_destroy(kvm_vcpu_cache);
out_free_3:
unregister_reboot_notifier(&kvm_reboot_notifier);