Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/coalesced_mmio.c |  31
-rw-r--r--  virt/kvm/eventfd.c        |   6
-rw-r--r--  virt/kvm/kvm_main.c       | 301
-rw-r--r--  virt/kvm/vfio.c           |   8
4 files changed, 166 insertions, 180 deletions
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 1b90acb6e3fe..375d6285475e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -40,27 +40,6 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
         return 1;
 }
 
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
-{
-        struct kvm_coalesced_mmio_ring *ring;
-        unsigned avail;
-
-        /* Are we able to batch it ? */
-
-        /* last is the first free entry
-         * check if we don't meet the first used entry
-         * there is always one unused entry in the buffer
-         */
-        ring = dev->kvm->coalesced_mmio_ring;
-        avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
-        if (avail == 0) {
-                /* full */
-                return 0;
-        }
-
-        return 1;
-}
-
 static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
                                 struct kvm_io_device *this, gpa_t addr,
                                 int len, const void *val)
@@ -74,9 +53,15 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
 
         spin_lock(&dev->kvm->ring_lock);
 
+        /*
+         * last is the index of the entry to fill.  Verify userspace hasn't
+         * set last to be out of range, and that there is room in the ring.
+         * Leave one entry free in the ring so that userspace can differentiate
+         * between an empty ring and a full ring.
+         */
         insert = READ_ONCE(ring->last);
-        if (!coalesced_mmio_has_room(dev, insert) ||
-            insert >= KVM_COALESCED_MMIO_MAX) {
+        if (insert >= KVM_COALESCED_MMIO_MAX ||
+            (insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
                 spin_unlock(&dev->kvm->ring_lock);
                 return -EOPNOTSUPP;
         }
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 992f9beb3e7d..6b390b622b72 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -328,12 +328,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
         seqcount_spinlock_init(&irqfd->irq_entry_sc, &kvm->irqfds.lock);
 
         f = fdget(args->fd);
-        if (!f.file) {
+        if (!fd_file(f)) {
                 ret = -EBADF;
                 goto out;
         }
 
-        eventfd = eventfd_ctx_fileget(f.file);
+        eventfd = eventfd_ctx_fileget(fd_file(f));
         if (IS_ERR(eventfd)) {
                 ret = PTR_ERR(eventfd);
                 goto fail;
@@ -420,7 +420,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
          * Check if there was an event already pending on the eventfd
          * before we registered, and trigger it as if we didn't miss it.
          */
-        events = vfs_poll(f.file, &irqfd->pt);
+        events = vfs_poll(fd_file(f), &irqfd->pt);
 
         if (events & EPOLLIN)
                 schedule_work(&irqfd->inject);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cb2b78e92910..05cbb2548d99 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -136,8 +136,8 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
 #define KVM_COMPAT(c)   .compat_ioctl   = kvm_no_compat_ioctl,  \
                         .open           = kvm_no_compat_open
 #endif
-static int hardware_enable_all(void);
-static void hardware_disable_all(void);
+static int kvm_enable_virtualization(void);
+static void kvm_disable_virtualization(void);
 
 static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
 
@@ -1220,7 +1220,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
         if (r)
                 goto out_err_no_arch_destroy_vm;
 
-        r = hardware_enable_all();
+        r = kvm_enable_virtualization();
         if (r)
                 goto out_err_no_disable;
 
@@ -1263,7 +1263,7 @@ out_no_coalesced_mmio:
         mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
 #endif
 out_err_no_mmu_notifier:
-        hardware_disable_all();
+        kvm_disable_virtualization();
 out_err_no_disable:
         kvm_arch_destroy_vm(kvm);
 out_err_no_arch_destroy_vm:
@@ -1360,7 +1360,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #endif
         kvm_arch_free_vm(kvm);
         preempt_notifier_dec();
-        hardware_disable_all();
+        kvm_disable_virtualization();
         mmdrop(mm);
 }
 
@@ -2860,13 +2860,11 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                                unsigned long addr, bool write_fault,
                                bool *writable, kvm_pfn_t *p_pfn)
 {
+        struct follow_pfnmap_args args = { .vma = vma, .address = addr };
         kvm_pfn_t pfn;
-        pte_t *ptep;
-        pte_t pte;
-        spinlock_t *ptl;
         int r;
 
-        r = follow_pte(vma, addr, &ptep, &ptl);
+        r = follow_pfnmap_start(&args);
         if (r) {
                 /*
                  * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -2881,21 +2879,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
                 if (r)
                         return r;
 
-                r = follow_pte(vma, addr, &ptep, &ptl);
+                r = follow_pfnmap_start(&args);
                 if (r)
                         return r;
         }
 
-        pte = ptep_get(ptep);
-
-        if (write_fault && !pte_write(pte)) {
+        if (write_fault && !args.writable) {
                 pfn = KVM_PFN_ERR_RO_FAULT;
                 goto out;
         }
 
         if (writable)
-                *writable = pte_write(pte);
-        pfn = pte_pfn(pte);
+                *writable = args.writable;
+        pfn = args.pfn;
 
         /*
          * Get a reference here because callers of *hva_to_pfn* and
@@ -2916,9 +2912,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
          */
         if (!kvm_try_get_pfn(pfn))
                 r = -EFAULT;
-
 out:
-        pte_unmap_unlock(ptep, ptl);
+        follow_pfnmap_end(&args);
         *p_pfn = pfn;
 
         return r;
@@ -3275,6 +3270,9 @@ static int __kvm_read_guest_page(struct kvm_memory_slot *slot, gfn_t gfn,
         int r;
         unsigned long addr;
 
+        if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+                return -EFAULT;
+
         addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
         if (kvm_is_error_hva(addr))
                 return -EFAULT;
@@ -3348,6 +3346,9 @@ static int __kvm_read_guest_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
         int r;
         unsigned long addr;
 
+        if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+                return -EFAULT;
+
         addr = gfn_to_hva_memslot_prot(slot, gfn, NULL);
         if (kvm_is_error_hva(addr))
                 return -EFAULT;
@@ -3378,6 +3379,9 @@ static int __kvm_write_guest_page(struct kvm *kvm,
         int r;
         unsigned long addr;
 
+        if (WARN_ON_ONCE(offset + len > PAGE_SIZE))
+                return -EFAULT;
+
         addr = gfn_to_hva_memslot(memslot, gfn);
         if (kvm_is_error_hva(addr))
                 return -EFAULT;
@@ -3581,7 +3585,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
         int ret;
 
         while ((seg = next_segment(len, offset)) != 0) {
-                ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, len);
+                ret = kvm_write_guest_page(kvm, gfn, zero_page, offset, seg);
                 if (ret < 0)
                         return ret;
                 offset = 0;
@@ -5571,137 +5575,67 @@ static struct miscdevice kvm_dev = {
 };
 
 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+static bool enable_virt_at_load = true;
+module_param(enable_virt_at_load, bool, 0444);
+
 __visible bool kvm_rebooting;
 EXPORT_SYMBOL_GPL(kvm_rebooting);
 
-static DEFINE_PER_CPU(bool, hardware_enabled);
+static DEFINE_PER_CPU(bool, virtualization_enabled);
+static DEFINE_MUTEX(kvm_usage_lock);
 static int kvm_usage_count;
 
-static int __hardware_enable_nolock(void)
+__weak void kvm_arch_enable_virtualization(void)
+{
+
+}
+
+__weak void kvm_arch_disable_virtualization(void)
+{
+
+}
+
+static int kvm_enable_virtualization_cpu(void)
 {
-        if (__this_cpu_read(hardware_enabled))
+        if (__this_cpu_read(virtualization_enabled))
                 return 0;
 
-        if (kvm_arch_hardware_enable()) {
+        if (kvm_arch_enable_virtualization_cpu()) {
                 pr_info("kvm: enabling virtualization on CPU%d failed\n",
                         raw_smp_processor_id());
                 return -EIO;
         }
 
-        __this_cpu_write(hardware_enabled, true);
+        __this_cpu_write(virtualization_enabled, true);
         return 0;
 }
 
-static void hardware_enable_nolock(void *failed)
-{
-        if (__hardware_enable_nolock())
-                atomic_inc(failed);
-}
-
 static int kvm_online_cpu(unsigned int cpu)
 {
-        int ret = 0;
-
         /*
          * Abort the CPU online process if hardware virtualization cannot
          * be enabled. Otherwise running VMs would encounter unrecoverable
          * errors when scheduled to this CPU.
          */
-        mutex_lock(&kvm_lock);
-        if (kvm_usage_count)
-                ret = __hardware_enable_nolock();
-        mutex_unlock(&kvm_lock);
-        return ret;
+        return kvm_enable_virtualization_cpu();
 }
 
-static void hardware_disable_nolock(void *junk)
+static void kvm_disable_virtualization_cpu(void *ign)
 {
-        /*
-         * Note, hardware_disable_all_nolock() tells all online CPUs to disable
-         * hardware, not just CPUs that successfully enabled hardware!
-         */
-        if (!__this_cpu_read(hardware_enabled))
+        if (!__this_cpu_read(virtualization_enabled))
                 return;
 
-        kvm_arch_hardware_disable();
+        kvm_arch_disable_virtualization_cpu();
 
-        __this_cpu_write(hardware_enabled, false);
+        __this_cpu_write(virtualization_enabled, false);
 }
 
 static int kvm_offline_cpu(unsigned int cpu)
 {
-        mutex_lock(&kvm_lock);
-        if (kvm_usage_count)
-                hardware_disable_nolock(NULL);
-        mutex_unlock(&kvm_lock);
+        kvm_disable_virtualization_cpu(NULL);
         return 0;
 }
 
-static void hardware_disable_all_nolock(void)
-{
-        BUG_ON(!kvm_usage_count);
-
-        kvm_usage_count--;
-        if (!kvm_usage_count)
-                on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static void hardware_disable_all(void)
-{
-        cpus_read_lock();
-        mutex_lock(&kvm_lock);
-        hardware_disable_all_nolock();
-        mutex_unlock(&kvm_lock);
-        cpus_read_unlock();
-}
-
-static int hardware_enable_all(void)
-{
-        atomic_t failed = ATOMIC_INIT(0);
-        int r;
-
-        /*
-         * Do not enable hardware virtualization if the system is going down.
-         * If userspace initiated a forced reboot, e.g. reboot -f, then it's
-         * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
-         * after kvm_reboot() is called.  Note, this relies on system_state
-         * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
-         * hook instead of registering a dedicated reboot notifier (the latter
-         * runs before system_state is updated).
-         */
-        if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
-            system_state == SYSTEM_RESTART)
-                return -EBUSY;
-
-        /*
-         * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
-         * is called, and so on_each_cpu() between them includes the CPU that
-         * is being onlined.  As a result, hardware_enable_nolock() may get
-         * invoked before kvm_online_cpu(), which also enables hardware if the
-         * usage count is non-zero.  Disable CPU hotplug to avoid attempting to
-         * enable hardware multiple times.
-         */
-        cpus_read_lock();
-        mutex_lock(&kvm_lock);
-
-        r = 0;
-
-        kvm_usage_count++;
-        if (kvm_usage_count == 1) {
-                on_each_cpu(hardware_enable_nolock, &failed, 1);
-
-                if (atomic_read(&failed)) {
-                        hardware_disable_all_nolock();
-                        r = -EBUSY;
-                }
-        }
-
-        mutex_unlock(&kvm_lock);
-        cpus_read_unlock();
-
-        return r;
-}
-
 static void kvm_shutdown(void)
 {
         /*
@@ -5717,34 +5651,32 @@ static void kvm_shutdown(void)
          */
         pr_info("kvm: exiting hardware virtualization\n");
         kvm_rebooting = true;
-        on_each_cpu(hardware_disable_nolock, NULL, 1);
+        on_each_cpu(kvm_disable_virtualization_cpu, NULL, 1);
 }
 
 static int kvm_suspend(void)
 {
         /*
          * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
-         * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
-         * is stable.  Assert that kvm_lock is not held to ensure the system
-         * isn't suspended while KVM is enabling hardware.  Hardware enabling
-         * can be preempted, but the task cannot be frozen until it has dropped
-         * all locks (userspace tasks are frozen via a fake signal).
+         * callbacks, i.e. no need to acquire kvm_usage_lock to ensure the usage
+         * count is stable.  Assert that kvm_usage_lock is not held to ensure
+         * the system isn't suspended while KVM is enabling hardware.  Hardware
+         * enabling can be preempted, but the task cannot be frozen until it has
+         * dropped all locks (userspace tasks are frozen via a fake signal).
          */
-        lockdep_assert_not_held(&kvm_lock);
+        lockdep_assert_not_held(&kvm_usage_lock);
         lockdep_assert_irqs_disabled();
 
-        if (kvm_usage_count)
-                hardware_disable_nolock(NULL);
+        kvm_disable_virtualization_cpu(NULL);
         return 0;
 }
 
 static void kvm_resume(void)
 {
-        lockdep_assert_not_held(&kvm_lock);
+        lockdep_assert_not_held(&kvm_usage_lock);
         lockdep_assert_irqs_disabled();
 
-        if (kvm_usage_count)
-                WARN_ON_ONCE(__hardware_enable_nolock());
+        WARN_ON_ONCE(kvm_enable_virtualization_cpu());
 }
 
 static struct syscore_ops kvm_syscore_ops = {
@@ -5752,13 +5684,95 @@
         .resume = kvm_resume,
         .shutdown = kvm_shutdown,
 };
+
+static int kvm_enable_virtualization(void)
+{
+        int r;
+
+        guard(mutex)(&kvm_usage_lock);
+
+        if (kvm_usage_count++)
+                return 0;
+
+        kvm_arch_enable_virtualization();
+
+        r = cpuhp_setup_state(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
+                              kvm_online_cpu, kvm_offline_cpu);
+        if (r)
+                goto err_cpuhp;
+
+        register_syscore_ops(&kvm_syscore_ops);
+
+        /*
+         * Undo virtualization enabling and bail if the system is going down.
+         * If userspace initiated a forced reboot, e.g. reboot -f, then it's
+         * possible for an in-flight operation to enable virtualization after
+         * syscore_shutdown() is called, i.e. without kvm_shutdown() being
+         * invoked.  Note, this relies on system_state being set _before_
+         * kvm_shutdown(), e.g. to ensure either kvm_shutdown() is invoked
+         * or this CPU observes the impending shutdown.  Which is why KVM uses
+         * a syscore ops hook instead of registering a dedicated reboot
+         * notifier (the latter runs before system_state is updated).
+         */
+        if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
+            system_state == SYSTEM_RESTART) {
+                r = -EBUSY;
+                goto err_rebooting;
+        }
+
+        return 0;
+
+err_rebooting:
+        unregister_syscore_ops(&kvm_syscore_ops);
+        cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+err_cpuhp:
+        kvm_arch_disable_virtualization();
+        --kvm_usage_count;
+        return r;
+}
+
+static void kvm_disable_virtualization(void)
+{
+        guard(mutex)(&kvm_usage_lock);
+
+        if (--kvm_usage_count)
+                return;
+
+        unregister_syscore_ops(&kvm_syscore_ops);
+        cpuhp_remove_state(CPUHP_AP_KVM_ONLINE);
+        kvm_arch_disable_virtualization();
+}
+
+static int kvm_init_virtualization(void)
+{
+        if (enable_virt_at_load)
+                return kvm_enable_virtualization();
+
+        return 0;
+}
+
+static void kvm_uninit_virtualization(void)
+{
+        if (enable_virt_at_load)
+                kvm_disable_virtualization();
+}
 #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-static int hardware_enable_all(void)
+static int kvm_enable_virtualization(void)
+{
+        return 0;
+}
+
+static int kvm_init_virtualization(void)
 {
         return 0;
 }
 
-static void hardware_disable_all(void)
+static void kvm_disable_virtualization(void)
+{
+
+}
+
+static void kvm_uninit_virtualization(void)
 {
 
 }
@@ -6191,7 +6205,6 @@ static const struct file_operations stat_fops_per_vm = {
         .release = kvm_debugfs_release,
         .read = simple_attr_read,
         .write = simple_attr_write,
-        .llseek = no_llseek,
 };
 
 static int vm_stat_get(void *_offset, u64 *val)
@@ -6460,15 +6473,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
         int r;
         int cpu;
 
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-        r = cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
-                                      kvm_online_cpu, kvm_offline_cpu);
-        if (r)
-                return r;
-
-        register_syscore_ops(&kvm_syscore_ops);
-#endif
-
         /* A kmem cache lets us meet the alignment requirements of fx_save. */
         if (!vcpu_align)
                 vcpu_align = __alignof__(struct kvm_vcpu);
@@ -6479,10 +6483,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
                                            offsetofend(struct kvm_vcpu, stats_id)
                                            - offsetof(struct kvm_vcpu, arch),
                                            NULL);
-        if (!kvm_vcpu_cache) {
-                r = -ENOMEM;
-                goto err_vcpu_cache;
-        }
+        if (!kvm_vcpu_cache)
+                return -ENOMEM;
 
         for_each_possible_cpu(cpu) {
                 if (!alloc_cpumask_var_node(&per_cpu(cpu_kick_mask, cpu),
@@ -6516,6 +6518,10 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
 
         kvm_gmem_init(module);
 
+        r = kvm_init_virtualization();
+        if (r)
+                goto err_virt;
+
         /*
          * Registration _must_ be the very last thing done, as this exposes
          * /dev/kvm to userspace, i.e. all infrastructure must be setup!
@@ -6529,6 +6535,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
         return 0;
 
 err_register:
+        kvm_uninit_virtualization();
+err_virt:
         kvm_vfio_ops_exit();
 err_vfio:
         kvm_async_pf_deinit();
@@ -6539,11 +6547,6 @@ err_cpu_kick_mask:
         for_each_possible_cpu(cpu)
                 free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
         kmem_cache_destroy(kvm_vcpu_cache);
-err_vcpu_cache:
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-        unregister_syscore_ops(&kvm_syscore_ops);
-        cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
         return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
@@ -6559,16 +6562,14 @@ void kvm_exit(void)
          */
         misc_deregister(&kvm_dev);
 
+        kvm_uninit_virtualization();
+
         debugfs_remove_recursive(kvm_debugfs_dir);
         for_each_possible_cpu(cpu)
                 free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
         kmem_cache_destroy(kvm_vcpu_cache);
         kvm_vfio_ops_exit();
         kvm_async_pf_deinit();
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-        unregister_syscore_ops(&kvm_syscore_ops);
-        cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE);
-#endif
         kvm_irqfd_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 76b7f6085dcd..388ae471d258 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -194,7 +194,7 @@ static int kvm_vfio_file_del(struct kvm_device *dev, unsigned int fd)
         int ret;
 
         f = fdget(fd);
-        if (!f.file)
+        if (!fd_file(f))
                 return -EBADF;
 
         ret = -ENOENT;
@@ -202,7 +202,7 @@ static int kvm_vfio_file_del(struct kvm_device *dev, unsigned int fd)
         mutex_lock(&kv->lock);
 
         list_for_each_entry(kvf, &kv->file_list, node) {
-                if (kvf->file != f.file)
+                if (kvf->file != fd_file(f))
                         continue;
 
                 list_del(&kvf->node);
@@ -240,7 +240,7 @@ static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev,
                 return -EFAULT;
 
         f = fdget(param.groupfd);
-        if (!f.file)
+        if (!fd_file(f))
                 return -EBADF;
 
         ret = -ENOENT;
@@ -248,7 +248,7 @@ static int kvm_vfio_file_set_spapr_tce(struct kvm_device *dev,
         mutex_lock(&kv->lock);
 
         list_for_each_entry(kvf, &kv->file_list, node) {
-                if (kvf->file != f.file)
+                if (kvf->file != fd_file(f))
                         continue;
 
                 if (!kvf->iommu_group) {
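
Note on the coalesced_mmio.c change: the open-coded check that replaces coalesced_mmio_has_room() keeps the usual "leave one slot empty" ring-buffer convention, in which ring->first == ring->last means the ring is empty and advancing last onto first would make it full. It also rejects an out-of-range ring->last up front, since the ring page is mapped writable by userspace. A minimal sketch of the invariant follows; the names are illustrative only, RING_SIZE stands in for KVM_COALESCED_MMIO_MAX, and none of these helpers exist in the patch.

/*
 * Illustrative sketch, not part of the patch: the "one unused entry"
 * convention used by struct kvm_coalesced_mmio_ring.  'first' is the
 * next entry the consumer reads, 'last' is the next entry the producer
 * fills; one slot always stays unused so full and empty states can be
 * told apart without a separate element count.
 */
#define RING_SIZE 64	/* stands in for KVM_COALESCED_MMIO_MAX */

struct ring_idx {
	unsigned int first;	/* consumer index */
	unsigned int last;	/* producer index */
};

static int ring_is_empty(const struct ring_idx *r)
{
	return r->first == r->last;
}

static int ring_is_full(const struct ring_idx *r)
{
	/* Writing one more entry would make 'last' catch up with 'first'. */
	return (r->last + 1) % RING_SIZE == r->first;
}

static unsigned int ring_free_entries(const struct ring_idx *r)
{
	/* At most RING_SIZE - 1 entries are ever usable. */
	return (r->first - r->last - 1 + RING_SIZE) % RING_SIZE;
}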
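
Note on the kvm_main.c rework: kvm_enable_virtualization() and kvm_disable_virtualization() reference-count users of hardware virtualization under the new kvm_usage_lock and rely on guard(mutex), which releases the mutex automatically on every return path and is what allows the early "if (kvm_usage_count++) return 0;" style. A stand-alone sketch of that pattern follows; example_lock, example_count, example_get() and example_put() are illustrative names, not KVM symbols.

#include <linux/cleanup.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_lock);
static int example_count;

static int example_get(void)
{
	/* Takes example_lock now; it is dropped on any return below. */
	guard(mutex)(&example_lock);

	if (example_count++)
		return 0;

	/* One-time setup runs only on the 0 -> 1 transition, lock held. */
	return 0;
}

static void example_put(void)
{
	guard(mutex)(&example_lock);

	if (--example_count)
		return;

	/* Last user gone; one-time teardown runs here, lock held. */
}

With enable_virt_at_load left at its default of true, kvm_init_virtualization() takes this reference once at module init, so virtualization stays enabled for the module's lifetime. With the parameter cleared, enabling is driven by kvm_create_vm() and kvm_destroy_vm(), so virtualization is on only while at least one VM exists.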