From 37b9a671f346a184c4e381b32ee465cf7d248ae8 Mon Sep 17 00:00:00 2001
From: GanShun
Date: Wed, 30 Nov 2016 10:28:19 -0800
Subject: kvm: nVMX: Correct a VMX instruction error code for VMPTRLD

When the operand passed to VMPTRLD matches the address of the VMXON
region, the VMX instruction error code should be
VMXERR_VMPTRLD_VMXON_POINTER rather than VMXERR_VMCLEAR_VMXON_POINTER.

Signed-off-by: GanShun
Signed-off-by: Jim Mattson
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index aae43c6f2472..c41d7ffdda5a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7122,7 +7122,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
 		if (vmptr == vmx->nested.vmxon_ptr) {
 			nested_vmx_failValid(vcpu,
-					     VMXERR_VMCLEAR_VMXON_POINTER);
+					     VMXERR_VMPTRLD_VMXON_POINTER);
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 		break;
--
cgit v1.2.3

From 83781d180b219bd079ae72b341ee3f21fb236e97 Mon Sep 17 00:00:00 2001
From: Yi Sun
Date: Wed, 14 Dec 2016 10:42:29 +0800
Subject: KVM: x86: Expose Intel AVX512IFMA/AVX512VBMI/SHA features to guest.

Expose AVX512IFMA/AVX512VBMI/SHA features to guest.

AVX512 spec can be found at:
https://software.intel.com/sites/default/files/managed/26/40/319433-026.pdf

SHA spec can be found at:
https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf

This patch depends on below patch.
http://marc.info/?l=linux-kernel&m=147932800828178&w=2

Signed-off-by: Yi Sun
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/cpuid.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b2d3cf1ef54a..e85f6bd7b9d5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -373,16 +373,17 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_cpuid_7_0_ebx_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-		F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-		F(AVX512BW) | F(AVX512VL);
+		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+		F(SHA_NI) | F(AVX512BW) | F(AVX512VL);

 	/* cpuid 0xD.1.eax */
 	const u32 kvm_cpuid_D_1_eax_x86_features =
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

 	/* cpuid 7.0.ecx*/
-	const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+	const u32 kvm_cpuid_7_0_ecx_x86_features =
+		F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/;

 	/* cpuid 7.0.edx*/
 	const u32 kvm_cpuid_7_0_edx_x86_features =
--
cgit v1.2.3
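The effect of this change can be observed from inside a guest by reading
CPUID.(EAX=7,ECX=0) directly. Below is a minimal userspace sketch, not part
of the patch; the bit positions (EBX bit 21 = AVX512IFMA, EBX bit 29 = SHA,
ECX bit 1 = AVX512VBMI) follow the Intel SDM, and the cpuid_count() helper
is defined locally rather than taken from any header.

/*
 * Sketch: guest-side check that the newly exposed feature bits are
 * visible in CPUID leaf 7, subleaf 0.
 */
#include <stdint.h>
#include <stdio.h>

static void cpuid_count(uint32_t leaf, uint32_t subleaf, uint32_t *a,
			uint32_t *b, uint32_t *c, uint32_t *d)
{
	__asm__ volatile("cpuid"
			 : "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
			 : "a"(leaf), "c"(subleaf));
}

int main(void)
{
	uint32_t eax, ebx, ecx, edx;

	cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
	printf("AVX512IFMA: %s\n", (ebx >> 21) & 1 ? "yes" : "no");
	printf("SHA-NI:     %s\n", (ebx >> 29) & 1 ? "yes" : "no");
	printf("AVX512VBMI: %s\n", (ecx >> 1) & 1 ? "yes" : "no");
	return 0;
}

Run inside a guest started with "-cpu host" (or any CPU model carrying these
flags), the three bits should read "yes" once the host kernel includes this
patch and the hardware supports the features.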
From 3f5ad8be3713572f3946b69eb376206153d0ea2d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Mon, 12 Dec 2016 10:12:53 +0100
Subject: KVM: hyperv: fix locking of struct kvm_hv fields

Introduce a new mutex to avoid an AB-BA deadlock between kvm->lock and
vcpu->mutex.  Protect accesses in kvm_hv_setup_tsc_page too, as
suggested by Roman.

Reported-by: Dmitry Vyukov
Reviewed-by: Roman Kagan
Signed-off-by: Paolo Bonzini
---
 Documentation/virtual/kvm/locking.txt |  8 ++++++--
 arch/x86/include/asm/kvm_host.h       |  1 +
 arch/x86/kvm/hyperv.c                 | 24 +++++++++++++++---------
 arch/x86/kvm/x86.c                    |  1 +
 4 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index e5dd9f4d6100..fd013bf4115b 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -13,8 +13,12 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.

-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
-else is a leaf: no other lock is taken inside the critical sections.
+On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
+
+Everything else is a leaf: no other lock is taken inside the critical
+sections.

 2: Exception
 ------------

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7892530cbacf..2e25038dbd93 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,7 @@ struct kvm_apic_map {

 /* Hyper-V emulation context */
 struct kvm_hv {
+	struct mutex hv_lock;
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
 	u64 hv_tsc_page;

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 99cde5220e07..1572c35b4f1a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -852,6 +852,10 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
 		return;

+	mutex_lock(&kvm->arch.hyperv.hv_lock);
+	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+		goto out_unlock;
+
 	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 	/*
 	 * Because the TSC parameters only vary when there is a
@@ -859,7 +863,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	 */
 	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
 				    &tsc_seq, sizeof(tsc_seq))))
-		return;
+		goto out_unlock;

 	/*
 	 * While we're computing and writing the parameters, force the
@@ -868,15 +872,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	hv->tsc_ref.tsc_sequence = 0;
 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
 			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-		return;
+		goto out_unlock;

 	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
-		return;
+		goto out_unlock;

 	/* Ensure sequence is zero before writing the rest of the struct.  */
 	smp_wmb();
 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
-		return;
+		goto out_unlock;

 	/*
 	 * Now switch to the TSC page mechanism by writing the sequence.
@@ -891,6 +895,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
 	hv->tsc_ref.tsc_sequence = tsc_seq;
 	kvm_write_guest(kvm, gfn_to_gpa(gfn),
 			&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+out_unlock:
+	mutex_unlock(&kvm->arch.hyperv.hv_lock);
 }

 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
@@ -1142,9 +1148,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	if (kvm_hv_msr_partition_wide(msr)) {
 		int r;

-		mutex_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
 		r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
-		mutex_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
 		return r;
 	} else
 		return kvm_hv_set_msr(vcpu, msr, data, host);
@@ -1155,9 +1161,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	if (kvm_hv_msr_partition_wide(msr)) {
 		int r;

-		mutex_lock(&vcpu->kvm->lock);
+		mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
 		r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
-		mutex_unlock(&vcpu->kvm->lock);
+		mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
 		return r;
 	} else
 		return kvm_hv_get_msr(vcpu, msr, pdata);
@@ -1165,7 +1171,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)

 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
-	return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
 }

 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1f0d2383f5ee..49da1064ef50 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7881,6 +7881,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
+	mutex_init(&kvm->arch.hyperv.hv_lock);
 	spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);

 	kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
--
cgit v1.2.3

From 931f261b42f10c8c8c9ab53f5ceb47ce51af7cf5 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Sat, 17 Dec 2016 18:43:52 +0100
Subject: kvm: fix schedule in atomic in kvm_steal_time_set_preempted()

kvm_steal_time_set_preempted() isn't disabling the pagefaults before
calling __copy_to_user and the kernel debug notices.

Signed-off-by: Andrea Arcangeli
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 49da1064ef50..8ce1139a08b5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2844,7 +2844,17 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)

 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * Disable page faults because we're in atomic context here.
+	 * kvm_write_guest_offset_cached() would call might_fault()
+	 * that relies on pagefault_disable() to tell if there's a
+	 * bug. NOTE: the write to guest memory may not go through if
+	 * during postcopy live migration or if there's heavy guest
+	 * paging.
+	 */
+	pagefault_disable();
 	kvm_steal_time_set_preempted(vcpu);
+	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	vcpu->arch.last_host_tsc = rdtsc();
--
cgit v1.2.3

From cc0d907c0907561f108b2f4d4da24e85f18d0ca5 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli
Date: Sat, 17 Dec 2016 19:13:32 +0100
Subject: kvm: take srcu lock around kvm_steal_time_set_preempted()

kvm_memslots() will be called by kvm_write_guest_offset_cached() so
take the srcu lock.
Signed-off-by: Andrea Arcangeli
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/x86.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ce1139a08b5..445c51b6cf6d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2844,6 +2844,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)

 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	int idx;
 	/*
 	 * Disable page faults because we're in atomic context here.
 	 * kvm_write_guest_offset_cached() would call might_fault()
@@ -2853,7 +2854,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	 * paging.
 	 */
 	pagefault_disable();
+	/*
+	 * kvm_memslots() will be called by
+	 * kvm_write_guest_offset_cached() so take the srcu lock.
+	 */
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	kvm_steal_time_set_preempted(vcpu);
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	pagefault_enable();
 	kvm_x86_ops->vcpu_put(vcpu);
 	kvm_put_guest_fpu(vcpu);
--
cgit v1.2.3

From ef85b67385436ddc1998f45f1d6a210f935b3388 Mon Sep 17 00:00:00 2001
From: Jim Mattson
Date: Mon, 12 Dec 2016 11:01:37 -0800
Subject: kvm: nVMX: Allow L1 to intercept software exceptions (#BP and #OF)

When L2 exits to L0 due to "exception or NMI", software exceptions
(#BP and #OF) for which L1 has requested an intercept should be
handled by L1 rather than L0. Previously, only hardware exceptions
were forwarded to L1.

Signed-off-by: Jim Mattson
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini
---
 arch/x86/kvm/vmx.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c41d7ffdda5a..24db5fb6f575 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1389,10 +1389,10 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
 	return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
 }

-static inline bool is_exception(u32 intr_info)
+static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
-		== (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+		== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
 }

 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
@@ -5728,7 +5728,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 	if (is_machine_check(intr_info))
 		return handle_machine_check(vcpu);

-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+	if (is_nmi(intr_info))
 		return 1;  /* already handled by vmx_vcpu_run() */

 	if (is_no_device(intr_info)) {
@@ -8170,7 +8170,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)

 	switch (exit_reason) {
 	case EXIT_REASON_EXCEPTION_NMI:
-		if (!is_exception(intr_info))
+		if (is_nmi(intr_info))
 			return false;
 		else if (is_page_fault(intr_info))
 			return enable_ept;
@@ -8765,8 +8765,7 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 		kvm_machine_check();

 	/* We need to handle NMIs before interrupts are enabled */
-	if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
-	    (exit_intr_info & INTR_INFO_VALID_MASK)) {
+	if (is_nmi(exit_intr_info)) {
 		kvm_before_handle_nmi(&vmx->vcpu);
 		asm("int $2");
 		kvm_after_handle_nmi(&vmx->vcpu);
--
cgit v1.2.3
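The is_nmi() predicate above keys off the VM-exit interruption-information
field. The following standalone sketch, not part of the patch, mirrors that
check with locally defined constants; the field layout (bits 7:0 vector,
bits 10:8 type, bit 31 valid, with type 2 = NMI, 3 = hardware exception,
6 = software exception such as #BP/#OF) follows the Intel SDM.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define INTR_INFO_TYPE_MASK	0x00000700u	/* bits 10:8 */
#define INTR_INFO_VALID_MASK	0x80000000u	/* bit 31 */

#define INTR_TYPE_NMI			(2u << 8)
#define INTR_TYPE_HARD_EXCEPTION	(3u << 8)	/* e.g. #PF, #GP */
#define INTR_TYPE_SOFT_EXCEPTION	(6u << 8)	/* #BP, #OF */

/* Same shape as the kernel helper: a valid event of type NMI. */
static bool is_nmi(uint32_t intr_info)
{
	return (intr_info & (INTR_INFO_TYPE_MASK | INTR_INFO_VALID_MASK))
		== (INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
}

int main(void)
{
	/* #BP (vector 3) delivered as a software exception */
	uint32_t bp  = INTR_INFO_VALID_MASK | INTR_TYPE_SOFT_EXCEPTION | 3;
	/* NMI (vector 2) */
	uint32_t nmi = INTR_INFO_VALID_MASK | INTR_TYPE_NMI | 2;

	/*
	 * With the patch, only a true NMI makes the nested exit check
	 * return false early; #BP/#OF now fall through to the vmcs12
	 * exception-bitmap test and can be forwarded to L1.
	 */
	printf("#BP -> is_nmi = %d\n", is_nmi(bp));	/* prints 0 */
	printf("NMI -> is_nmi = %d\n", is_nmi(nmi));	/* prints 1 */
	return 0;
}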