diff options
Diffstat (limited to 'arch/x86/kvm/vmx')
-rw-r--r-- | arch/x86/kvm/vmx/capabilities.h | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/main.c | 10 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 134 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/sgx.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 77 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 9 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx_onhyperv.h | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx_ops.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/x86_ops.h | 7 |
10 files changed, 172 insertions, 95 deletions
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 41a4533f9989..cb6588238f46 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -54,9 +54,7 @@ struct nested_vmx_msrs { }; struct vmcs_config { - int size; - u32 basic_cap; - u32 revision_id; + u64 basic; u32 pin_based_exec_ctrl; u32 cpu_based_exec_ctrl; u32 cpu_based_2nd_exec_ctrl; @@ -76,7 +74,7 @@ extern struct vmx_capability vmx_capability __ro_after_init; static inline bool cpu_has_vmx_basic_inout(void) { - return (((u64)vmcs_config.basic_cap << 32) & VMX_BASIC_INOUT); + return vmcs_config.basic & VMX_BASIC_INOUT; } static inline bool cpu_has_virtual_nmis(void) @@ -225,7 +223,7 @@ static inline bool cpu_has_vmx_vmfunc(void) static inline bool cpu_has_vmx_shadow_vmcs(void) { /* check if the cpu supports writing r/o exit information fields */ - if (!(vmcs_config.misc & MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) + if (!(vmcs_config.misc & VMX_MISC_VMWRITE_SHADOW_RO_FIELDS)) return false; return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -367,7 +365,7 @@ static inline bool cpu_has_vmx_invvpid_global(void) static inline bool cpu_has_vmx_intel_pt(void) { - return (vmcs_config.misc & MSR_IA32_VMX_MISC_INTEL_PT) && + return (vmcs_config.misc & VMX_MISC_INTEL_PT) && (vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) && (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL); } diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 0bf35ebe8a1b..7668e2fb8043 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -23,8 +23,10 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .hardware_unsetup = vmx_hardware_unsetup, - .hardware_enable = vmx_hardware_enable, - .hardware_disable = vmx_hardware_disable, + .enable_virtualization_cpu = vmx_enable_virtualization_cpu, + .disable_virtualization_cpu = vmx_disable_virtualization_cpu, + .emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu, + .has_emulated_msr = vmx_has_emulated_msr, .vm_size = sizeof(struct kvm_vmx), @@ -41,7 +43,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .vcpu_put = vmx_vcpu_put, .update_exception_bitmap = vmx_update_exception_bitmap, - .get_msr_feature = vmx_get_msr_feature, + .get_feature_msr = vmx_get_feature_msr, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -89,6 +91,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .enable_nmi_window = vmx_enable_nmi_window, .enable_irq_window = vmx_enable_irq_window, .update_cr8_intercept = vmx_update_cr8_intercept, + + .x2apic_icr_is_split = false, .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 2392a7ef254d..a8e7bc04d9bf 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -981,7 +981,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) __func__, i, e.index, e.reserved); goto fail; } - if (kvm_set_msr(vcpu, e.index, e.value)) { + if (kvm_set_msr_with_filter(vcpu, e.index, e.value)) { pr_debug_ratelimited( "%s cannot write MSR (%u, 0x%x, 0x%llx)\n", __func__, i, e.index, e.value); @@ -1017,7 +1017,7 @@ static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu, } } - if (kvm_get_msr(vcpu, msr_index, data)) { + if (kvm_get_msr_with_filter(vcpu, msr_index, data)) { pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__, msr_index); return false; @@ -1112,9 +1112,9 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu, /* * Emulated VMEntry does not fail here. Instead a less * accurate value will be returned by - * nested_vmx_get_vmexit_msr_value() using kvm_get_msr() - * instead of reading the value from the vmcs02 VMExit - * MSR-store area. + * nested_vmx_get_vmexit_msr_value() by reading KVM's + * internal MSR state instead of reading the value from + * the vmcs02 VMExit MSR-store area. */ pr_warn_ratelimited( "Not enough msr entries in msr_autostore. Can't add msr %x\n", @@ -1251,21 +1251,32 @@ static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved = - /* feature (except bit 48; see below) */ - BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | - /* reserved */ - BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); + const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT | + VMX_BASIC_INOUT | + VMX_BASIC_TRUE_CTLS; + + const u64 reserved_bits = GENMASK_ULL(63, 56) | + GENMASK_ULL(47, 45) | + BIT_ULL(31); + u64 vmx_basic = vmcs_config.nested.basic; - if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * Except for 32BIT_PHYS_ADDR_ONLY, which is an anti-feature bit (has + * inverted polarity), the incoming value must not set feature bits or + * reserved bits that aren't allowed/supported by KVM. Fields, i.e. + * multi-bit values, are explicitly checked below. + */ + if (!is_bitwise_subset(vmx_basic, data, feature_bits | reserved_bits)) return -EINVAL; /* * KVM does not emulate a version of VMX that constrains physical * addresses of VMX structures (e.g. VMCS) to 32-bits. */ - if (data & BIT_ULL(48)) + if (data & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EINVAL; if (vmx_basic_vmcs_revision_id(vmx_basic) != @@ -1334,16 +1345,29 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) { - const u64 feature_and_reserved_bits = - /* feature */ - BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) | - BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | - /* reserved */ - GENMASK_ULL(13, 9) | BIT_ULL(31); + const u64 feature_bits = VMX_MISC_SAVE_EFER_LMA | + VMX_MISC_ACTIVITY_HLT | + VMX_MISC_ACTIVITY_SHUTDOWN | + VMX_MISC_ACTIVITY_WAIT_SIPI | + VMX_MISC_INTEL_PT | + VMX_MISC_RDMSR_IN_SMM | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMXOFF_BLOCK_SMI | + VMX_MISC_ZERO_LEN_INS; + + const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(13, 9); + u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, vmcs_config.nested.misc_high); - if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) + BUILD_BUG_ON(feature_bits & reserved_bits); + + /* + * The incoming value must not set feature bits or reserved bits that + * aren't allowed/supported by KVM. Fields, i.e. multi-bit values, are + * explicitly checked below. + */ + if (!is_bitwise_subset(vmx_misc, data, feature_bits | reserved_bits)) return -EINVAL; if ((vmx->nested.msrs.pinbased_ctls_high & @@ -2317,10 +2341,12 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0 /* Posted interrupts setting is only taken from vmcs12. */ vmx->nested.pi_pending = false; - if (nested_cpu_has_posted_intr(vmcs12)) + if (nested_cpu_has_posted_intr(vmcs12)) { vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; - else + } else { + vmx->nested.posted_intr_nv = -1; exec_control &= ~PIN_BASED_POSTED_INTR; + } pin_controls_set(vmx, exec_control); /* @@ -2470,6 +2496,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); @@ -2507,7 +2534,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmx->segment_cache.bitmask = 0; + vmx_segment_cache_clear(vmx); } if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & @@ -4284,11 +4311,52 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) } if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { + int irq; + if (block_nested_events) return -EBUSY; if (!nested_exit_on_intr(vcpu)) goto no_vmexit; - nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + + if (!nested_exit_intr_ack_set(vcpu)) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); + return 0; + } + + irq = kvm_cpu_get_extint(vcpu); + if (irq != -1) { + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + return 0; + } + + irq = kvm_apic_has_interrupt(vcpu); + if (WARN_ON_ONCE(irq < 0)) + goto no_vmexit; + + /* + * If the IRQ is L2's PI notification vector, process posted + * interrupts for L2 instead of injecting VM-Exit, as the + * detection/morphing architecturally occurs when the IRQ is + * delivered to the CPU. Note, only interrupts that are routed + * through the local APIC trigger posted interrupt processing, + * and enabling posted interrupts requires ACK-on-exit. + */ + if (irq == vmx->nested.posted_intr_nv) { + vmx->nested.pi_pending = true; + kvm_apic_clear_irr(vcpu, irq); + goto no_vmexit; + } + + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); + + /* + * ACK the interrupt _after_ emulating VM-Exit, as the IRQ must + * be marked as in-service in vmcs01.GUEST_INTERRUPT_STATUS.SVI + * if APICv is active. + */ + kvm_apic_ack_interrupt(vcpu, irq); return 0; } @@ -4806,7 +4874,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) goto vmabort; } - if (kvm_set_msr(vcpu, h.index, h.value)) { + if (kvm_set_msr_with_filter(vcpu, h.index, h.value)) { pr_debug_ratelimited( "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", __func__, j, h.index, h.value); @@ -4969,14 +5037,6 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && - nested_exit_intr_ack_set(vcpu)) { - int irq = kvm_cpu_get_interrupt(vcpu); - WARN_ON(irq < 0); - vmcs12->vm_exit_intr_info = irq | - INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; - } - if (vm_exit_reason != -1) trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, vmcs12->exit_qualification, @@ -7051,7 +7111,7 @@ static void nested_vmx_setup_misc_data(struct vmcs_config *vmcs_conf, { msrs->misc_low = (u32)vmcs_conf->misc & VMX_MISC_SAVE_EFER_LMA; msrs->misc_low |= - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS | VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT | VMX_MISC_ACTIVITY_WAIT_SIPI; @@ -7066,12 +7126,10 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - msrs->basic = - VMCS12_REVISION | - VMX_BASIC_TRUE_CTLS | - ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | - (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); + msrs->basic = vmx_basic_encode_vmcs_info(VMCS12_REVISION, VMCS12_SIZE, + X86_MEMTYPE_WB); + msrs->basic |= VMX_BASIC_TRUE_CTLS; if (cpu_has_vmx_basic_inout()) msrs->basic |= VMX_BASIC_INOUT; } diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index cce4e2aa30fb..2c296b6abb8c 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -39,11 +39,17 @@ bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_vmcs12; } static inline struct vmcs12 *get_shadow_vmcs12(struct kvm_vcpu *vcpu) { + lockdep_assert_once(lockdep_is_held(&vcpu->mutex) || + !refcount_read(&vcpu->kvm->users_count)); + return to_vmx(vcpu)->nested.cached_shadow_vmcs12; } @@ -109,7 +115,7 @@ static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) { return to_vmx(vcpu)->nested.msrs.misc_low & - MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; + VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 6fef01e0536e..a3c3d2a51f47 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -274,7 +274,7 @@ static int handle_encls_ecreate(struct kvm_vcpu *vcpu) * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to * enforce restriction of access to the PROVISIONKEY. */ - contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL_ACCOUNT); + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); if (!contents) return -ENOMEM; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f18c2d8c7476..1a4438358c5e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -525,10 +525,6 @@ static const struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; -static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) -{ - vmx->segment_cache.bitmask = 0; -} static unsigned long host_idt_base; @@ -755,7 +751,7 @@ fault: return -EIO; } -static void vmx_emergency_disable(void) +void vmx_emergency_disable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); struct loaded_vmcs *v; @@ -1998,15 +1994,15 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, return !(msr->data & ~valid_bits); } -int vmx_get_msr_feature(struct kvm_msr_entry *msr) +int vmx_get_feature_msr(u32 msr, u64 *data) { - switch (msr->index) { + switch (msr) { case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested) return 1; - return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + return vmx_get_vmx_msr(&vmcs_config.nested, msr, data); default: - return KVM_MSR_RET_INVALID; + return KVM_MSR_RET_UNSUPPORTED; } } @@ -2605,13 +2601,13 @@ static u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr) static int setup_vmcs_config(struct vmcs_config *vmcs_conf, struct vmx_capability *vmx_cap) { - u32 vmx_msr_low, vmx_msr_high; u32 _pin_based_exec_control = 0; u32 _cpu_based_exec_control = 0; u32 _cpu_based_2nd_exec_control = 0; u64 _cpu_based_3rd_exec_control = 0; u32 _vmexit_control = 0; u32 _vmentry_control = 0; + u64 basic_msr; u64 misc_msr; int i; @@ -2734,29 +2730,29 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf, _vmexit_control &= ~x_ctrl; } - rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high); + rdmsrl(MSR_IA32_VMX_BASIC, basic_msr); /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */ - if ((vmx_msr_high & 0x1fff) > PAGE_SIZE) + if (vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE) return -EIO; #ifdef CONFIG_X86_64 - /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */ - if (vmx_msr_high & (1u<<16)) + /* + * KVM expects to be able to shove all legal physical addresses into + * VMCS fields for 64-bit kernels, and per the SDM, "This bit is always + * 0 for processors that support Intel 64 architecture". + */ + if (basic_msr & VMX_BASIC_32BIT_PHYS_ADDR_ONLY) return -EIO; #endif /* Require Write-Back (WB) memory type for VMCS accesses. */ - if (((vmx_msr_high >> 18) & 15) != 6) + if (vmx_basic_vmcs_mem_type(basic_msr) != X86_MEMTYPE_WB) return -EIO; rdmsrl(MSR_IA32_VMX_MISC, misc_msr); - vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - - vmcs_conf->revision_id = vmx_msr_low; - + vmcs_conf->basic = basic_msr; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control; @@ -2844,7 +2840,7 @@ fault: return -EFAULT; } -int vmx_hardware_enable(void) +int vmx_enable_virtualization_cpu(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2881,7 +2877,7 @@ static void vmclear_local_loaded_vmcss(void) __loaded_vmcs_clear(v); } -void vmx_hardware_disable(void) +void vmx_disable_virtualization_cpu(void) { vmclear_local_loaded_vmcss(); @@ -2903,13 +2899,13 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) if (!pages) return NULL; vmcs = page_address(pages); - memset(vmcs, 0, vmcs_config.size); + memset(vmcs, 0, vmx_basic_vmcs_size(vmcs_config.basic)); /* KVM supports Enlightened VMCS v1 only */ if (kvm_is_using_evmcs()) vmcs->hdr.revision_id = KVM_EVMCS_VERSION; else - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); if (shadow) vmcs->hdr.shadow_vmcs = 1; @@ -3002,7 +2998,7 @@ static __init int alloc_kvm_area(void) * physical CPU. */ if (kvm_is_using_evmcs()) - vmcs->hdr.revision_id = vmcs_config.revision_id; + vmcs->hdr.revision_id = vmx_basic_vmcs_revision_id(vmcs_config.basic); per_cpu(vmxarea, cpu) = vmcs; } @@ -4219,6 +4215,13 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * DO NOT query the vCPU's vmcs12, as vmcs12 is dynamically allocated + * and freed, and must not be accessed outside of vcpu->mutex. The + * vCPU's cached PI NV is valid if and only if posted interrupts + * enabled in its vmcs12, i.e. checking the vector also checks that + * L1 has enabled posted interrupts for L2. + */ if (is_guest_mode(vcpu) && vector == vmx->nested.posted_intr_nv) { /* @@ -5804,8 +5807,9 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) error_code |= (exit_qualification & EPT_VIOLATION_RWX_MASK) ? PFERR_PRESENT_MASK : 0; - error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) != 0 ? - PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; + if (error_code & EPT_VIOLATION_GVA_IS_VALID) + error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? + PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; /* * Check that the GPA doesn't exceed physical memory limits, as that is @@ -7265,6 +7269,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu, return handle_fastpath_set_msr_irqoff(vcpu); case EXIT_REASON_PREEMPTION_TIMER: return handle_fastpath_preemption_timer(vcpu, force_immediate_exit); + case EXIT_REASON_HLT: + return handle_fastpath_hlt(vcpu); default: return EXIT_FASTPATH_NONE; } @@ -7659,13 +7665,11 @@ u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) /* * Force WB and ignore guest PAT if the VM does NOT have a non-coherent - * device attached and the CPU doesn't support self-snoop. Letting the - * guest control memory types on Intel CPUs without self-snoop may - * result in unexpected behavior, and so KVM's (historical) ABI is to - * trust the guest to behave only as a last resort. + * device attached. Letting the guest control memory types on Intel + * CPUs may result in unexpected behavior, and so KVM's ABI is to trust + * the guest to behave only as a last resort. */ - if (!static_cpu_has(X86_FEATURE_SELFSNOOP) && - !kvm_arch_has_noncoherent_dma(vcpu->kvm)) + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT; return (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT); @@ -7967,6 +7971,7 @@ static __init void vmx_set_cpu_caps(void) kvm_cpu_cap_clear(X86_FEATURE_SGX_LC); kvm_cpu_cap_clear(X86_FEATURE_SGX1); kvm_cpu_cap_clear(X86_FEATURE_SGX2); + kvm_cpu_cap_clear(X86_FEATURE_SGX_EDECCSSA); } if (vmx_umip_emulated()) @@ -8517,7 +8522,7 @@ __init int vmx_hardware_setup(void) u64 use_timer_freq = 5000ULL * 1000 * 1000; cpu_preemption_timer_multi = - vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK; + vmx_misc_preemption_timer_rate(vmcs_config.misc); if (tsc_khz) use_timer_freq = (u64)tsc_khz * 1000; @@ -8584,8 +8589,6 @@ static void __vmx_exit(void) { allow_smaller_maxphyaddr = false; - cpu_emergency_unregister_virt_callback(vmx_emergency_disable); - vmx_cleanup_l1d_flush(); } @@ -8632,8 +8635,6 @@ static int __init vmx_init(void) pi_init_cpu(cpu); } - cpu_emergency_register_virt_callback(vmx_emergency_disable); - vmx_check_vmcs12_offsets(); /* diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 42498fa63abb..2325f773a20b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -17,10 +17,6 @@ #include "run_flags.h" #include "../mmu.h" -#define MSR_TYPE_R 1 -#define MSR_TYPE_W 2 -#define MSR_TYPE_RW 3 - #define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4)) #ifdef CONFIG_X86_64 @@ -756,4 +752,9 @@ static inline bool vmx_can_use_ipiv(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && enable_ipiv; } +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + #endif /* __KVM_X86_VMX_H */ diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h index eb48153bfd73..bba24ed99ee6 100644 --- a/arch/x86/kvm/vmx/vmx_onhyperv.h +++ b/arch/x86/kvm/vmx/vmx_onhyperv.h @@ -104,6 +104,14 @@ static inline void evmcs_load(u64 phys_addr) struct hv_vp_assist_page *vp_ap = hv_get_vp_assist_page(smp_processor_id()); + /* + * When enabling eVMCS, KVM verifies that every CPU has a valid hv_vp_assist_page() + * and aborts enabling the feature otherwise. CPU onlining path is also checked in + * vmx_hardware_enable(). + */ + if (KVM_BUG_ON(!vp_ap, kvm_get_running_vcpu()->kvm)) + return; + if (current_evmcs->hv_enlightenments_control.nested_flush_hypercall) vp_ap->nested_control.features.directhypercall = 1; vp_ap->current_nested_vmcs = phys_addr; diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index 8060e5fc6dbd..93e020dc88f6 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -47,7 +47,7 @@ static __always_inline void vmcs_check16(unsigned long field) BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "16-bit accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, - "16-bit accessor invalid for 32-bit high field"); + "16-bit accessor invalid for 32-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, "16-bit accessor invalid for natural width field"); } diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index ce3221cd1d01..a55981c5216e 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -13,8 +13,9 @@ extern struct kvm_x86_init_ops vt_init_ops __initdata; void vmx_hardware_unsetup(void); int vmx_check_processor_compat(void); -int vmx_hardware_enable(void); -void vmx_hardware_disable(void); +int vmx_enable_virtualization_cpu(void); +void vmx_disable_virtualization_cpu(void); +void vmx_emergency_disable_virtualization_cpu(void); int vmx_vm_init(struct kvm *kvm); void vmx_vm_destroy(struct kvm *kvm); int vmx_vcpu_precreate(struct kvm *kvm); @@ -56,7 +57,7 @@ bool vmx_has_emulated_msr(struct kvm *kvm, u32 index); void vmx_msr_filter_changed(struct kvm_vcpu *vcpu); void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu); void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu); -int vmx_get_msr_feature(struct kvm_msr_entry *msr); +int vmx_get_feature_msr(u32 msr, u64 *data); int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg); void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |