diff options
-rw-r--r-- | arch/x86/include/asm/kvm-x86-ops.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 25 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/main.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 84 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 76 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/x86_ops.h | 3 |
9 files changed, 120 insertions, 82 deletions
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 5aff7222e40f..7342af00e319 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -83,7 +83,6 @@ KVM_X86_OP(enable_nmi_window) KVM_X86_OP(enable_irq_window) KVM_X86_OP_OPTIONAL(update_cr8_intercept) KVM_X86_OP(refresh_apicv_exec_ctrl) -KVM_X86_OP_OPTIONAL(hwapic_irr_update) KVM_X86_OP_OPTIONAL(hwapic_isr_update) KVM_X86_OP_OPTIONAL(load_eoi_exitmap) KVM_X86_OP_OPTIONAL(set_virtual_apic_mode) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e159e44a6a1b..7bee06c14742 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1734,8 +1734,7 @@ struct kvm_x86_ops { const unsigned long required_apicv_inhibits; bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); - void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); - void (*hwapic_isr_update)(int isr); + void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3c83951c619e..1c89d20bbc1e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -734,10 +734,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) { if (unlikely(apic->apicv_active)) { - /* need to update RVI */ kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); - kvm_x86_call(hwapic_irr_update)(apic->vcpu, - apic_find_highest_irr(apic)); } else { apic->irr_pending = false; kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR); @@ -763,7 +760,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * just set SVI. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(vec); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, vec); else { ++apic->isr_count; BUG_ON(apic->isr_count > MAX_APIC_VECTOR); @@ -808,7 +805,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * and must be left alone. */ if (unlikely(apic->apicv_active)) - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(apic->vcpu, apic_find_highest_isr(apic)); else { --apic->isr_count; BUG_ON(apic->isr_count < 0); @@ -816,6 +813,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) } } +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active) + return; + + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); +} +EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr); + int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { /* This may race with setting of irr in __apic_accept_irq() and @@ -2805,8 +2813,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_update_ppr(apic); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, -1); - kvm_x86_call(hwapic_isr_update)(-1); + kvm_x86_call(hwapic_isr_update)(vcpu, -1); } vcpu->arch.apic_arb_prio = 0; @@ -3121,9 +3128,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_update_apicv(vcpu); if (apic->apicv_active) { kvm_x86_call(apicv_post_state_restore)(vcpu); - kvm_x86_call(hwapic_irr_update)(vcpu, - apic_find_highest_irr(apic)); - kvm_x86_call(hwapic_isr_update)(apic_find_highest_isr(apic)); + kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic)); } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 24add38beaf0..1a8553ebdb42 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -118,6 +118,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high); int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 92d35cc6cd15..28360c350454 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -100,7 +100,6 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .load_eoi_exitmap = vmx_load_eoi_exitmap, .apicv_pre_state_restore = vmx_apicv_pre_state_restore, .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS, - .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, .sync_pir_to_irr = vmx_sync_pir_to_irr, .deliver_interrupt = vmx_deliver_interrupt, @@ -126,7 +125,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .check_intercept = vmx_check_intercept, .handle_exit_irqoff = vmx_handle_exit_irqoff, - .cpu_dirty_log_size = PML_ENTITY_NUM, + .cpu_dirty_log_size = PML_LOG_NR_ENTRIES, .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, .nested_ops = &vmx_nested_ops, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index aa78b6f38dfe..a0ea5317cec8 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3442,7 +3442,7 @@ static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa) if (!nested_cpu_has_pml(vmcs12)) return 0; - if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) { + if (vmcs12->guest_pml_index >= PML_LOG_NR_ENTRIES) { vmx->nested.pml_full = true; return 1; } @@ -3481,14 +3481,6 @@ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) return 1; } -static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) -{ - u8 rvi = vmx_get_rvi(); - u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); - - return ((rvi & 0xf0) > (vppr & 0xf0)); -} - static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12); @@ -3508,7 +3500,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); enum vm_entry_failure_code entry_failure_code; - bool evaluate_pending_interrupts; union vmx_exit_reason exit_reason = { .basic = EXIT_REASON_INVALID_STATE, .failed_vmentry = 1, @@ -3527,13 +3518,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, kvm_service_local_tlb_flush_requests(vcpu); - evaluate_pending_interrupts = exec_controls_get(vmx) & - (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); - if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) - evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); - if (!evaluate_pending_interrupts) - evaluate_pending_interrupts |= kvm_apic_has_pending_init_or_sipi(vcpu); - if (!vmx->nested.nested_run_pending || !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -3616,9 +3600,13 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * Re-evaluate pending events if L1 had a pending IRQ/NMI/INIT/SIPI * when it executed VMLAUNCH/VMRESUME, as entering non-root mode can * effectively unblock various events, e.g. INIT/SIPI cause VM-Exit - * unconditionally. + * unconditionally. Take care to pull data from vmcs01 as appropriate, + * e.g. when checking for interrupt windows, as vmcs02 is now loaded. */ - if (unlikely(evaluate_pending_interrupts)) + if ((__exec_controls_get(&vmx->vmcs01) & (CPU_BASED_INTR_WINDOW_EXITING | + CPU_BASED_NMI_WINDOW_EXITING)) || + kvm_apic_has_pending_init_or_sipi(vcpu) || + kvm_apic_has_interrupt(vcpu)) kvm_make_request(KVM_REQ_EVENT, vcpu); /* @@ -3751,14 +3739,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (unlikely(status != NVMX_VMENTRY_SUCCESS)) goto vmentry_failed; - /* Emulate processing of posted interrupts on VM-Enter. */ - if (nested_cpu_has_posted_intr(vmcs12) && - kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) { - vmx->nested.pi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv); - } - /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; @@ -4220,13 +4200,25 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) */ bool block_nested_exceptions = vmx->nested.nested_run_pending; /* - * New events (not exceptions) are only recognized at instruction + * Events that don't require injection, i.e. that are virtualized by + * hardware, aren't blocked by a pending VM-Enter as KVM doesn't need + * to regain control in order to deliver the event, and hardware will + * handle event ordering, e.g. with respect to injected exceptions. + * + * But, new events (not exceptions) are only recognized at instruction * boundaries. If an event needs reinjection, then KVM is handling a - * VM-Exit that occurred _during_ instruction execution; new events are - * blocked until the instruction completes. + * VM-Exit that occurred _during_ instruction execution; new events, + * irrespective of whether or not they're injected, are blocked until + * the instruction completes. + */ + bool block_non_injected_events = kvm_event_needs_reinjection(vcpu); + /* + * Inject events are blocked by nested VM-Enter, as KVM is responsible + * for managing priority between concurrent events, i.e. KVM needs to + * wait until after VM-Enter completes to deliver injected events. */ bool block_nested_events = block_nested_exceptions || - kvm_event_needs_reinjection(vcpu); + block_non_injected_events; if (lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &apic->pending_events)) { @@ -4338,18 +4330,26 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { int irq; - if (block_nested_events) - return -EBUSY; - if (!nested_exit_on_intr(vcpu)) + if (!nested_exit_on_intr(vcpu)) { + if (block_nested_events) + return -EBUSY; + goto no_vmexit; + } if (!nested_exit_intr_ack_set(vcpu)) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; } irq = kvm_cpu_get_extint(vcpu); if (irq != -1) { + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); return 0; @@ -4368,11 +4368,22 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) * and enabling posted interrupts requires ACK-on-exit. */ if (irq == vmx->nested.posted_intr_nv) { + /* + * Nested posted interrupts are delivered via RVI, i.e. + * aren't injected by KVM, and so can be queued even if + * manual event injection is disallowed. + */ + if (block_non_injected_events) + return -EBUSY; + vmx->nested.pi_pending = true; kvm_apic_clear_irr(vcpu, irq); goto no_vmexit; } + if (block_nested_events) + return -EBUSY; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR | irq, 0); @@ -5050,6 +5061,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); } + if (vmx->nested.update_vmcs01_hwapic_isr) { + vmx->nested.update_vmcs01_hwapic_isr = false; + kvm_apic_update_hwapic_isr(vcpu); + } + if ((vm_exit_reason != -1) && (enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx))) vmx->nested.need_vmcs12_to_shadow_sync = true; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 893366e53732..72435b1769f6 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1636,7 +1636,8 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) * result in a #GP unless the same write also clears TraceEn. */ if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) && - ((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN)) + (data & RTIT_CTL_TRACEEN) && + data != vmx->pt_desc.guest.ctl) return 1; /* @@ -4820,7 +4821,7 @@ static void init_vmcs(struct vcpu_vmx *vmx) if (enable_pml) { vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX); } vmx_write_encls_bitmap(&vmx->vcpu, NULL); @@ -6049,7 +6050,7 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) /* * When nested=0, all VMX instruction VM Exits filter here. The handlers - * are overwritten by nested_vmx_setup() when nested=1. + * are overwritten by nested_vmx_hardware_setup() when nested=1. */ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) { @@ -6202,32 +6203,40 @@ static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u16 pml_idx, pml_tail_index; u64 *pml_buf; - u16 pml_idx; + int i; pml_idx = vmcs_read16(GUEST_PML_INDEX); /* Do nothing if PML buffer is empty */ - if (pml_idx == (PML_ENTITY_NUM - 1)) + if (pml_idx == PML_HEAD_INDEX) return; + /* + * PML index always points to the next available PML buffer entity + * unless PML log has just overflowed. + */ + pml_tail_index = (pml_idx >= PML_LOG_NR_ENTRIES) ? 0 : pml_idx + 1; - /* PML index always points to next available PML buffer entity */ - if (pml_idx >= PML_ENTITY_NUM) - pml_idx = 0; - else - pml_idx++; - + /* + * PML log is written backwards: the CPU first writes the entry 511 + * then the entry 510, and so on. + * + * Read the entries in the same order they were written, to ensure that + * the dirty ring is filled in the same order the CPU wrote them. + */ pml_buf = page_address(vmx->pml_pg); - for (; pml_idx < PML_ENTITY_NUM; pml_idx++) { + + for (i = PML_HEAD_INDEX; i >= pml_tail_index; i--) { u64 gpa; - gpa = pml_buf[pml_idx]; + gpa = pml_buf[i]; WARN_ON(gpa & (PAGE_SIZE - 1)); kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); } /* reset PML index */ - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); + vmcs_write16(GUEST_PML_INDEX, PML_HEAD_INDEX); } static void vmx_dump_sel(char *name, uint32_t sel) @@ -6862,11 +6871,32 @@ void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) read_unlock(&vcpu->kvm->mmu_lock); } -void vmx_hwapic_isr_update(int max_isr) +void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) { u16 status; u8 old; + /* + * If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI + * is only relevant for if and only if Virtual Interrupt Delivery is + * enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's + * vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested + * VM-Exit, otherwise L1 with run with a stale SVI. + */ + if (is_guest_mode(vcpu)) { + /* + * KVM is supposed to forward intercepted L2 EOIs to L1 if VID + * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. + * Note, userspace can stuff state while L2 is active; assert + * that VID is disabled if and only if the vCPU is in KVM_RUN + * to avoid false positives if userspace is setting APIC state. + */ + WARN_ON_ONCE(vcpu->wants_to_run && + nested_cpu_has_vid(get_vmcs12(vcpu))); + to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; + return; + } + if (max_isr == -1) max_isr = 0; @@ -6896,20 +6926,6 @@ static void vmx_set_rvi(int vector) } } -void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ - /* - * When running L2, updating RVI is only relevant when - * vmcs12 virtual-interrupt-delivery enabled. - * However, it can be enabled only when L1 also - * intercepts external-interrupts and in that case - * we should not update vmcs02 RVI but instead intercept - * interrupt. Therefore, do nothing when running L2. - */ - if (!is_guest_mode(vcpu)) - vmx_set_rvi(max_irr); -} - int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8597,7 +8613,7 @@ static void __vmx_exit(void) vmx_cleanup_l1d_flush(); } -static void vmx_exit(void) +static void __exit vmx_exit(void) { kvm_exit(); __vmx_exit(); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 43f573f6ca46..8b111ce1087c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -176,6 +176,7 @@ struct nested_vmx { bool reload_vmcs01_apic_access_page; bool update_vmcs01_cpu_dirty_logging; bool update_vmcs01_apicv_status; + bool update_vmcs01_hwapic_isr; /* * Enlightened VMCS has been enabled. It does not mean that L1 has to @@ -330,7 +331,10 @@ struct vcpu_vmx { bool ple_window_dirty; /* Support for PML */ -#define PML_ENTITY_NUM 512 +#define PML_LOG_NR_ENTRIES 512 + /* PML is written backwards: this is the first entry written by the CPU */ +#define PML_HEAD_INDEX (PML_LOG_NR_ENTRIES-1) + struct page *pml_pg; /* apic deadline value in host tsc */ diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index a55981c5216e..600c3bc89213 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -47,8 +47,7 @@ bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu); void vmx_migrate_timers(struct kvm_vcpu *vcpu); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); void vmx_apicv_pre_state_restore(struct kvm_vcpu *vcpu); -void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); -void vmx_hwapic_isr_update(int max_isr); +void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu); void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, int trig_mode, int vector); |