diff options
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 201 |
1 files changed, 109 insertions, 92 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 905ea6052517..8dbd8dbc83eb 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -194,7 +194,6 @@ struct vcpu_svm { unsigned int3_injected; unsigned long int3_rip; - u32 apf_reason; /* cached guest cpuid flags for faster access */ bool nrips_enabled : 1; @@ -277,6 +276,10 @@ static int avic; module_param(avic, int, S_IRUGO); #endif +/* enable/disable Virtual VMLOAD VMSAVE */ +static int vls = true; +module_param(vls, int, 0444); + /* AVIC VM ID bit masks and lock */ static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR); static DEFINE_SPINLOCK(avic_vm_id_lock); @@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm_set_interrupt_shadow(vcpu, 0); } -static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code, - bool reinject) +static void svm_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + unsigned nr = vcpu->arch.exception.nr; + bool has_error_code = vcpu->arch.exception.has_error_code; + bool reinject = vcpu->arch.exception.reinject; + u32 error_code = vcpu->arch.exception.error_code; /* * If we are within a nested VM we'd better #VMEXIT and let the guest @@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm) { u32 *msrpm = svm->msrpm; - svm->vmcb->control.lbr_ctl = 1; + svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); @@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) { u32 *msrpm = svm->msrpm; - svm->vmcb->control.lbr_ctl = 0; + svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); @@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void) } } + if (vls) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || + !IS_ENABLED(CONFIG_X86_64)) { + vls = false; + } else { + pr_info("Virtual VMLOAD VMSAVE supported\n"); + } + } + return 0; err: @@ -1152,9 +1167,9 @@ static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; - phys_addr_t bpa = page_to_phys(svm->avic_backing_page); - phys_addr_t lpa = page_to_phys(vm_data->avic_logical_id_table_page); - phys_addr_t ppa = page_to_phys(vm_data->avic_physical_id_table_page); + phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); + phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); + phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; @@ -1217,8 +1232,8 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_MWAIT); } - control->iopm_base_pa = iopm_base; - control->msrpm_base_pa = __pa(svm->msrpm); + control->iopm_base_pa = __sme_set(iopm_base); + control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; init_seg(&save->es); @@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm) if (avic) avic_init_vmcb(svm); + /* + * If hardware supports Virtual VMLOAD VMSAVE then enable it + * in VMCB and clear intercepts to avoid #VMEXIT. + */ + if (vls) { + clr_intercept(svm, INTERCEPT_VMLOAD); + clr_intercept(svm, INTERCEPT_VMSAVE); + svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; + } + mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -1352,9 +1377,9 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu) return -EINVAL; new_entry = READ_ONCE(*entry); - new_entry = (page_to_phys(svm->avic_backing_page) & - AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | - AVIC_PHYSICAL_ID_ENTRY_VALID_MASK; + new_entry = __sme_set((page_to_phys(svm->avic_backing_page) & + AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) | + AVIC_PHYSICAL_ID_ENTRY_VALID_MASK); WRITE_ONCE(*entry, new_entry); svm->avic_physical_id_cache = entry; @@ -1622,7 +1647,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb = page_address(page); clear_page(svm->vmcb); - svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; + svm->vmcb_pa = __sme_set(page_to_pfn(page) << PAGE_SHIFT); svm->asid_generation = 0; init_vmcb(svm); @@ -1650,7 +1675,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); @@ -1752,11 +1777,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } -static u32 svm_get_pkru(struct kvm_vcpu *vcpu) -{ - return 0; -} - static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -2096,34 +2116,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) static int pf_interception(struct vcpu_svm *svm) { u64 fault_address = svm->vmcb->control.exit_info_2; - u64 error_code; - int r = 1; - - switch (svm->apf_reason) { - default: - error_code = svm->vmcb->control.exit_info_1; + u64 error_code = svm->vmcb->control.exit_info_1; - trace_kvm_page_fault(fault_address, error_code); - if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) - kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); - r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, + return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address, svm->vmcb->control.insn_bytes, - svm->vmcb->control.insn_len); - break; - case KVM_PV_REASON_PAGE_NOT_PRESENT: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wait(fault_address); - local_irq_enable(); - break; - case KVM_PV_REASON_PAGE_READY: - svm->apf_reason = 0; - local_irq_disable(); - kvm_async_pf_task_wake(fault_address); - local_irq_enable(); - break; - } - return r; + svm->vmcb->control.insn_len, !npt_enabled); } static int db_interception(struct vcpu_svm *svm) @@ -2267,7 +2264,7 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ - int size, in, string; + int size, in, string, ret; unsigned port; ++svm->vcpu.stat.io_exits; @@ -2279,10 +2276,16 @@ static int io_interception(struct vcpu_svm *svm) port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); - return in ? kvm_fast_pio_in(vcpu, size, port) - : kvm_fast_pio_out(vcpu, size, port); + /* + * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + if (in) + return kvm_fast_pio_in(vcpu, size, port) && ret; + else + return kvm_fast_pio_out(vcpu, size, port) && ret; } static int nmi_interception(struct vcpu_svm *svm) @@ -2327,7 +2330,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) u64 pdpte; int ret; - ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, + ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, offset_in_page(cr3) + index * 8, 8); if (ret) return 0; @@ -2339,7 +2342,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.nested_cr3 = root; + svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); svm_flush_tlb(vcpu); } @@ -2415,15 +2418,29 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, if (!is_guest_mode(&svm->vcpu)) return 0; + vmexit = nested_svm_intercept(svm); + if (vmexit != NESTED_EXIT_DONE) + return 0; + svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; svm->vmcb->control.exit_info_1 = error_code; - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - vmexit = nested_svm_intercept(svm); - if (vmexit == NESTED_EXIT_DONE) - svm->nested.exit_required = true; + /* + * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. + * The fix is to add the ancillary datum (CR2 or DR6) to structs + * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be + * written only when inject_pending_event runs (DR6 would written here + * too). This should be conditional on a new capability---if the + * capability is disabled, kvm_multiple_exception would write the + * ancillary information to CR2 or DR6, for backwards ABI-compatibility. + */ + if (svm->vcpu.arch.exception.nested_apf) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + svm->nested.exit_required = true; return vmexit; } @@ -2598,7 +2615,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) break; case SVM_EXIT_EXCP_BASE + PF_VECTOR: /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->apf_reason == 0) + if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) return NESTED_EXIT_HOST; break; default: @@ -2645,7 +2662,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } /* async page fault always cause vmexit */ else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->apf_reason != 0) + svm->vcpu.arch.exception.nested_apf != 0) vmexit = NESTED_EXIT_DONE; break; } @@ -2702,7 +2719,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr dst->event_inj = from->event_inj; dst->event_inj_err = from->event_inj_err; dst->nested_cr3 = from->nested_cr3; - dst->lbr_ctl = from->lbr_ctl; + dst->virt_ext = from->virt_ext; } static int nested_svm_vmexit(struct vcpu_svm *svm) @@ -2856,7 +2873,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) svm->nested.msrpm[p] = svm->msrpm[p] | value; } - svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); + svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); return true; } @@ -3008,7 +3025,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) /* We don't want to see VMMCALLs from a nested guest */ clr_intercept(svm, INTERCEPT_VMMCALL); - svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; + svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; @@ -3055,6 +3072,7 @@ static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; struct page *page; + int ret; if (nested_svm_check_permissions(svm)) return 1; @@ -3064,18 +3082,19 @@ static int vmload_interception(struct vcpu_svm *svm) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(nested_vmcb, svm->vmcb); nested_svm_unmap(page); - return 1; + return ret; } static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; struct page *page; + int ret; if (nested_svm_check_permissions(svm)) return 1; @@ -3085,12 +3104,12 @@ static int vmsave_interception(struct vcpu_svm *svm) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); nested_svm_vmloadsave(svm->vmcb, nested_vmcb); nested_svm_unmap(page); - return 1; + return ret; } static int vmrun_interception(struct vcpu_svm *svm) @@ -3123,25 +3142,29 @@ failed: static int stgi_interception(struct vcpu_svm *svm) { + int ret; + if (nested_svm_check_permissions(svm)) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); enable_gif(svm); - return 1; + return ret; } static int clgi_interception(struct vcpu_svm *svm) { + int ret; + if (nested_svm_check_permissions(svm)) return 1; svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + ret = kvm_skip_emulated_instruction(&svm->vcpu); disable_gif(svm); @@ -3152,7 +3175,7 @@ static int clgi_interception(struct vcpu_svm *svm) mark_dirty(svm->vmcb, VMCB_INTR); } - return 1; + return ret; } static int invlpga_interception(struct vcpu_svm *svm) @@ -3166,8 +3189,7 @@ static int invlpga_interception(struct vcpu_svm *svm) kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int skinit_interception(struct vcpu_svm *svm) @@ -3190,7 +3212,7 @@ static int xsetbv_interception(struct vcpu_svm *svm) if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } return 1; @@ -3286,8 +3308,7 @@ static int invlpg_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); - skip_emulated_instruction(&svm->vcpu); - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int emulate_on_interception(struct vcpu_svm *svm) @@ -3437,9 +3458,7 @@ static int dr_interception(struct vcpu_svm *svm) kvm_register_write(&svm->vcpu, reg, val); } - skip_emulated_instruction(&svm->vcpu); - - return 1; + return kvm_skip_emulated_instruction(&svm->vcpu); } static int cr8_write_interception(struct vcpu_svm *svm) @@ -3562,6 +3581,7 @@ static int rdmsr_interception(struct vcpu_svm *svm) if (svm_get_msr(&svm->vcpu, &msr_info)) { trace_kvm_msr_read_ex(ecx); kvm_inject_gp(&svm->vcpu, 0); + return 1; } else { trace_kvm_msr_read(ecx, msr_info.data); @@ -3570,9 +3590,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, msr_info.data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } - return 1; } static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) @@ -3698,11 +3717,11 @@ static int wrmsr_interception(struct vcpu_svm *svm) if (kvm_set_msr(&svm->vcpu, &msr)) { trace_kvm_msr_write_ex(ecx, data); kvm_inject_gp(&svm->vcpu, 0); + return 1; } else { trace_kvm_msr_write(ecx, data); - skip_emulated_instruction(&svm->vcpu); + return kvm_skip_emulated_instruction(&svm->vcpu); } - return 1; } static int msr_interception(struct vcpu_svm *svm) @@ -3731,8 +3750,7 @@ static int pause_interception(struct vcpu_svm *svm) static int nop_interception(struct vcpu_svm *svm) { - skip_emulated_instruction(&(svm->vcpu)); - return 1; + return kvm_skip_emulated_instruction(&(svm->vcpu)); } static int monitor_interception(struct vcpu_svm *svm) @@ -4117,7 +4135,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar); pr_err("%-20s%08x\n", "event_inj:", control->event_inj); pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); - pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); + pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext); pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page); pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id); @@ -4488,7 +4506,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, irq.vector); *svm = to_svm(vcpu); - vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page); + vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page)); vcpu_info->vector = irq.vector; return 0; @@ -4539,7 +4557,8 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, struct amd_iommu_pi_data pi; /* Try to enable guest_mode in IRTE */ - pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK; + pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & + AVIC_HPA_MASK); pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, svm->vcpu.vcpu_id); pi.is_guest_mode = true; @@ -4965,7 +4984,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); @@ -4988,7 +5007,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.cr3 = root; + svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); svm_flush_tlb(vcpu); } @@ -4997,7 +5016,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.nested_cr3 = root; + svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); /* Also sync guest cr3 here in case we live migrate */ @@ -5390,8 +5409,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .get_pkru = svm_get_pkru, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, |