Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx.c	601
1 file changed, 199 insertions(+), 402 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ffd8f24dc66c..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,6 +16,7 @@
  */
 
 #include "irq.h"
+#include "vmx.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -30,8 +31,6 @@
 
 #include <asm/io.h>
 #include <asm/desc.h>
-#include <asm/vmx.h>
-#include <asm/virtext.h>
 
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
@@ -91,11 +90,6 @@ struct vcpu_vmx {
 	} rmode;
 	int vpid;
 	bool emulation_required;
-
-	/* Support for vnmi-less CPUs */
-	int soft_vnmi_blocked;
-	ktime_t entry_time;
-	s64 vnmi_blocked_time;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -128,7 +122,7 @@
 	u32 vmentry_ctrl;
 } vmcs_config;
 
-static struct vmx_capability {
+struct vmx_capability {
 	u32 ept;
 	u32 vpid;
 } vmx_capability;
@@ -189,21 +183,21 @@ static inline int is_page_fault(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_no_device(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_invalid_opcode(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
 			     INTR_INFO_VALID_MASK)) ==
-		(INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+		(INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
 }
 
 static inline int is_external_interrupt(u32 intr_info)
@@ -480,13 +474,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
 	if (!vcpu->fpu_active)
 		eb |= 1u << NM_VECTOR;
-	if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
-		if (vcpu->guest_debug &
-		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-			eb |= 1u << DB_VECTOR;
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-			eb |= 1u << BP_VECTOR;
-	}
+	if (vcpu->guest_debug.enabled)
+		eb |= 1u << DB_VECTOR;
 	if (vcpu->arch.rmode.active)
 		eb = ~0;
 	if (vm_need_ept())
@@ -752,33 +741,29 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
-	if (has_error_code) {
+	if (has_error_code)
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
-	}
 
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = nr;
 		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-		if (nr == BP_VECTOR || nr == OF_VECTOR)
+		if (nr == BP_VECTOR)
 			vmx->rmode.irq.rip++;
-		intr_info |= INTR_TYPE_SOFT_INTR;
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			     nr | INTR_TYPE_SOFT_INTR
+			     | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+			     | INTR_INFO_VALID_MASK);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
-	} else
-		intr_info |= INTR_TYPE_HARD_EXCEPTION;
-
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+		     nr | INTR_TYPE_EXCEPTION
+		     | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+		     | INTR_INFO_VALID_MASK);
 }
 
 static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -972,13 +957,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n",
 			msr_index, data);
 		break;
-	case MSR_IA32_CR_PAT:
-		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-			vmcs_write64(GUEST_IA32_PAT, data);
-			vcpu->arch.pat = data;
-			break;
-		}
-		/* Otherwise falls through to kvm_set_msr_common */
 	default:
 		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
@@ -1007,28 +985,40 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 {
-	int old_debug = vcpu->guest_debug;
-	unsigned long flags;
+	unsigned long dr7 = 0x400;
+	int old_singlestep;
 
-	vcpu->guest_debug = dbg->control;
-	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
-		vcpu->guest_debug = 0;
+	old_singlestep = vcpu->guest_debug.singlestep;
 
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
-	else
-		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+	vcpu->guest_debug.enabled = dbg->enabled;
+	if (vcpu->guest_debug.enabled) {
+		int i;
 
-	flags = vmcs_readl(GUEST_RFLAGS);
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
-	else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+		dr7 |= 0x200;  /* exact */
+		for (i = 0; i < 4; ++i) {
+			if (!dbg->breakpoints[i].enabled)
+				continue;
+			vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
+			dr7 |= 2 << (i*2);    /* global enable */
+			dr7 |= 0 << (i*4+16); /* execution breakpoint */
+		}
+
+		vcpu->guest_debug.singlestep = dbg->singlestep;
+	} else
+		vcpu->guest_debug.singlestep = 0;
+
+	if (old_singlestep && !vcpu->guest_debug.singlestep) {
+		unsigned long flags;
+
+		flags = vmcs_readl(GUEST_RFLAGS);
 		flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-	vmcs_writel(GUEST_RFLAGS, flags);
+		vmcs_writel(GUEST_RFLAGS, flags);
+	}
 
 	update_exception_bitmap(vcpu);
+	vmcs_writel(GUEST_DR7, dr7);
 
 	return 0;
 }
@@ -1042,7 +1032,8 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
 
 static __init int cpu_has_kvm_support(void)
 {
-	return cpu_has_vmx();
+	unsigned long ecx = cpuid_ecx(1);
+	return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
 }
 
 static __init int vmx_disabled_by_bios(void)
@@ -1088,20 +1079,11 @@
 		__vcpu_clear(vmx);
 }
 
-
-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
- * tricks.
- */
-static void kvm_cpu_vmxoff(void)
-{
-	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
-	write_cr4(read_cr4() & ~X86_CR4_VMXE);
-}
-
 static void hardware_disable(void *garbage)
 {
 	vmclear_local_vcpus();
-	kvm_cpu_vmxoff();
+	asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
 
 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1194,13 +1176,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 #ifdef CONFIG_X86_64
 	min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
-	opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+	opt = 0;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
 				&_vmexit_control) < 0)
 		return -EIO;
 
-	min = 0;
-	opt = VM_ENTRY_LOAD_IA32_PAT;
+	min = opt = 0;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
 				&_vmentry_control) < 0)
 		return -EIO;
@@ -2106,9 +2087,8 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
  */
 static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 {
-	u32 host_sysenter_cs, msr_low, msr_high;
+	u32 host_sysenter_cs;
 	u32 junk;
-	u64 host_pat;
 	unsigned long a;
 	struct descriptor_table dt;
 	int i;
@@ -2196,20 +2176,6 @@
 	rdmsrl(MSR_IA32_SYSENTER_EIP, a);
 	vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
-	if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
-		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
-		host_pat = msr_low | ((u64) msr_high << 32);
-		vmcs_write64(HOST_IA32_PAT, host_pat);
-	}
-	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
-		rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
-		host_pat = msr_low | ((u64) msr_high << 32);
-		/* Write the default value follow host pat */
-		vmcs_write64(GUEST_IA32_PAT, host_pat);
-		/* Keep arch.pat sync with GUEST_IA32_PAT */
-		vmx->vcpu.arch.pat = host_pat;
-	}
-
 	for (i = 0; i < NR_VMX_MSR; ++i) {
 		u32 index = vmx_msr_index[i];
 		u32 data_low, data_high;
@@ -2264,8 +2230,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmx->vcpu.arch.rmode.active = 0;
 
-	vmx->soft_vnmi_blocked = 0;
-
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
 	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2315,6 +2279,7 @@
 		kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
+	/* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
 	vmcs_writel(GUEST_DR7, 0x400);
 
 	vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2370,29 +2335,6 @@ out:
 	return ret;
 }
 
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
-	u32 cpu_based_vm_exec_control;
-
-	if (!cpu_has_virtual_nmis()) {
-		enable_irq_window(vcpu);
-		return;
-	}
-
-	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
-	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
-	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2416,54 +2358,10 @@
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-
-	if (!cpu_has_virtual_nmis()) {
-		/*
-		 * Tracking the NMI-blocked state in software is built upon
-		 * finding the next open IRQ window. This, in turn, depends on
-		 * well-behaving guests: They have to keep IRQs disabled at
-		 * least as long as the NMI handler runs. Otherwise we may
-		 * cause NMI nesting, maybe breaking the guest. But as this is
-		 * highly unlikely, we can live with the residual risk.
-		 */
-		vmx->soft_vnmi_blocked = 1;
-		vmx->vnmi_blocked_time = 0;
-	}
-
-	++vcpu->stat.nmi_injections;
-	if (vcpu->arch.rmode.active) {
-		vmx->rmode.irq.pending = true;
-		vmx->rmode.irq.vector = NMI_VECTOR;
-		vmx->rmode.irq.rip = kvm_rip_read(vcpu);
-		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			     NMI_VECTOR | INTR_TYPE_SOFT_INTR |
-			     INTR_INFO_VALID_MASK);
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
-		return;
-	}
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
 }
 
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
-{
-	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
-	vcpu->arch.nmi_window_open =
-		!(guest_intr & (GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS |
-				GUEST_INTR_STATE_NMI));
-	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
-		vcpu->arch.nmi_window_open = 0;
-
-	vcpu->arch.interrupt_window_open =
-		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-		 !(guest_intr & (GUEST_INTR_STATE_STI |
-				 GUEST_INTR_STATE_MOV_SS)));
-}
-
 static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
 {
 	int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2476,54 +2374,40 @@
 	kvm_queue_interrupt(vcpu, irq);
 }
 
+
 static void do_interrupt_requests(struct kvm_vcpu *vcpu,
 				       struct kvm_run *kvm_run)
 {
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
-			return;
-		}
-	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (vcpu->arch.irq_summary
-			 || kvm_run->request_interrupt_window)
-			enable_irq_window(vcpu);
-		return;
-	}
+	u32 cpu_based_vm_exec_control;
 
-	if (vcpu->arch.interrupt_window_open) {
-		if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
-			kvm_do_inject_irq(vcpu);
+	vcpu->arch.interrupt_window_open =
+		((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+		 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
 
-		if (vcpu->arch.interrupt.pending)
-			vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
-	}
+	if (vcpu->arch.interrupt_window_open &&
+	    vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+		kvm_do_inject_irq(vcpu);
+
+	if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
+		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	if (!vcpu->arch.interrupt_window_open &&
 	    (vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
-		enable_irq_window(vcpu);
+		/*
+		 * Interrupts blocked.  Wait for unblock.
+		 */
+		cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	else
+		cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
 	int ret;
 	struct kvm_userspace_memory_region tss_mem = {
-		.slot = TSS_PRIVATE_MEMSLOT,
+		.slot = 8,
 		.guest_phys_addr = addr,
 		.memory_size = PAGE_SIZE * 3,
 		.flags = 0,
@@ -2536,6 +2420,24 @@
 	return 0;
 }
 
+static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
+{
+	struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+
+	set_debugreg(dbg->bp[0], 0);
+	set_debugreg(dbg->bp[1], 1);
+	set_debugreg(dbg->bp[2], 2);
+	set_debugreg(dbg->bp[3], 3);
+
+	if (dbg->singlestep) {
+		unsigned long flags;
+
+		flags = vmcs_readl(GUEST_RFLAGS);
+		flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+		vmcs_writel(GUEST_RFLAGS, flags);
+	}
+}
+
 static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 				  int vec, u32 err_code)
 {
@@ -2552,17 +2454,9 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 * the required debugging infrastructure rework.
 	 */
 	switch (vec) {
+	case DE_VECTOR:
 	case DB_VECTOR:
-		if (vcpu->guest_debug &
-		    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-			return 0;
-		kvm_queue_exception(vcpu, vec);
-		return 1;
 	case BP_VECTOR:
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-			return 0;
-		/* fall through */
-	case DE_VECTOR:
 	case OF_VECTOR:
 	case BR_VECTOR:
 	case UD_VECTOR:
@@ -2579,8 +2473,8 @@
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u32 intr_info, ex_no, error_code;
-	unsigned long cr2, rip, dr6;
+	u32 intr_info, error_code;
+	unsigned long cr2, rip;
 	u32 vect_info;
 	enum emulation_result er;
 
@@ -2598,7 +2492,7 @@
 			set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
 	}
 
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
 		return 1;  /* already handled by vmx_vcpu_run() */
 
 	if (is_no_device(intr_info)) {
@@ -2639,30 +2533,14 @@
 		return 1;
 	}
 
-	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
-	switch (ex_no) {
-	case DB_VECTOR:
-		dr6 = vmcs_readl(EXIT_QUALIFICATION);
-		if (!(vcpu->guest_debug &
-		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
-			vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
-			kvm_queue_exception(vcpu, DB_VECTOR);
-			return 1;
-		}
-		kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
-		kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
-		/* fall through */
-	case BP_VECTOR:
+	if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
+	    (INTR_TYPE_EXCEPTION | 1)) {
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
-		kvm_run->debug.arch.exception = ex_no;
-		break;
-	default:
-		kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
-		kvm_run->ex.exception = ex_no;
-		kvm_run->ex.error_code = error_code;
-		break;
+		return 0;
 	}
+	kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+	kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
+	kvm_run->ex.error_code = error_code;
 	return 0;
 }
 
@@ -2703,7 +2581,6 @@
 	rep = (exit_qualification & 32) != 0;
 	port = exit_qualification >> 16;
 
-	skip_emulated_instruction(vcpu);
 	return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
 }
 
@@ -2801,44 +2678,21 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	unsigned long val;
 	int dr, reg;
 
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
-		/*
-		 * As the vm-exit takes precedence over the debug trap, we
-		 * need to emulate the latter, either for the host or the
-		 * guest debugging itself.
-		 */
-		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
-			kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
-			kvm_run->debug.arch.dr7 = dr;
-			kvm_run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
-			kvm_run->debug.arch.exception = DB_VECTOR;
-			kvm_run->exit_reason = KVM_EXIT_DEBUG;
-			return 0;
-		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
-			vcpu->arch.dr6 |= DR6_BD;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-			kvm_queue_exception(vcpu, DB_VECTOR);
-			return 1;
-		}
-	}
-
+	/*
+	 * FIXME: this code assumes the host is debugging the guest.
+	 *        need to deal with guest debugging itself too.
+	 */
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
-	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
-	if (exit_qualification & TYPE_MOV_FROM_DR) {
+	dr = exit_qualification & 7;
+	reg = (exit_qualification >> 8) & 15;
+	if (exit_qualification & 16) {
+		/* mov from dr */
 		switch (dr) {
-		case 0 ... 3:
-			val = vcpu->arch.db[dr];
-			break;
 		case 6:
-			val = vcpu->arch.dr6;
+			val = 0xffff0ff0;
 			break;
 		case 7:
-			val = vcpu->arch.dr7;
+			val = 0x400;
 			break;
 		default:
 			val = 0;
@@ -2846,38 +2700,7 @@
 		kvm_register_write(vcpu, reg, val);
 		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
-		val = vcpu->arch.regs[reg];
-		switch (dr) {
-		case 0 ... 3:
-			vcpu->arch.db[dr] = val;
-			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
-				vcpu->arch.eff_db[dr] = val;
-			break;
-		case 4 ... 5:
-			if (vcpu->arch.cr4 & X86_CR4_DE)
-				kvm_queue_exception(vcpu, UD_VECTOR);
-			break;
-		case 6:
-			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
-			}
-			vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
-			break;
-		case 7:
-			if (val & 0xffffffff00000000ULL) {
-				kvm_queue_exception(vcpu, GP_VECTOR);
-				break;
-			}
-			vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
-			if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-				vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-				vcpu->arch.switch_db_regs =
-					(val & DR7_BP_EN_MASK);
-			}
-			break;
-		}
-		KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
+		/* mov to dr */
 	}
 	skip_emulated_instruction(vcpu);
 	return 1;
@@ -2944,7 +2767,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 
 	KVMTRACE_0D(PEND_INTR, vcpu, handler);
-	++vcpu->stat.irq_window_exits;
 
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
@@ -2953,6 +2775,7 @@
 	if (kvm_run->request_interrupt_window &&
 	    !vcpu->arch.irq_summary) {
 		kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+		++vcpu->stat.irq_window_exits;
 		return 0;
 	}
 	return 1;
@@ -3009,7 +2832,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qualification;
 	u16 tss_selector;
 	int reason;
@@ -3017,29 +2839,9 @@
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
 	reason = (u32)exit_qualification >> 30;
-	if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
-	    (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
-	    == INTR_TYPE_NMI_INTR) {
-		vcpu->arch.nmi_injected = false;
-		if (cpu_has_virtual_nmis())
-			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-				      GUEST_INTR_STATE_NMI);
-	}
 	tss_selector = exit_qualification;
 
-	if (!kvm_task_switch(vcpu, tss_selector, reason))
-		return 0;
-
-	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
-
-	/*
-	 * TODO: What about debug traps on tss switch?
-	 *       Are we supposed to inject them and update dr6?
-	 */
-
-	return 1;
+	return kvm_task_switch(vcpu, tss_selector, reason);
 }
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3125,12 +2927,16 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	while (!guest_state_valid(vcpu)) {
 		err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
-		if (err == EMULATE_DO_MMIO)
-			break;
-
-		if (err != EMULATE_DONE) {
-			kvm_report_emulation_failure(vcpu, "emulation failure");
-			return;
+		switch (err) {
+		case EMULATE_DONE:
+			break;
+		case EMULATE_DO_MMIO:
+			kvm_report_emulation_failure(vcpu, "mmio");
+			/* TODO: Handle MMIO */
+			return;
+		default:
+			kvm_report_emulation_failure(vcpu, "emulation failure");
+			return;
 		}
 
 		if (signal_pending(current))
@@ -3142,10 +2948,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
 	local_irq_disable();
 	preempt_disable();
 
-	/* Guest state should be valid now except if we need to
-	 * emulate an MMIO */
-	if (guest_state_valid(vcpu))
-		vmx->emulation_required = 0;
+	/* Guest state should be valid now, no more emulation should be needed */
+	vmx->emulation_required = 0;
 }
 
 /*
@@ -3192,11 +2996,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
 		    (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
 
-	/* If we need to emulate an MMIO from handle_invalid_guest_state
-	 * we just return 0 */
-	if (vmx->emulation_required && emulate_invalid_guest_state)
-		return 0;
-
 	/* Access CR3 don't cause VMExit in paging mode, so we need
 	 * to sync with guest real CR3. */
 	if (vm_need_ept() && is_paging(vcpu)) {
@@ -3213,32 +3012,9 @@
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
-			exit_reason != EXIT_REASON_EPT_VIOLATION &&
-			exit_reason != EXIT_REASON_TASK_SWITCH))
-		printk(KERN_WARNING "%s: unexpected, valid vectoring info "
-		       "(0x%x) and exit reason is 0x%x\n",
-		       __func__, vectoring_info, exit_reason);
-
-	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
-		if (vcpu->arch.interrupt_window_open) {
-			vmx->soft_vnmi_blocked = 0;
-			vcpu->arch.nmi_window_open = 1;
-		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
-			   vcpu->arch.nmi_pending) {
-			/*
-			 * This CPU don't support us in finding the end of an
-			 * NMI-blocked window if the guest runs with IRQs
-			 * disabled. So we pull the trigger after 1 s of
-			 * futile waiting, but inform the user about this.
-			 */
-			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
-			       "state on VCPU %d after 1 s timeout\n",
-			       __func__, vcpu->vcpu_id);
-			vmx->soft_vnmi_blocked = 0;
-			vmx->vcpu.arch.nmi_window_open = 1;
-		}
-	}
-
+			exit_reason != EXIT_REASON_EPT_VIOLATION))
+		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
+		       "exit reason is 0x%x\n", __func__, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers &&
 	    kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3266,6 +3042,51 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 	vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
 }
 
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+	u32 cpu_based_vm_exec_control;
+
+	if (!cpu_has_virtual_nmis())
+		return;
+
+	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return !(guest_intr & (GUEST_INTR_STATE_NMI |
+			       GUEST_INTR_STATE_MOV_SS |
+			       GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+	u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+	return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+				GUEST_INTR_STATE_STI)) &&
+		(vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.nmi_pending)
+		enable_nmi_window(vcpu);
+	else if (kvm_cpu_has_interrupt(vcpu))
+		enable_irq_window(vcpu);
+}
+
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -3288,9 +3109,7 @@
 		if (unblock_nmi && vector != DF_VECTOR)
 			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				      GUEST_INTR_STATE_NMI);
-	} else if (unlikely(vmx->soft_vnmi_blocked))
-		vmx->vnmi_blocked_time +=
-			ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+	}
 
 	idt_vectoring_info = vmx->idt_vectoring_info;
 	idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3309,8 +3128,7 @@
 			vmx->vcpu.arch.nmi_injected = false;
 	}
 	kvm_clear_exception_queue(&vmx->vcpu);
-	if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
-				type == INTR_TYPE_SOFT_EXCEPTION)) {
+	if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3329,34 +3147,26 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
 	update_tpr_threshold(vcpu);
 
-	vmx_update_window_states(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_STI |
-				GUEST_INTR_STATE_MOV_SS);
-
-	if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-		if (vcpu->arch.interrupt.pending) {
-			enable_nmi_window(vcpu);
-		} else if (vcpu->arch.nmi_window_open) {
-			vcpu->arch.nmi_pending = false;
-			vcpu->arch.nmi_injected = true;
-		} else {
-			enable_nmi_window(vcpu);
+	if (cpu_has_virtual_nmis()) {
+		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+			if (vcpu->arch.interrupt.pending) {
+				enable_nmi_window(vcpu);
+			} else if (vmx_nmi_enabled(vcpu)) {
+				vcpu->arch.nmi_pending = false;
+				vcpu->arch.nmi_injected = true;
+			} else {
+				enable_intr_window(vcpu);
+				return;
+			}
+		}
+		if (vcpu->arch.nmi_injected) {
+			vmx_inject_nmi(vcpu);
+			enable_intr_window(vcpu);
 			return;
 		}
 	}
-	if (vcpu->arch.nmi_injected) {
-		vmx_inject_nmi(vcpu);
-		if (vcpu->arch.nmi_pending)
-			enable_nmi_window(vcpu);
-		else if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
-		return;
-	}
+
 	if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
-		if (vcpu->arch.interrupt_window_open)
+		if (vmx_irq_enabled(vcpu))
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
 		else
 			enable_irq_window(vcpu);
@@ -3364,8 +3174,6 @@
 	if (vcpu->arch.interrupt.pending) {
 		vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
 		kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
-		if (kvm_cpu_has_interrupt(vcpu))
-			enable_irq_window(vcpu);
 	}
 }
 
@@ -3405,10 +3213,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 intr_info;
 
-	/* Record the guest's net vcpu time for enforced NMI injections. */
-	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
-		vmx->entry_time = ktime_get();
-
 	/* Handle invalid guest state instead of entering VMX */
 	if (vmx->emulation_required && emulate_invalid_guest_state) {
 		handle_invalid_guest_state(vcpu, kvm_run);
@@ -3425,8 +3229,6 @@
 	 */
 	vmcs_writel(HOST_CR0, read_cr0());
 
-	set_debugreg(vcpu->arch.dr6, 6);
-
 	asm(
 		/* Store host registers */
 		"push %%"R"dx; push %%"R"bp;"
@@ -3521,13 +3323,13 @@
 	vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
 	vcpu->arch.regs_dirty = 0;
 
-	get_debugreg(vcpu->arch.dr6, 6);
-
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 	if (vmx->rmode.irq.pending)
 		fixup_rmode_irq(vmx);
 
-	vmx_update_window_states(vcpu);
+	vcpu->arch.interrupt_window_open =
+		(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+		 (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
 
 	asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 	vmx->launched = 1;
@@ -3535,7 +3337,7 @@
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
 	    (intr_info & INTR_INFO_VALID_MASK)) {
 		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
@@ -3653,11 +3455,6 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
-{
-	return VMX_EPT_MT_EPTE_SHIFT;
-}
-
 static struct kvm_x86_ops vmx_x86_ops = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -3677,6 +3474,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_put = vmx_vcpu_put,
 
 	.set_guest_debug = set_guest_debug,
+	.guest_debug_pre = kvm_guest_debug_pre,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
@@ -3712,7 +3510,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
-	.get_mt_mask_shift = vmx_get_mt_mask_shift,
 };
 
 static int __init vmx_init(void)
@@ -3769,10 +3566,10 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
 			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
-				VMX_EPT_EXECUTABLE_MASK,
-				VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();
 	} else
 		kvm_disable_tdp();