summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx.c')
-rw-r--r--arch/x86/kvm/vmx.c601
1 files changed, 199 insertions, 402 deletions
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ffd8f24dc66c..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,6 +16,7 @@
*/
#include "irq.h"
+#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -30,8 +31,6 @@
#include <asm/io.h>
#include <asm/desc.h>
-#include <asm/vmx.h>
-#include <asm/virtext.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
@@ -91,11 +90,6 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
-
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -128,7 +122,7 @@ static struct vmcs_config {
u32 vmentry_ctrl;
} vmcs_config;
-static struct vmx_capability {
+struct vmx_capability {
u32 ept;
u32 vpid;
} vmx_capability;
@@ -189,21 +183,21 @@ static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_external_interrupt(u32 intr_info)
@@ -480,13 +474,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
- if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- eb |= 1u << DB_VECTOR;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- eb |= 1u << BP_VECTOR;
- }
+ if (vcpu->guest_debug.enabled)
+ eb |= 1u << DB_VECTOR;
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
@@ -752,33 +741,29 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (has_error_code) {
+ if (has_error_code)
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
- intr_info |= INTR_INFO_DELIVER_CODE_MASK;
- }
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = nr;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (nr == BP_VECTOR || nr == OF_VECTOR)
+ if (nr == BP_VECTOR)
vmx->rmode.irq.rip++;
- intr_info |= INTR_TYPE_SOFT_INTR;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ nr | INTR_TYPE_SOFT_INTR
+ | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+ | INTR_INFO_VALID_MASK);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
- if (nr == BP_VECTOR || nr == OF_VECTOR) {
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- intr_info |= INTR_TYPE_SOFT_EXCEPTION;
- } else
- intr_info |= INTR_TYPE_HARD_EXCEPTION;
-
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ nr | INTR_TYPE_EXCEPTION
+ | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+ | INTR_INFO_VALID_MASK);
}
static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -972,13 +957,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
break;
- case MSR_IA32_CR_PAT:
- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- vmcs_write64(GUEST_IA32_PAT, data);
- vcpu->arch.pat = data;
- break;
- }
- /* Otherwise falls through to kvm_set_msr_common */
default:
vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
@@ -1007,28 +985,40 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
{
- int old_debug = vcpu->guest_debug;
- unsigned long flags;
+ unsigned long dr7 = 0x400;
+ int old_singlestep;
- vcpu->guest_debug = dbg->control;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
- vcpu->guest_debug = 0;
+ old_singlestep = vcpu->guest_debug.singlestep;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
- else
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->guest_debug.enabled = dbg->enabled;
+ if (vcpu->guest_debug.enabled) {
+ int i;
- flags = vmcs_readl(GUEST_RFLAGS);
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+ dr7 |= 0x200; /* exact */
+ for (i = 0; i < 4; ++i) {
+ if (!dbg->breakpoints[i].enabled)
+ continue;
+ vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
+ dr7 |= 2 << (i*2); /* global enable */
+ dr7 |= 0 << (i*4+16); /* execution breakpoint */
+ }
+
+ vcpu->guest_debug.singlestep = dbg->singlestep;
+ } else
+ vcpu->guest_debug.singlestep = 0;
+
+ if (old_singlestep && !vcpu->guest_debug.singlestep) {
+ unsigned long flags;
+
+ flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
+ vmcs_writel(GUEST_RFLAGS, flags);
+ }
update_exception_bitmap(vcpu);
+ vmcs_writel(GUEST_DR7, dr7);
return 0;
}
@@ -1042,7 +1032,8 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
static __init int cpu_has_kvm_support(void)
{
- return cpu_has_vmx();
+ unsigned long ecx = cpuid_ecx(1);
+ return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
}
static __init int vmx_disabled_by_bios(void)
@@ -1088,20 +1079,11 @@ static void vmclear_local_vcpus(void)
__vcpu_clear(vmx);
}
-
-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
- * tricks.
- */
-static void kvm_cpu_vmxoff(void)
-{
- asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
- write_cr4(read_cr4() & ~X86_CR4_VMXE);
-}
-
static void hardware_disable(void *garbage)
{
vmclear_local_vcpus();
- kvm_cpu_vmxoff();
+ asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+ write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1194,13 +1176,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
- opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+ opt = 0;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
- min = 0;
- opt = VM_ENTRY_LOAD_IA32_PAT;
+ min = opt = 0;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -2106,9 +2087,8 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
- u32 host_sysenter_cs, msr_low, msr_high;
+ u32 host_sysenter_cs;
u32 junk;
- u64 host_pat;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2196,20 +2176,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
- if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
- rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
- host_pat = msr_low | ((u64) msr_high << 32);
- vmcs_write64(HOST_IA32_PAT, host_pat);
- }
- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
- host_pat = msr_low | ((u64) msr_high << 32);
- /* Write the default value follow host pat */
- vmcs_write64(GUEST_IA32_PAT, host_pat);
- /* Keep arch.pat sync with GUEST_IA32_PAT */
- vmx->vcpu.arch.pat = host_pat;
- }
-
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
@@ -2264,8 +2230,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
- vmx->soft_vnmi_blocked = 0;
-
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2315,6 +2279,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0);
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
+ /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2370,29 +2335,6 @@ out:
return ret;
}
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- if (!cpu_has_virtual_nmis()) {
- enable_irq_window(vcpu);
- return;
- }
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2416,54 +2358,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!cpu_has_virtual_nmis()) {
- /*
- * Tracking the NMI-blocked state in software is built upon
- * finding the next open IRQ window. This, in turn, depends on
- * well-behaving guests: They have to keep IRQs disabled at
- * least as long as the NMI handler runs. Otherwise we may
- * cause NMI nesting, maybe breaking the guest. But as this is
- * highly unlikely, we can live with the residual risk.
- */
- vmx->soft_vnmi_blocked = 1;
- vmx->vnmi_blocked_time = 0;
- }
-
- ++vcpu->stat.nmi_injections;
- if (vcpu->arch.rmode.active) {
- vmx->rmode.irq.pending = true;
- vmx->rmode.irq.vector = NMI_VECTOR;
- vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- NMI_VECTOR | INTR_TYPE_SOFT_INTR |
- INTR_INFO_VALID_MASK);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
- return;
- }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
- vcpu->arch.nmi_window_open =
- !(guest_intr & (GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_NMI));
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
- vcpu->arch.nmi_window_open = 0;
-
- vcpu->arch.interrupt_window_open =
- ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- !(guest_intr & (GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS)));
-}
-
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2476,54 +2374,40 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
kvm_queue_interrupt(vcpu, irq);
}
+
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- vmx_update_window_states(vcpu);
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS);
-
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.interrupt.pending) {
- enable_nmi_window(vcpu);
- } else if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
- return;
- }
- }
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (vcpu->arch.irq_summary
- || kvm_run->request_interrupt_window)
- enable_irq_window(vcpu);
- return;
- }
+ u32 cpu_based_vm_exec_control;
- if (vcpu->arch.interrupt_window_open) {
- if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
- kvm_do_inject_irq(vcpu);
+ vcpu->arch.interrupt_window_open =
+ ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
- if (vcpu->arch.interrupt.pending)
- vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
- }
+ if (vcpu->arch.interrupt_window_open &&
+ vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+ kvm_do_inject_irq(vcpu);
+
+ if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
+ vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
if (!vcpu->arch.interrupt_window_open &&
(vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
- enable_irq_window(vcpu);
+ /*
+ * Interrupts blocked. Wait for unblock.
+ */
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ else
+ cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
+ .slot = 8,
.guest_phys_addr = addr,
.memory_size = PAGE_SIZE * 3,
.flags = 0,
@@ -2536,6 +2420,24 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
+static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+
+ set_debugreg(dbg->bp[0], 0);
+ set_debugreg(dbg->bp[1], 1);
+ set_debugreg(dbg->bp[2], 2);
+ set_debugreg(dbg->bp[3], 3);
+
+ if (dbg->singlestep) {
+ unsigned long flags;
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ vmcs_writel(GUEST_RFLAGS, flags);
+ }
+}
+
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
@@ -2552,17 +2454,9 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* the required debugging infrastructure rework.
*/
switch (vec) {
+ case DE_VECTOR:
case DB_VECTOR:
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return 0;
- kvm_queue_exception(vcpu, vec);
- return 1;
case BP_VECTOR:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- return 0;
- /* fall through */
- case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
case UD_VECTOR:
@@ -2579,8 +2473,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info, ex_no, error_code;
- unsigned long cr2, rip, dr6;
+ u32 intr_info, error_code;
+ unsigned long cr2, rip;
u32 vect_info;
enum emulation_result er;
@@ -2598,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
}
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
@@ -2639,30 +2533,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
- ex_no = intr_info & INTR_INFO_VECTOR_MASK;
- switch (ex_no) {
- case DB_VECTOR:
- dr6 = vmcs_readl(EXIT_QUALIFICATION);
- if (!(vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_queue_exception(vcpu, DB_VECTOR);
- return 1;
- }
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
- /* fall through */
- case BP_VECTOR:
+ if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
+ (INTR_TYPE_EXCEPTION | 1)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
- kvm_run->debug.arch.exception = ex_no;
- break;
- default:
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
- kvm_run->ex.exception = ex_no;
- kvm_run->ex.error_code = error_code;
- break;
+ return 0;
}
+ kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
+ kvm_run->ex.error_code = error_code;
return 0;
}
@@ -2703,7 +2581,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
- skip_emulated_instruction(vcpu);
return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
}
@@ -2801,44 +2678,21 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long val;
int dr, reg;
- dr = vmcs_readl(GUEST_DR7);
- if (dr & DR7_GD) {
- /*
- * As the vm-exit takes precedence over the debug trap, we
- * need to emulate the latter, either for the host or the
- * guest debugging itself.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
- kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
- kvm_run->debug.arch.dr7 = dr;
- kvm_run->debug.arch.pc =
- vmcs_readl(GUEST_CS_BASE) +
- vmcs_readl(GUEST_RIP);
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
- } else {
- vcpu->arch.dr7 &= ~DR7_GD;
- vcpu->arch.dr6 |= DR6_BD;
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- kvm_queue_exception(vcpu, DB_VECTOR);
- return 1;
- }
- }
-
+ /*
+ * FIXME: this code assumes the host is debugging the guest.
+ * need to deal with guest debugging itself too.
+ */
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
- reg = DEBUG_REG_ACCESS_REG(exit_qualification);
- if (exit_qualification & TYPE_MOV_FROM_DR) {
+ dr = exit_qualification & 7;
+ reg = (exit_qualification >> 8) & 15;
+ if (exit_qualification & 16) {
+ /* mov from dr */
switch (dr) {
- case 0 ... 3:
- val = vcpu->arch.db[dr];
- break;
case 6:
- val = vcpu->arch.dr6;
+ val = 0xffff0ff0;
break;
case 7:
- val = vcpu->arch.dr7;
+ val = 0x400;
break;
default:
val = 0;
@@ -2846,38 +2700,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_register_write(vcpu, reg, val);
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
- val = vcpu->arch.regs[reg];
- switch (dr) {
- case 0 ... 3:
- vcpu->arch.db[dr] = val;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- vcpu->arch.eff_db[dr] = val;
- break;
- case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE)
- kvm_queue_exception(vcpu, UD_VECTOR);
- break;
- case 6:
- if (val & 0xffffffff00000000ULL) {
- kvm_queue_exception(vcpu, GP_VECTOR);
- break;
- }
- vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
- break;
- case 7:
- if (val & 0xffffffff00000000ULL) {
- kvm_queue_exception(vcpu, GP_VECTOR);
- break;
- }
- vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- vcpu->arch.switch_db_regs =
- (val & DR7_BP_EN_MASK);
- }
- break;
- }
- KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
+ /* mov to dr */
}
skip_emulated_instruction(vcpu);
return 1;
@@ -2944,7 +2767,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
KVMTRACE_0D(PEND_INTR, vcpu, handler);
- ++vcpu->stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -2953,6 +2775,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
if (kvm_run->request_interrupt_window &&
!vcpu->arch.irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ ++vcpu->stat.irq_window_exits;
return 0;
}
return 1;
@@ -3009,7 +2832,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
u16 tss_selector;
int reason;
@@ -3017,29 +2839,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
reason = (u32)exit_qualification >> 30;
- if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
- (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
- == INTR_TYPE_NMI_INTR) {
- vcpu->arch.nmi_injected = false;
- if (cpu_has_virtual_nmis())
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- }
tss_selector = exit_qualification;
- if (!kvm_task_switch(vcpu, tss_selector, reason))
- return 0;
-
- /* clear all local breakpoint enable flags */
- vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
-
- /*
- * TODO: What about debug traps on tss switch?
- * Are we supposed to inject them and update dr6?
- */
-
- return 1;
+ return kvm_task_switch(vcpu, tss_selector, reason);
}
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3125,12 +2927,16 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
- if (err == EMULATE_DO_MMIO)
- break;
-
- if (err != EMULATE_DONE) {
- kvm_report_emulation_failure(vcpu, "emulation failure");
- return;
+ switch (err) {
+ case EMULATE_DONE:
+ break;
+ case EMULATE_DO_MMIO:
+ kvm_report_emulation_failure(vcpu, "mmio");
+ /* TODO: Handle MMIO */
+ return;
+ default:
+ kvm_report_emulation_failure(vcpu, "emulation failure");
+ return;
}
if (signal_pending(current))
@@ -3142,10 +2948,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now except if we need to
- * emulate an MMIO */
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
+ /* Guest state should be valid now, no more emulation should be needed */
+ vmx->emulation_required = 0;
}
/*
@@ -3192,11 +2996,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
(u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
- /* If we need to emulate an MMIO from handle_invalid_guest_state
- * we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state)
- return 0;
-
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (vm_need_ept() && is_paging(vcpu)) {
@@ -3213,32 +3012,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
- exit_reason != EXIT_REASON_EPT_VIOLATION &&
- exit_reason != EXIT_REASON_TASK_SWITCH))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info "
- "(0x%x) and exit reason is 0x%x\n",
- __func__, vectoring_info, exit_reason);
-
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
- if (vcpu->arch.interrupt_window_open) {
- vmx->soft_vnmi_blocked = 0;
- vcpu->arch.nmi_window_open = 1;
- } else if (vmx->vnmi_blocked_time > 1000000000LL &&
- vcpu->arch.nmi_pending) {
- /*
- * This CPU don't support us in finding the end of an
- * NMI-blocked window if the guest runs with IRQs
- * disabled. So we pull the trigger after 1 s of
- * futile waiting, but inform the user about this.
- */
- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
- "state on VCPU %d after 1 s timeout\n",
- __func__, vcpu->vcpu_id);
- vmx->soft_vnmi_blocked = 0;
- vmx->vcpu.arch.nmi_window_open = 1;
- }
- }
-
+ exit_reason != EXIT_REASON_EPT_VIOLATION))
+ printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
+ "exit reason is 0x%x\n", __func__, exit_reason);
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3266,6 +3042,51 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ if (!cpu_has_virtual_nmis())
+ return;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return !(guest_intr & (GUEST_INTR_STATE_NMI |
+ GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI)) &&
+ (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
+}
+
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -3288,9 +3109,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
if (unblock_nmi && vector != DF_VECTOR)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
- } else if (unlikely(vmx->soft_vnmi_blocked))
- vmx->vnmi_blocked_time +=
- ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ }
idt_vectoring_info = vmx->idt_vectoring_info;
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3309,8 +3128,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
vmx->vcpu.arch.nmi_injected = false;
}
kvm_clear_exception_queue(&vmx->vcpu);
- if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
- type == INTR_TYPE_SOFT_EXCEPTION)) {
+ if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3329,34 +3147,26 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
update_tpr_threshold(vcpu);
- vmx_update_window_states(vcpu);
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS);
-
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.interrupt.pending) {
- enable_nmi_window(vcpu);
- } else if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
+ if (cpu_has_virtual_nmis()) {
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vmx_nmi_enabled(vcpu)) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_intr_window(vcpu);
+ return;
+ }
+ }
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ enable_intr_window(vcpu);
return;
}
}
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
- return;
- }
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
- if (vcpu->arch.interrupt_window_open)
+ if (vmx_irq_enabled(vcpu))
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
else
enable_irq_window(vcpu);
@@ -3364,8 +3174,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
- if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
}
}
@@ -3405,10 +3213,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
- vmx->entry_time = ktime_get();
-
/* Handle invalid guest state instead of entering VMX */
if (vmx->emulation_required && emulate_invalid_guest_state) {
handle_invalid_guest_state(vcpu, kvm_run);
@@ -3425,8 +3229,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
*/
vmcs_writel(HOST_CR0, read_cr0());
- set_debugreg(vcpu->arch.dr6, 6);
-
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -3521,13 +3323,13 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
vcpu->arch.regs_dirty = 0;
- get_debugreg(vcpu->arch.dr6, 6);
-
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
- vmx_update_window_states(vcpu);
+ vcpu->arch.interrupt_window_open =
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
@@ -3535,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
(intr_info & INTR_INFO_VALID_MASK)) {
KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
@@ -3653,11 +3455,6 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
-static int vmx_get_mt_mask_shift(void)
-{
- return VMX_EPT_MT_EPTE_SHIFT;
-}
-
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -3677,6 +3474,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_put = vmx_vcpu_put,
.set_guest_debug = set_guest_debug,
+ .guest_debug_pre = kvm_guest_debug_pre,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
@@ -3712,7 +3510,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
- .get_mt_mask_shift = vmx_get_mt_mask_shift,
};
static int __init vmx_init(void)
@@ -3769,10 +3566,10 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
- VMX_EPT_EXECUTABLE_MASK,
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+ VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
} else
kvm_disable_tdp();