From e097e5ffd69cbd7be61466e2d54c145468d48073 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 22 Jul 2011 12:46:52 +0100 Subject: KVM: Record instruction set in all vmexit tracepoints The kvm_exit tracepoint recently added the isa argument to aid decoding exit_reason. The semantics of exit_reason depend on the instruction set (vmx or svm) and the isa argument allows traces to be analyzed on other machines. Add the isa argument to kvm_nested_vmexit and kvm_nested_vmexit_inject so these tracepoints can also be self-describing. Signed-off-by: Stefan Hajnoczi Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 475d1c948501..6adb7ba13a4a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2182,7 +2182,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_info_1, vmcb->control.exit_info_2, vmcb->control.exit_int_info, - vmcb->control.exit_int_info_err); + vmcb->control.exit_int_info_err, + KVM_ISA_SVM); nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); if (!nested_vmcb) @@ -3335,7 +3336,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) svm->vmcb->control.exit_info_1, svm->vmcb->control.exit_info_2, svm->vmcb->control.exit_int_info, - svm->vmcb->control.exit_int_info_err); + svm->vmcb->control.exit_int_info_err, + KVM_ISA_SVM); vmexit = nested_svm_exit_special(svm); -- cgit v1.2.3 From 0d460ffc0956d2dbe12ca9f5f6aa0f8701ea9d73 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 22 Jul 2011 12:46:53 +0100 Subject: KVM: Use __print_symbolic() for vmexit tracepoints The vmexit tracepoints format the exit_reason to make it human-readable. Since the exit_reason depends on the instruction set (vmx or svm), formatting is handled with ftrace_print_symbols_seq() by referring to the appropriate exit reason table. However, the ftrace_print_symbols_seq() function is not meant to be used directly in tracepoints since it does not export the formatting table which userspace tools like trace-cmd and perf use to format traces. In practice perf dies when formatting vmexit-related events and trace-cmd falls back to printing the numeric value (with extra formatting code in the kvm plugin to paper over this limitation). Other userspace consumers of vmexit-related tracepoints would be in similar trouble. To avoid significant changes to the kvm_exit tracepoint, this patch moves the vmx and svm exit reason tables into arch/x86/kvm/trace.h and selects the right table with __print_symbolic() depending on the instruction set. Note that __print_symbolic() is designed for exporting the formatting table to userspace and allows trace-cmd and perf to work. Signed-off-by: Stefan Hajnoczi Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 - arch/x86/kvm/svm.c | 55 --------------------- arch/x86/kvm/trace.h | 106 +++++++++++++++++++++++++++++++++++++--- arch/x86/kvm/vmx.c | 44 ----------------- 4 files changed, 100 insertions(+), 107 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c00ec28e7147..307e3cfa28ad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -635,8 +635,6 @@ struct kvm_x86_ops { int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage); - - const struct trace_print_flags *exit_reasons_str; }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6adb7ba13a4a..2b24a88f2c67 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3899,60 +3899,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) } } -static const struct trace_print_flags svm_exit_reasons_str[] = { - { SVM_EXIT_READ_CR0, "read_cr0" }, - { SVM_EXIT_READ_CR3, "read_cr3" }, - { SVM_EXIT_READ_CR4, "read_cr4" }, - { SVM_EXIT_READ_CR8, "read_cr8" }, - { SVM_EXIT_WRITE_CR0, "write_cr0" }, - { SVM_EXIT_WRITE_CR3, "write_cr3" }, - { SVM_EXIT_WRITE_CR4, "write_cr4" }, - { SVM_EXIT_WRITE_CR8, "write_cr8" }, - { SVM_EXIT_READ_DR0, "read_dr0" }, - { SVM_EXIT_READ_DR1, "read_dr1" }, - { SVM_EXIT_READ_DR2, "read_dr2" }, - { SVM_EXIT_READ_DR3, "read_dr3" }, - { SVM_EXIT_WRITE_DR0, "write_dr0" }, - { SVM_EXIT_WRITE_DR1, "write_dr1" }, - { SVM_EXIT_WRITE_DR2, "write_dr2" }, - { SVM_EXIT_WRITE_DR3, "write_dr3" }, - { SVM_EXIT_WRITE_DR5, "write_dr5" }, - { SVM_EXIT_WRITE_DR7, "write_dr7" }, - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, - { SVM_EXIT_INTR, "interrupt" }, - { SVM_EXIT_NMI, "nmi" }, - { SVM_EXIT_SMI, "smi" }, - { SVM_EXIT_INIT, "init" }, - { SVM_EXIT_VINTR, "vintr" }, - { SVM_EXIT_CPUID, "cpuid" }, - { SVM_EXIT_INVD, "invd" }, - { SVM_EXIT_HLT, "hlt" }, - { SVM_EXIT_INVLPG, "invlpg" }, - { SVM_EXIT_INVLPGA, "invlpga" }, - { SVM_EXIT_IOIO, "io" }, - { SVM_EXIT_MSR, "msr" }, - { SVM_EXIT_TASK_SWITCH, "task_switch" }, - { SVM_EXIT_SHUTDOWN, "shutdown" }, - { SVM_EXIT_VMRUN, "vmrun" }, - { SVM_EXIT_VMMCALL, "hypercall" }, - { SVM_EXIT_VMLOAD, "vmload" }, - { SVM_EXIT_VMSAVE, "vmsave" }, - { SVM_EXIT_STGI, "stgi" }, - { SVM_EXIT_CLGI, "clgi" }, - { SVM_EXIT_SKINIT, "skinit" }, - { SVM_EXIT_WBINVD, "wbinvd" }, - { SVM_EXIT_MONITOR, "monitor" }, - { SVM_EXIT_MWAIT, "mwait" }, - { SVM_EXIT_XSETBV, "xsetbv" }, - { SVM_EXIT_NPF, "npf" }, - { -1, NULL } -}; - static int svm_get_lpage_level(void) { return PT_PDPE_LEVEL; @@ -4225,7 +4171,6 @@ static struct kvm_x86_ops svm_x86_ops = { .get_mt_mask = svm_get_mt_mask, .get_exit_info = svm_get_exit_info, - .exit_reasons_str = svm_exit_reasons_str, .get_lpage_level = svm_get_lpage_level, diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4e1716bf88a4..911d2641f14c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -2,6 +2,8 @@ #define _TRACE_KVM_H #include +#include +#include #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm @@ -181,6 +183,95 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + /* * Tracepoint for kvm guest exit: */ @@ -205,8 +296,9 @@ TRACE_EVENT(kvm_exit, ), TP_printk("reason %s rip 0x%lx info %llx %llx", - ftrace_print_symbols_seq(p, __entry->exit_reason, - kvm_x86_ops->exit_reasons_str), + (__entry->isa == KVM_ISA_VMX) ? + __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : + __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), __entry->guest_rip, __entry->info1, __entry->info2) ); @@ -512,8 +604,9 @@ TRACE_EVENT(kvm_nested_vmexit, TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", __entry->rip, - ftrace_print_symbols_seq(p, __entry->exit_code, - kvm_x86_ops->exit_reasons_str), + (__entry->isa == KVM_ISA_VMX) ? + __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : + __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); @@ -548,8 +641,9 @@ TRACE_EVENT(kvm_nested_vmexit_inject, TP_printk("reason: %s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", - ftrace_print_symbols_seq(p, __entry->exit_code, - kvm_x86_ops->exit_reasons_str), + (__entry->isa == KVM_ISA_VMX) ? + __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : + __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e65a158dee64..e26629fbf1d7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6241,49 +6241,6 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) return ret; } -#define _ER(x) { EXIT_REASON_##x, #x } - -static const struct trace_print_flags vmx_exit_reasons_str[] = { - _ER(EXCEPTION_NMI), - _ER(EXTERNAL_INTERRUPT), - _ER(TRIPLE_FAULT), - _ER(PENDING_INTERRUPT), - _ER(NMI_WINDOW), - _ER(TASK_SWITCH), - _ER(CPUID), - _ER(HLT), - _ER(INVLPG), - _ER(RDPMC), - _ER(RDTSC), - _ER(VMCALL), - _ER(VMCLEAR), - _ER(VMLAUNCH), - _ER(VMPTRLD), - _ER(VMPTRST), - _ER(VMREAD), - _ER(VMRESUME), - _ER(VMWRITE), - _ER(VMOFF), - _ER(VMON), - _ER(CR_ACCESS), - _ER(DR_ACCESS), - _ER(IO_INSTRUCTION), - _ER(MSR_READ), - _ER(MSR_WRITE), - _ER(MWAIT_INSTRUCTION), - _ER(MONITOR_INSTRUCTION), - _ER(PAUSE_INSTRUCTION), - _ER(MCE_DURING_VMENTRY), - _ER(TPR_BELOW_THRESHOLD), - _ER(APIC_ACCESS), - _ER(EPT_VIOLATION), - _ER(EPT_MISCONFIG), - _ER(WBINVD), - { -1, NULL } -}; - -#undef _ER - static int vmx_get_lpage_level(void) { if (enable_ept && !cpu_has_vmx_ept_1g_page()) @@ -7039,7 +6996,6 @@ static struct kvm_x86_ops vmx_x86_ops = { .get_mt_mask = vmx_get_mt_mask, .get_exit_info = vmx_get_exit_info, - .exit_reasons_str = vmx_exit_reasons_str, .get_lpage_level = vmx_get_lpage_level, -- cgit v1.2.3 From e4e517b4be019787ada4cbbce2f04570c21b0cbd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Jul 2011 11:36:17 +0300 Subject: KVM: MMU: Do not unconditionally read PDPTE from guest memory Architecturally, PDPTEs are cached in the PDPTRs when CR3 is reloaded. On SVM, it is not possible to implement this, but on VMX this is possible and was indeed implemented until nested SVM changed this to unconditionally read PDPTEs dynamically. This has noticable impact when running PAE guests. Fix by changing the MMU to read PDPTRs from the cache, falling back to reading from memory for the nested MMU. Signed-off-by: Avi Kivity Tested-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/kvm_cache_regs.h | 7 ------- arch/x86/kvm/mmu.c | 5 ++++- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/svm.c | 15 +++++++++++++++ 5 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 307e3cfa28ad..b31a3417a405 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -265,6 +265,7 @@ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); + u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 3377d53fcd36..544076c4f44b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -45,13 +45,6 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) return vcpu->arch.walk_mmu->pdptrs[index]; } -static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index) -{ - load_pdptrs(vcpu, mmu, mmu->get_cr3(vcpu)); - - return mmu->pdptrs[index]; -} - static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8e8da7960dbe..f1b36cf3e3d0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2770,7 +2770,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { - pdptr = kvm_pdptr_read_mmu(vcpu, &vcpu->arch.mmu, i); + pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); if (!is_present_gpte(pdptr)) { vcpu->arch.mmu.pae_root[i] = 0; continue; @@ -3318,6 +3318,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->direct_map = true; context->set_cr3 = kvm_x86_ops->set_tdp_cr3; context->get_cr3 = get_cr3; + context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; context->nx = is_nx(vcpu); @@ -3376,6 +3377,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu->arch.walk_mmu->set_cr3 = kvm_x86_ops->set_cr3; vcpu->arch.walk_mmu->get_cr3 = get_cr3; + vcpu->arch.walk_mmu->get_pdptr = kvm_pdptr_read; vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; return r; @@ -3386,6 +3388,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; g_context->get_cr3 = get_cr3; + g_context->get_pdptr = kvm_pdptr_read; g_context->inject_page_fault = kvm_inject_page_fault; /* diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 507e2b844cfa..f6dd9feb201b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -163,7 +163,7 @@ retry_walk: #if PTTYPE == 64 if (walker->level == PT32E_ROOT_LEVEL) { - pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); + pte = mmu->get_pdptr(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) goto error; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2b24a88f2c67..f043168a5ab1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1844,6 +1844,20 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) return svm->nested.nested_cr3; } +static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) +{ + struct vcpu_svm *svm = to_svm(vcpu); + u64 cr3 = svm->nested.nested_cr3; + u64 pdpte; + int ret; + + ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, + offset_in_page(cr3) + index * 8, 8); + if (ret) + return 0; + return pdpte; +} + static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) { @@ -1875,6 +1889,7 @@ static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; + vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; vcpu->arch.mmu.shadow_root_level = get_npt_level(); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; -- cgit v1.2.3 From d5c1785d2f3aabe284d91bc7fc8f0abc58525dc9 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Tue, 2 Aug 2011 15:54:20 +0300 Subject: KVM: L1 TSC handling KVM assumed in several places that reading the TSC MSR returns the value for L1. This is incorrect, because when L2 is running, the correct TSC read exit emulation is to return L2's value. We therefore add a new x86_ops function, read_l1_tsc, to use in places that specifically need to read the L1 TSC, NOT the TSC of the current level of guest. Note that one change, of one line in kvm_arch_vcpu_load, is made redundant by a different patch sent by Zachary Amsden (and not yet applied): kvm_arch_vcpu_load() should not read the guest TSC, and if it didn't, of course we didn't have to change the call of kvm_get_msr() to read_l1_tsc(). [avi: moved callback to kvm_x86_ops tsc block] Signed-off-by: Nadav Har'El Acked-by: Zachary Amsdem Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm.c | 8 ++++++++ arch/x86/kvm/vmx.c | 16 ++++++++++++++++ arch/x86/kvm/x86.c | 8 ++++---- 4 files changed, 29 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b31a3417a405..6ab4241c27cb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -630,6 +630,7 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc); + u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f043168a5ab1..590d1d2d620b 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2910,6 +2910,13 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu) +{ + struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); + return vmcb->control.tsc_offset + + svm_scale_tsc(vcpu, native_read_tsc()); +} + static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4201,6 +4208,7 @@ static struct kvm_x86_ops svm_x86_ops = { .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, .compute_tsc_offset = svm_compute_tsc_offset, + .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 03df703c8f20..97b64543d4ed 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1747,6 +1747,21 @@ static u64 guest_read_tsc(void) return host_tsc + tsc_offset; } +/* + * Like guest_read_tsc, but always returns L1's notion of the timestamp + * counter, even if a nested guest (L2) is currently running. + */ +u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu) +{ + u64 host_tsc, tsc_offset; + + rdtscll(host_tsc); + tsc_offset = is_guest_mode(vcpu) ? + to_vmx(vcpu)->nested.vmcs01_tsc_offset : + vmcs_read64(TSC_OFFSET); + return host_tsc + tsc_offset; +} + /* * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ * ioctl. In this case the call-back should update internal vmx state to make @@ -7010,6 +7025,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, .compute_tsc_offset = vmx_compute_tsc_offset, + .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea8f9f03e923..6b37f18a1663 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1098,7 +1098,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - kvm_get_msr(v, MSR_IA32_TSC, &tsc_timestamp); + tsc_timestamp = kvm_x86_ops->read_l1_tsc(v); kernel_ns = get_kernel_ns(); this_tsc_khz = vcpu_tsc_khz(v); if (unlikely(this_tsc_khz == 0)) { @@ -2218,7 +2218,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) s64 tsc_delta; u64 tsc; - kvm_get_msr(vcpu, MSR_IA32_TSC, &tsc); + tsc = kvm_x86_ops->read_l1_tsc(vcpu); tsc_delta = !vcpu->arch.last_guest_tsc ? 0 : tsc - vcpu->arch.last_guest_tsc; @@ -2242,7 +2242,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { kvm_x86_ops->vcpu_put(vcpu); kvm_put_guest_fpu(vcpu); - kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); + vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); } static int is_efer_nx(void) @@ -5729,7 +5729,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); - kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); + vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); -- cgit v1.2.3 From 45133ecaaec7aea447afc98cc2c24aac638bbe5c Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Tue, 2 Aug 2011 15:55:23 +0300 Subject: KVM: SVM: Fix TSC MSR read in nested SVM When the TSC MSR is read by an L2 guest (when L1 allowed this MSR to be read without exit), we need to return L2's notion of the TSC, not L1's. The current code incorrectly returned L1 TSC, because svm_get_msr() was also used in x86.c where this was assumed, but now that these places call the new svm_read_l1_tsc(), the MSR read can be fixed. Signed-off-by: Nadav Har'El Tested-by: Joerg Roedel Acked-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/svm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 590d1d2d620b..8277f32017ad 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2923,9 +2923,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) switch (ecx) { case MSR_IA32_TSC: { - struct vmcb *vmcb = get_host_vmcb(svm); - - *data = vmcb->control.tsc_offset + + *data = svm->vmcb->control.tsc_offset + svm_scale_tsc(vcpu, native_read_tsc()); break; -- cgit v1.2.3 From 1e2b1dd797f9356f779268ecc1ba21110d4e341c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 12 Sep 2011 10:52:24 +0200 Subject: KVM: x86: Move kvm_trace_exit into atomic vmexit section This avoids that events causing the vmexit are recorded before the actual exit reason. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8277f32017ad..e7ed4b1623b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3335,8 +3335,6 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); - if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) @@ -3790,6 +3788,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; + trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM); + if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_before_handle_nmi(&svm->vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 47419d6031ea..21217b65b129 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5739,8 +5739,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - /* If guest state is invalid, start emulating */ if (vmx->emulation_required && emulate_invalid_guest_state) return handle_invalid_guest_state(vcpu); @@ -6144,6 +6142,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->launched = 1; vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); + trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); vmx_complete_atomic_exit(vmx); vmx_recover_nmi_blocking(vmx); -- cgit v1.2.3 From f1c1da2bde712812a3e0f9a7a7ebe7a916a4b5f4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 18 Oct 2011 18:23:11 +0200 Subject: KVM: SVM: Keep intercepting task switching with NPT enabled AMD processors apparently have a bug in the hardware task switching support when NPT is enabled. If the task switch triggers a NPF, we can get wrong EXITINTINFO along with that fault. On resume, spurious exceptions may then be injected into the guest. We were able to reproduce this bug when our guest triggered #SS and the handler were supposed to run over a separate task with not yet touched stack pages. Work around the issue by continuing to emulate task switches even in NPT mode. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/svm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kvm/svm.c') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e7ed4b1623b9..e32243eac2f4 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1084,7 +1084,6 @@ static void init_vmcb(struct vcpu_svm *svm) if (npt_enabled) { /* Setup VMCB for Nested Paging */ control->nested_ctl = 1; - clr_intercept(svm, INTERCEPT_TASK_SWITCH); clr_intercept(svm, INTERCEPT_INVLPG); clr_exception_intercept(svm, PF_VECTOR); clr_cr_intercept(svm, INTERCEPT_CR3_READ); -- cgit v1.2.3