diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-19 18:11:46 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-19 18:11:46 +1100 |
commit | 6a6a2c5c971d537c1487d7640a05f0a5cfb61b62 (patch) | |
tree | efc7c1af0df5a3f704bfa91436103462f7da8efc /arch/x86/kvm/svm.c | |
parent | 18f9ebeda025c83841bb61008c067f60b27f1396 (diff) | |
parent | fc9e77f83b26089d6228e149605be69228905dc7 (diff) |
Merge commit 'kvm/master'
Diffstat (limited to 'arch/x86/kvm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm.c | 233 |
1 files changed, 188 insertions, 45 deletions
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index de755cb1431d..e1d139f8efc0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -47,6 +47,18 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_LBRV (1 << 1) #define SVM_DEATURE_SVML (1 << 2) +#define DEBUGCTL_RESERVED_BITS (~(0x3fULL)) + +/* enable NPT for AMD64 and X86 with PAE */ +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +static bool npt_enabled = true; +#else +static bool npt_enabled = false; +#endif +static int npt = 1; + +module_param(npt, int, S_IRUGO); + static void kvm_reput_irq(struct vcpu_svm *svm); static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) @@ -55,7 +67,6 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) } unsigned long iopm_base; -unsigned long msrpm_base; struct kvm_ldttss_desc { u16 limit0; @@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { - if (!(efer & EFER_LMA)) + if (!npt_enabled && !(efer & EFER_LMA)) efer &= ~EFER_LME; to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK; @@ -282,7 +293,7 @@ static void svm_hardware_enable(void *garbage) #ifdef CONFIG_X86_64 struct desc_ptr gdt_descr; #else - struct desc_ptr gdt_descr; + struct Xgt_desc_struct gdt_descr; #endif struct desc_struct *gdt; int me = raw_smp_processor_id(); @@ -302,7 +313,6 @@ static void svm_hardware_enable(void *garbage) svm_data->asid_generation = 1; svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; svm_data->next_asid = svm_data->max_asid + 1; - svm_features = cpuid_edx(SVM_CPUID_FUNC); asm volatile ("sgdt %0" : "=m"(gdt_descr)); gdt = (struct desc_struct *)gdt_descr.address; @@ -361,12 +371,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr, BUG(); } +static void svm_vcpu_init_msrpm(u32 *msrpm) +{ + memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + +#ifdef CONFIG_X86_64 + set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_LSTAR, 1, 1); + set_msr_interception(msrpm, MSR_CSTAR, 1, 1); + set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); +#endif + set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1); +} + +static void svm_enable_lbrv(struct vcpu_svm *svm) +{ + u32 *msrpm = svm->msrpm; + + svm->vmcb->control.lbr_ctl = 1; + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); + set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1); +} + +static void svm_disable_lbrv(struct vcpu_svm *svm) +{ + u32 *msrpm = svm->msrpm; + + svm->vmcb->control.lbr_ctl = 0; + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0); + set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); +} + static __init int svm_hardware_setup(void) { int cpu; struct page *iopm_pages; - struct page *msrpm_pages; - void *iopm_va, *msrpm_va; + void *iopm_va; int r; iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER); @@ -379,41 +428,33 @@ static __init int svm_hardware_setup(void) clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */ iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + if (boot_cpu_has(X86_FEATURE_NX)) + kvm_enable_efer_bits(EFER_NX); - msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + for_each_online_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } - r = -ENOMEM; - if (!msrpm_pages) - goto err_1; + svm_features = cpuid_edx(SVM_CPUID_FUNC); - msrpm_va = page_address(msrpm_pages); - memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); - msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT; + if (!svm_has(SVM_FEATURE_NPT)) + npt_enabled = false; -#ifdef CONFIG_X86_64 - set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1); - set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1); - set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1); - set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1); -#endif - set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1); - set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1); + if (npt_enabled && !npt) { + printk(KERN_INFO "kvm: Nested Paging disabled\n"); + npt_enabled = false; + } - for_each_online_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err_2; + if (npt_enabled) { + printk(KERN_INFO "kvm: Nested Paging enabled\n"); + kvm_enable_tdp(); } + return 0; -err_2: - __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); - msrpm_base = 0; -err_1: +err: __free_pages(iopm_pages, IOPM_ALLOC_ORDER); iopm_base = 0; return r; @@ -421,9 +462,8 @@ err_1: static __exit void svm_hardware_unsetup(void) { - __free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER); __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); - iopm_base = msrpm_base = 0; + iopm_base = 0; } static void init_seg(struct vmcb_seg *seg) @@ -443,10 +483,10 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static void init_vmcb(struct vmcb *vmcb) +static void init_vmcb(struct vcpu_svm *svm) { - struct vmcb_control_area *control = &vmcb->control; - struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *control = &svm->vmcb->control; + struct vmcb_save_area *save = &svm->vmcb->save; control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | @@ -508,7 +548,7 @@ static void init_vmcb(struct vmcb *vmcb) (1ULL << INTERCEPT_MWAIT); control->iopm_base_pa = iopm_base; - control->msrpm_base_pa = msrpm_base; + control->msrpm_base_pa = __pa(svm->msrpm); control->tsc_offset = 0; control->int_ctl = V_INTR_MASKING_MASK; @@ -550,13 +590,29 @@ static void init_vmcb(struct vmcb *vmcb) save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP; save->cr4 = X86_CR4_PAE; /* rdx = ?? */ + + if (npt_enabled) { + /* Setup VMCB for Nested Paging */ + control->nested_ctl = 1; + control->intercept_exceptions &= ~(1 << PF_VECTOR); + control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK| + INTERCEPT_CR3_MASK); + control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK| + INTERCEPT_CR3_MASK); + save->g_pat = 0x0007040600070406ULL; + /* enable caching because the QEMU Bios doesn't enable it */ + save->cr0 = X86_CR0_ET; + save->cr3 = 0; + save->cr4 = 0; + } + } static int svm_vcpu_reset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - init_vmcb(svm->vmcb); + init_vmcb(svm); if (vcpu->vcpu_id != 0) { svm->vmcb->save.rip = 0; @@ -571,6 +627,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) { struct vcpu_svm *svm; struct page *page; + struct page *msrpm_pages; int err; svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); @@ -589,12 +646,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) goto uninit; } + err = -ENOMEM; + msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); + if (!msrpm_pages) + goto uninit; + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); + svm->vmcb = page_address(page); clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; memset(svm->db_regs, 0, sizeof(svm->db_regs)); - init_vmcb(svm->vmcb); + init_vmcb(svm); fx_init(&svm->vcpu); svm->vcpu.fpu_active = 1; @@ -617,6 +681,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT)); + __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); } @@ -784,6 +849,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } } #endif + if (npt_enabled) + goto set; + if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) { svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); vcpu->fpu_active = 1; @@ -791,6 +859,16 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vcpu->arch.cr0 = cr0; cr0 |= X86_CR0_PG | X86_CR0_WP; + if (!vcpu->fpu_active) { + svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR); + cr0 |= X86_CR0_TS; + } +set: + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; } @@ -798,7 +876,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { vcpu->arch.cr4 = cr4; - to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE; + if (!npt_enabled) + cr4 |= X86_CR4_PAE; + to_svm(vcpu)->vmcb->save.cr4 = cr4; } static void svm_set_segment(struct kvm_vcpu *vcpu, @@ -965,7 +1045,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) * so reinitialize it. */ clear_page(svm->vmcb); - init_vmcb(svm->vmcb); + init_vmcb(svm); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; @@ -1096,6 +1176,24 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_SYSENTER_ESP: *data = svm->vmcb->save.sysenter_esp; break; + /* Nobody will change the following 5 values in the VMCB so + we can safely return them on rdmsr. They will always be 0 + until LBRV is implemented. */ + case MSR_IA32_DEBUGCTLMSR: + *data = svm->vmcb->save.dbgctl; + break; + case MSR_IA32_LASTBRANCHFROMIP: + *data = svm->vmcb->save.br_from; + break; + case MSR_IA32_LASTBRANCHTOIP: + *data = svm->vmcb->save.br_to; + break; + case MSR_IA32_LASTINTFROMIP: + *data = svm->vmcb->save.last_excp_from; + break; + case MSR_IA32_LASTINTTOIP: + *data = svm->vmcb->save.last_excp_to; + break; default: return kvm_get_msr_common(vcpu, ecx, data); } @@ -1156,6 +1254,21 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) case MSR_IA32_SYSENTER_ESP: svm->vmcb->save.sysenter_esp = data; break; + case MSR_IA32_DEBUGCTLMSR: + if (!svm_has(SVM_FEATURE_LBRV)) { + pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", + __FUNCTION__, data); + break; + } + if (data & DEBUGCTL_RESERVED_BITS) + return 1; + + svm->vmcb->save.dbgctl = data; + if (data & (1ULL<<0)) + svm_enable_lbrv(svm); + else + svm_disable_lbrv(svm); + break; case MSR_K7_EVNTSEL0: case MSR_K7_EVNTSEL1: case MSR_K7_EVNTSEL2: @@ -1264,14 +1377,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_WBINVD] = emulate_on_interception, [SVM_EXIT_MONITOR] = invalid_op_interception, [SVM_EXIT_MWAIT] = invalid_op_interception, + [SVM_EXIT_NPF] = pf_interception, }; - static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); u32 exit_code = svm->vmcb->control.exit_code; + if (npt_enabled) { + int mmu_reload = 0; + if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { + svm_set_cr0(vcpu, svm->vmcb->save.cr0); + mmu_reload = 1; + } + vcpu->arch.cr0 = svm->vmcb->save.cr0; + vcpu->arch.cr3 = svm->vmcb->save.cr3; + if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (!load_pdptrs(vcpu, vcpu->arch.cr3)) { + kvm_inject_gp(vcpu, 0); + return 1; + } + } + if (mmu_reload) { + kvm_mmu_reset_context(vcpu); + kvm_mmu_load(vcpu); + } + } + kvm_reput_irq(svm); if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { @@ -1282,7 +1415,8 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if (is_external_interrupt(svm->vmcb->control.exit_int_info) && - exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR) + exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && + exit_code != SVM_EXIT_NPF) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " "exit_code 0x%x\n", __FUNCTION__, svm->vmcb->control.exit_int_info, @@ -1473,6 +1607,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) svm->host_dr6 = read_dr6(); svm->host_dr7 = read_dr7(); svm->vmcb->save.cr2 = vcpu->arch.cr2; + /* required for live migration with NPT */ + if (npt_enabled) + svm->vmcb->save.cr3 = vcpu->arch.cr3; if (svm->vmcb->save.dr7 & 0xff) { write_dr7(0); @@ -1616,6 +1753,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); + if (npt_enabled) { + svm->vmcb->control.nested_cr3 = root; + force_new_asid(vcpu); + return; + } + svm->vmcb->save.cr3 = root; force_new_asid(vcpu); |