diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-04-07 16:32:55 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2010-04-07 16:32:55 +1000 |
commit | 6ec6c5453e65d0ca97d2a52fb559a5dec4ed0a7e (patch) | |
tree | 83a954a7ed39fd2c1911d2d9efdc4c5c6bb64e7d | |
parent | 2537497021dc1c00dfa23bb9b89e2bdaa2e2d414 (diff) |
Revert "Merge remote branch 'kvm/linux-next'"
This reverts commit 66013e33305208430fb6b39ee3c227a02205c419, reversing
changes made to f07f6a529c341af0c1e246771a1b9cb937619713.
45 files changed, 1888 insertions, 4883 deletions
diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index baa8fde8bd16..c6416a398163 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -656,7 +656,6 @@ struct kvm_clock_data { 4.29 KVM_GET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS -Extended by: KVM_CAP_INTR_SHADOW Architectures: x86 Type: vm ioctl Parameters: struct kvm_vcpu_event (out) @@ -677,7 +676,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 shadow; + __u8 pad; } interrupt; struct { __u8 injected; @@ -689,13 +688,9 @@ struct kvm_vcpu_events { __u32 flags; }; -KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that -interrupt.shadow contains a valid state. Otherwise, this field is undefined. - 4.30 KVM_SET_VCPU_EVENTS Capability: KVM_CAP_VCPU_EVENTS -Extended by: KVM_CAP_INTR_SHADOW Architectures: x86 Type: vm ioctl Parameters: struct kvm_vcpu_event (in) @@ -714,139 +709,6 @@ current in-kernel state. The bits are: KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector -If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in -the flags field to signal that interrupt.shadow contains a valid state and -shall be written into the VCPU. - -4.32 KVM_GET_DEBUGREGS - -Capability: KVM_CAP_DEBUGREGS -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_debugregs (out) -Returns: 0 on success, -1 on error - -Reads debug registers from the vcpu. - -struct kvm_debugregs { - __u64 db[4]; - __u64 dr6; - __u64 dr7; - __u64 flags; - __u64 reserved[9]; -}; - -4.33 KVM_SET_DEBUGREGS - -Capability: KVM_CAP_DEBUGREGS -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_debugregs (in) -Returns: 0 on success, -1 on error - -Writes debug registers into the vcpu. - -See KVM_GET_DEBUGREGS for the data structure. The flags field is unused -yet and must be cleared on entry. - -4.34 KVM_SET_USER_MEMORY_REGION - -Capability: KVM_CAP_USER_MEM -Architectures: all -Type: vm ioctl -Parameters: struct kvm_userspace_memory_region (in) -Returns: 0 on success, -1 on error - -struct kvm_userspace_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; /* start of the userspace allocated memory */ -}; - -/* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL - -This ioctl allows the user to create or modify a guest physical memory -slot. When changing an existing slot, it may be moved in the guest -physical memory space, or its flags may be modified. It may not be -resized. Slots may not overlap in guest physical address space. - -Memory for the region is taken starting at the address denoted by the -field userspace_addr, which must point at user addressable memory for -the entire memory slot size. Any object may back this memory, including -anonymous memory, ordinary files, and hugetlbfs. - -It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr -be identical. This allows large pages in the guest to be backed by large -pages in the host. - -The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which -instructs kvm to keep track of writes to memory within the slot. See -the KVM_GET_DIRTY_LOG ioctl. - -When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory -region are automatically reflected into the guest. For example, an mmap() -that affects the region will be made visible immediately. Another example -is madvise(MADV_DROP). - -It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. -The KVM_SET_MEMORY_REGION does not allow fine grained control over memory -allocation and is deprecated. - -4.35 KVM_SET_TSS_ADDR - -Capability: KVM_CAP_SET_TSS_ADDR -Architectures: x86 -Type: vm ioctl -Parameters: unsigned long tss_address (in) -Returns: 0 on success, -1 on error - -This ioctl defines the physical address of a three-page region in the guest -physical address space. The region must be within the first 4GB of the -guest physical address space and must not conflict with any memory slot -or any mmio address. The guest may malfunction if it accesses this memory -region. - -This ioctl is required on Intel-based hosts. This is needed on Intel hardware -because of a quirk in the virtualization implementation (see the internals -documentation when it pops into existence). - -4.36 KVM_ENABLE_CAP - -Capability: KVM_CAP_ENABLE_CAP -Architectures: ppc -Type: vcpu ioctl -Parameters: struct kvm_enable_cap (in) -Returns: 0 on success; -1 on error - -+Not all extensions are enabled by default. Using this ioctl the application -can enable an extension, making it available to the guest. - -On systems that do not support this ioctl, it always fails. On systems that -do support it, it only works for extensions that are supported for enablement. - -To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should -be used. - -struct kvm_enable_cap { - /* in */ - __u32 cap; - -The capability that is supposed to get enabled. - - __u32 flags; - -A bitfield indicating future enhancements. Has to be 0 for now. - - __u64 args[4]; - -Arguments for enabling a feature. If a feature needs initial values to -function properly, this is the place to put them. - - __u8 pad[64]; -}; 5. The kvm_run structure @@ -958,13 +820,6 @@ executed a memory-mapped I/O instruction which could not be satisfied by kvm. The 'data' member contains the written data if 'is_write' is true, and should be filled by application code otherwise. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding -operations are complete (and guest state is consistent) only after userspace -has re-entered the kernel with KVM_RUN. The kernel side will first finish -incomplete operations and then check for pending signals. Userspace -can re-enter the guest with an unmasked signal pending to complete -pending operations. - /* KVM_EXIT_HYPERCALL */ struct { __u64 nr; @@ -974,9 +829,7 @@ pending operations. __u32 pad; } hypercall; -Unused. This was once used for 'hypercall to userspace'. To implement -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). -Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. +Unused. /* KVM_EXIT_TPR_ACCESS */ struct { @@ -1017,19 +870,6 @@ s390 specific. powerpc specific. - /* KVM_EXIT_OSI */ - struct { - __u64 gprs[32]; - } osi; - -MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch -hypercalls and exit with this exit struct that contains all the guest gprs. - -If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. -Userspace can now handle the hypercall and when it's done modify the gprs as -necessary. Upon guest entry all guest GPRs will then be replaced by the values -in this struct. - /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index eb4b796a220d..73c5c2b05f64 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -979,13 +979,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = -ENXIO; if (irqchip_in_kernel(kvm)) { __s32 status; status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); if (ioctl == KVM_IRQ_LINE_STATUS) { - r = -EFAULT; irq_event.status = status; if (copy_to_user(argp, &irq_event, sizeof irq_event)) @@ -1537,10 +1535,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, goto out; if (copy_to_user(user_stack, stack, - sizeof(struct kvm_ia64_vcpu_stack))) { - r = -EFAULT; + sizeof(struct kvm_ia64_vcpu_stack))) goto out; - } break; } diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 6c5547d82bbe..81f3b0b5601e 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -77,14 +77,4 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; -#define KVM_REG_MASK 0x001f -#define KVM_REG_EXT_MASK 0xffe0 -#define KVM_REG_GPR 0x0000 -#define KVM_REG_FPR 0x0020 -#define KVM_REG_QPR 0x0040 -#define KVM_REG_FQPR 0x0060 - -#define KVM_INTERRUPT_SET -1U -#define KVM_INTERRUPT_UNSET -2U - #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 7238c048e5bb..aadf2dd6f84e 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -88,7 +88,6 @@ #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 -#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index ee7992189c6e..db7db0a96967 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -29,40 +29,39 @@ struct kvmppc_slb { u64 vsid; u64 orige; u64 origv; - bool valid : 1; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool large : 1; /* PTEs are 16MB */ - bool tb : 1; /* 1TB segment */ - bool class : 1; + bool valid; + bool Ks; + bool Kp; + bool nx; + bool large; /* PTEs are 16MB */ + bool tb; /* 1TB segment */ + bool class; }; struct kvmppc_sr { u32 raw; u32 vsid; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool valid : 1; + bool Ks; + bool Kp; + bool nx; }; struct kvmppc_bat { u64 raw; u32 bepi; u32 bepi_mask; + bool vs; + bool vp; u32 brpn; u8 wimg; u8 pp; - bool vs : 1; - bool vp : 1; }; struct kvmppc_sid_map { u64 guest_vsid; u64 guest_esid; u64 host_vsid; - bool valid : 1; + bool valid; }; #define SID_MAP_BITS 9 @@ -83,10 +82,9 @@ struct kvmppc_vcpu_book3s { struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; - u64 gqr[8]; int slb_nr; - u32 dsisr; u64 sdr1; + u64 dsisr; u64 hior; u64 msr_mask; u64 vsid_first; @@ -100,12 +98,11 @@ struct kvmppc_vcpu_book3s { #define CONTEXT_GUEST 1 #define CONTEXT_GUEST_END 2 -#define VSID_REAL_DR 0x7ffffffffff00000ULL -#define VSID_REAL_IR 0x7fffffffffe00000ULL -#define VSID_SPLIT_MASK 0x7fffffffffe00000ULL -#define VSID_REAL 0x7fffffffffc00000ULL -#define VSID_BAT 0x7fffffffffb00000ULL -#define VSID_PR 0x8000000000000000ULL +#define VSID_REAL 0xfffffffffff00000 +#define VSID_REAL_DR 0xffffffffffe00000 +#define VSID_REAL_IR 0xffffffffffd00000 +#define VSID_BAT 0xffffffffffc00000 +#define VSID_PR 0x8000000000000000 extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); @@ -117,13 +114,11 @@ extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); -extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); -extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); -extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern u32 kvmppc_trampoline_lowmem; extern u32 kvmppc_trampoline_enter; @@ -131,8 +126,6 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); extern void kvmppc_load_up_vsx(void); -extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); -extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { @@ -148,11 +141,6 @@ static inline ulong dsisr(void) extern void kvm_return_point(void); -/* Magic register values loaded into r3 and r4 before the 'sc' assembly - * instruction for the OSI hypercalls */ -#define OSI_SC_MAGIC_R3 0x113724FA -#define OSI_SC_MAGIC_R4 0x77810F9B - #define INS_DCBZ 0x7c0007ec #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h deleted file mode 100644 index 94f05de9ad04..000000000000 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright Novell Inc. 2010 - * - * Authors: Alexander Graf <agraf@suse.de> - */ - -#ifndef __ASM_KVM_FPU_H__ -#define __ASM_KVM_FPU_H__ - -#include <linux/types.h> - -extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); -extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); -extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); - -extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); -extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); - -extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, - u32 *src3); -extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, - u32 *src3); -extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, - u32 *src3); -extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, - u32 *src3); -extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, - u32 *src3); - -#define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ - u64 *dst, u64 *src1); -#define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ - u64 *dst, u64 *src1, u64 *src2); -#define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ - u64 *dst, u64 *src1, u64 *src2, u64 *src3); - -extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); -extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); - -FPD_ONE_IN(fsqrts) -FPD_ONE_IN(frsqrtes) -FPD_ONE_IN(fres) -FPD_ONE_IN(frsp) -FPD_ONE_IN(fctiw) -FPD_ONE_IN(fctiwz) -FPD_ONE_IN(fsqrt) -FPD_ONE_IN(fre) -FPD_ONE_IN(frsqrte) -FPD_ONE_IN(fneg) -FPD_ONE_IN(fabs) -FPD_TWO_IN(fadds) -FPD_TWO_IN(fsubs) -FPD_TWO_IN(fdivs) -FPD_TWO_IN(fmuls) -FPD_TWO_IN(fcpsgn) -FPD_TWO_IN(fdiv) -FPD_TWO_IN(fadd) -FPD_TWO_IN(fmul) -FPD_TWO_IN(fsub) -FPD_THREE_IN(fmsubs) -FPD_THREE_IN(fmadds) -FPD_THREE_IN(fnmsubs) -FPD_THREE_IN(fnmadds) -FPD_THREE_IN(fsel) -FPD_THREE_IN(fmsub) -FPD_THREE_IN(fmadd) -FPD_THREE_IN(fnmsub) -FPD_THREE_IN(fnmadd) - -#endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5869a487e2e0..5e5bae7e152f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -127,9 +127,9 @@ struct kvmppc_pte { u64 eaddr; u64 vpage; u64 raddr; - bool may_read : 1; - bool may_write : 1; - bool may_execute : 1; + bool may_read; + bool may_write; + bool may_execute; }; struct kvmppc_mmu { @@ -175,7 +175,7 @@ struct kvm_vcpu_arch { ulong gpr[32]; u64 fpr[32]; - u64 fpscr; + u32 fpscr; #ifdef CONFIG_ALTIVEC vector128 vr[32]; @@ -186,11 +186,6 @@ struct kvm_vcpu_arch { u64 vsr[32]; #endif -#ifdef CONFIG_PPC_BOOK3S - /* For Gekko paired singles */ - u32 qpr[32]; -#endif - ulong pc; ulong ctr; ulong lr; @@ -260,7 +255,7 @@ struct kvm_vcpu_arch { u32 last_inst; #ifdef CONFIG_PPC64 - u32 fault_dsisr; + ulong fault_dsisr; #endif ulong fault_dear; ulong fault_esr; @@ -270,11 +265,8 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; - u8 mmio_sign_extend; u8 dcr_needed; u8 dcr_is_write; - u8 osi_needed; - u8 osi_enabled; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6a2464e4d6b9..e2642829e435 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -37,7 +37,6 @@ enum emulation_result { EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ - EMULATE_AGAIN, /* something went wrong. go again */ }; extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -49,11 +48,8 @@ extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_bigendian); -extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, unsigned int bytes, - int is_bigendian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u64 val, unsigned int bytes, int is_bigendian); + u32 val, unsigned int bytes, int is_bigendian); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -92,8 +88,6 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); -extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); @@ -105,39 +99,6 @@ extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); -/* - * Cuts out inst bits with ordering according to spec. - * That means the leftmost bit is zero. All given bits are included. - */ -static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb) -{ - u32 r; - u32 mask; - - BUG_ON(msb > lsb); - - mask = (1 << (lsb - msb + 1)) - 1; - r = (inst >> (63 - lsb)) & mask; - - return r; -} - -/* - * Replaces inst bits with ordering according to spec. - */ -static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) -{ - u32 r; - u32 mask; - - BUG_ON(msb > lsb); - - mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb); - r = (inst & ~mask) | ((value << (63 - lsb)) & mask); - - return r; -} - #ifdef CONFIG_PPC_BOOK3S /* We assume we're always acting on the current vcpu */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 8a69a39a10b1..5572e86223f4 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -293,12 +293,10 @@ #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ #define HID1_PS (1<<16) /* 750FX PLL selection */ #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ -#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ -#define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ #define SPRN_HID5 0x3F6 /* 970 HID5 */ #define SPRN_HID6 0x3F9 /* BE HID 6 */ #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ @@ -467,14 +465,6 @@ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ -#define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */ -#define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */ -#define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */ -#define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */ -#define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */ -#define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */ -#define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */ - #define SPRN_SCOMC 0x114 /* SCOM Access Control */ #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index bc9f39d2598b..ab3e392ac63c 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,10 +101,6 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); -#ifndef CONFIG_BOOKE -EXPORT_SYMBOL_GPL(cvt_df); -EXPORT_SYMBOL_GPL(cvt_fd); -#endif EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index eba721e39328..56484d652377 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -40,8 +40,6 @@ kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) kvm-book3s_64-objs := \ $(common-objs-y) \ - fpu.o \ - book3s_paired_singles.o \ book3s.o \ book3s_64_emulate.o \ book3s_64_interrupts.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index cf806e2efe70..25da07fd9f77 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -29,7 +29,6 @@ #include <linux/gfp.h> #include <linux/sched.h> #include <linux/vmalloc.h> -#include <linux/highmem.h> #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -37,8 +36,7 @@ /* #define EXIT_DEBUG_SIMPLE */ /* #define DEBUG_EXT */ -static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, - ulong msr); +static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); struct kvm_stats_debugfs_item debugfs_entries[] = { { "exits", VCPU_STAT(sum_exits) }, @@ -135,21 +133,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { - bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; - - /* Flush split mode PTEs */ - if (dr != ir) - kvmppc_mmu_pte_vflush(vcpu, VSID_SPLIT_MASK, - VSID_SPLIT_MASK); - kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); } - - /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) - kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) @@ -232,12 +218,6 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); } -void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, - struct kvm_interrupt *irq) -{ - kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); -} - int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; @@ -357,10 +337,6 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) !strcmp(cur_cpu_spec->platform, "ppc970")) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; - /* Cell performs badly if MSR_FEx are set. So let's hope nobody - really needs them in a VM on Cell and force disable them. */ - if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) - to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); } /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To @@ -374,29 +350,34 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) */ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { - struct page *hpage; - u64 hpage_offset; + bool touched = false; + hva_t hpage; u32 *page; int i; - hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) + hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (kvm_is_error_hva(hpage)) return; - hpage_offset = pte->raddr & ~PAGE_MASK; - hpage_offset &= ~0xFFFULL; - hpage_offset /= 4; + hpage |= pte->raddr & ~PAGE_MASK; + hpage &= ~0xFFFULL; - get_page(hpage); - page = kmap_atomic(hpage, KM_USER0); + page = vmalloc(HW_PAGE_SIZE); - /* patch dcbz into reserved instruction, so we trap */ - for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) - if ((page[i] & 0xff0007ff) == INS_DCBZ) - page[i] &= 0xfffffff7; + if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) + goto out; + + for (i=0; i < HW_PAGE_SIZE / 4; i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) { + page[i] &= 0xfffffff7; // reserved instruction, so we trap + touched = true; + } - kunmap_atomic(page, KM_USER0); - put_page(hpage); + if (touched) + copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); + +out: + vfree(page); } static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, @@ -410,7 +391,15 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, } else { pte->eaddr = eaddr; pte->raddr = eaddr & 0xffffffff; - pte->vpage = VSID_REAL | eaddr >> 12; + pte->vpage = eaddr >> 12; + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + case 0: + pte->vpage |= VSID_REAL; + case MSR_DR: + pte->vpage |= VSID_REAL_DR; + case MSR_IR: + pte->vpage |= VSID_REAL_IR; + } pte->may_read = true; pte->may_write = true; pte->may_execute = true; @@ -445,55 +434,55 @@ err: return kvmppc_bad_hva(); } -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) +int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) { struct kvmppc_pte pte; + hva_t hva = eaddr; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) - return -ENOENT; + if (kvmppc_xlate(vcpu, eaddr, false, &pte)) + goto err; - *eaddr = pte.raddr; + hva = kvmppc_pte_to_hva(vcpu, &pte, false); + if (kvm_is_error_hva(hva)) + goto err; - if (!pte.may_write) - return -EPERM; + if (copy_to_user((void __user *)hva, ptr, size)) { + printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); + goto err; + } - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; + return 0; - return EMULATE_DONE; +err: + return -ENOENT; } -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = *eaddr; + hva_t hva = eaddr; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) - goto nopte; - - *eaddr = pte.raddr; + if (kvmppc_xlate(vcpu, eaddr, data, &pte)) + goto err; hva = kvmppc_pte_to_hva(vcpu, &pte, true); if (kvm_is_error_hva(hva)) - goto mmio; + goto err; if (copy_from_user(ptr, (void __user *)hva, size)) { printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; + goto err; } - return EMULATE_DONE; + return 0; -nopte: +err: return -ENOENT; -mmio: - return EMULATE_DO_MMIO; } static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -510,10 +499,12 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; - bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; - relocated = data ? dr : ir; + if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { + relocated = (vcpu->arch.msr & MSR_DR); + } else { + relocated = (vcpu->arch.msr & MSR_IR); + } /* Resolve real address if translation turned on */ if (relocated) { @@ -525,18 +516,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.raddr = eaddr & 0xffffffff; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; - } - - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - case 0: - pte.vpage |= VSID_REAL; - break; - case MSR_DR: - pte.vpage |= VSID_REAL_DR; - break; - case MSR_IR: - pte.vpage |= VSID_REAL_IR; - break; + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + case 0: + pte.vpage |= VSID_REAL; + case MSR_DR: + pte.vpage |= VSID_REAL_DR; + case MSR_IR: + pte.vpage |= VSID_REAL_IR; + } } if (vcpu->arch.mmu.is_dcbz32(vcpu) && @@ -596,13 +583,11 @@ static inline int get_fpr_index(int i) } /* Give up external provider (FPU, Altivec, VSX) */ -void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) +static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { struct thread_struct *t = ¤t->thread; u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX u64 *vcpu_vsx = vcpu->arch.vsr; -#endif u64 *thread_fpr = (u64*)t->fpr; int i; @@ -644,64 +629,21 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) kvmppc_recalc_shadow_msr(vcpu); } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = vcpu->arch.pc; - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &vcpu->arch.last_inst, false); - if (ret == -ENOENT) { - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? */ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) { struct thread_struct *t = ¤t->thread; u64 *vcpu_fpr = vcpu->arch.fpr; -#ifdef CONFIG_VSX u64 *vcpu_vsx = vcpu->arch.vsr; -#endif u64 *thread_fpr = (u64*)t->fpr; int i; - /* When we have paired singles, we emulate in software */ - if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) - return RESUME_GUEST; - if (!(vcpu->arch.msr & msr)) { kvmppc_book3s_queue_irqprio(vcpu, exit_nr); return RESUME_GUEST; } - /* We already own the ext */ - if (vcpu->arch.guest_owned_ext & msr) { - return RESUME_GUEST; - } - #ifdef DEBUG_EXT printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif @@ -778,7 +720,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * that no guest that needs the dcbz hack does NX. */ kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); - r = RESUME_GUEST; } else { vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); @@ -828,7 +769,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; ulong flags; -program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; if (vcpu->arch.msr & MSR_PR) { @@ -849,80 +789,33 @@ program_interrupt: case EMULATE_DONE: r = RESUME_GUEST_NV; break; - case EMULATE_AGAIN: - r = RESUME_GUEST; - break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; - case EMULATE_DO_MMIO: - run->exit_reason = KVM_EXIT_MMIO; - r = RESUME_HOST_NV; - break; default: BUG(); } break; } case BOOK3S_INTERRUPT_SYSCALL: - // XXX make user settable - if (vcpu->arch.osi_enabled && - (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && - (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { - u64 *gprs = run->osi.gprs; - int i; - - run->exit_reason = KVM_EXIT_OSI; - for (i = 0; i < 32; i++) - gprs[i] = kvmppc_get_gpr(vcpu, i); - vcpu->arch.osi_needed = 1; - r = RESUME_HOST_NV; - - } else { - vcpu->stat.syscall_exits++; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; - } +#ifdef EXIT_DEBUG + printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); +#endif + vcpu->stat.syscall_exits++; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_FP_UNAVAIL: + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); + break; case BOOK3S_INTERRUPT_ALTIVEC: - case BOOK3S_INTERRUPT_VSX: - { - int ext_msr = 0; - - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; - } - - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); - break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; - break; - default: - /* nothing to worry about - go again */ - break; - } + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); break; - } - case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, - vcpu->arch.last_inst); - vcpu->arch.dear = kvmppc_alignment_dar(vcpu, - vcpu->arch.last_inst); - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - } - r = RESUME_GUEST; + case BOOK3S_INTERRUPT_VSX: + r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); break; case BOOK3S_INTERRUPT_MACHINE_CHECK: case BOOK3S_INTERRUPT_TRACE: @@ -974,8 +867,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - regs->pc = vcpu->arch.pc; regs->cr = kvmppc_get_cr(vcpu); regs->ctr = vcpu->arch.ctr; @@ -996,8 +887,6 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); - vcpu_put(vcpu); - return 0; } @@ -1005,8 +894,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; - vcpu_load(vcpu); - vcpu->arch.pc = regs->pc; kvmppc_set_cr(vcpu, regs->cr); vcpu->arch.ctr = regs->ctr; @@ -1026,8 +913,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); - vcpu_put(vcpu); - return 0; } @@ -1158,12 +1043,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) struct kvm_vcpu *vcpu; int err; - vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); + vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, + get_order(sizeof(struct kvmppc_vcpu_book3s))); if (!vcpu_book3s) { err = -ENOMEM; goto out; } - memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); vcpu = &vcpu_book3s->vcpu; err = kvm_vcpu_init(vcpu, kvm, id); @@ -1197,7 +1082,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) return vcpu; free_vcpu: - vfree(vcpu_book3s); + free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); out: return ERR_PTR(err); } @@ -1208,7 +1093,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) __destroy_context(vcpu_book3s->context_id); kvm_vcpu_uninit(vcpu); - vfree(vcpu_book3s); + free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); } extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -1216,12 +1101,8 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; struct thread_struct ext_bkp; -#ifdef CONFIG_ALTIVEC bool save_vec = current->thread.used_vr; -#endif -#ifdef CONFIG_VSX bool save_vsx = current->thread.used_vsr; -#endif ulong ext_msr; /* No need to go into the guest when all we do is going out */ @@ -1262,10 +1143,6 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* XXX we get called with irq disabled - change that! */ local_irq_enable(); - /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) - kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - ret = __kvmppc_vcpu_entry(kvm_run, vcpu); local_irq_disable(); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 7071e22b42ff..faf99f20d993 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -37,7 +37,7 @@ #define dprintk(X...) do { } while(0) #endif -#ifdef DEBUG_MMU_PTE +#ifdef DEBUG_PTE #define dprintk_pte(X...) printk(KERN_INFO X) #else #define dprintk_pte(X...) do { } while(0) @@ -57,8 +57,6 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); -static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, - u64 *vsid); static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) { @@ -68,14 +66,13 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t e static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, bool data) { - u64 vsid; + struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); struct kvmppc_pte pte; if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) return pte.vpage; - kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); - return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); + return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); } static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) @@ -145,13 +142,8 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, bat->bepi_mask); } if ((eaddr & bat->bepi_mask) == bat->bepi) { - u64 vsid; - kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, - eaddr >> SID_SHIFT, &vsid); - vsid <<= 16; - pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; - pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); + pte->vpage = (eaddr >> 12) | VSID_BAT; pte->may_read = bat->pp; pte->may_write = bat->pp > 1; pte->may_execute = true; @@ -310,7 +302,6 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, /* And then put in the new SR */ sre->raw = value; sre->vsid = (value & 0x0fffffff); - sre->valid = (value & 0x80000000) ? false : true; sre->Ks = (value & 0x40000000) ? true : false; sre->Kp = (value & 0x20000000) ? true : false; sre->nx = (value & 0x10000000) ? true : false; @@ -321,7 +312,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) { - kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); + kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); } static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, @@ -342,22 +333,15 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, break; case MSR_DR|MSR_IR: { - ulong ea = esid << SID_SHIFT; - struct kvmppc_sr *sr = find_sr(to_book3s(vcpu), ea); - - if (!sr->valid) - return -1; - - *vsid = sr->vsid; + ulong ea; + ea = esid << SID_SHIFT; + *vsid = find_sr(to_book3s(vcpu), ea)->vsid; break; } default: BUG(); } - if (vcpu->arch.msr & MSR_PR) - *vsid |= VSID_PR; - return 0; } diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c index 8f50776a9a1d..2b0ee7e040c9 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_64_emulate.c @@ -28,16 +28,13 @@ #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_MTMSR 146 #define OP_31_XOP_MTMSRD 178 -#define OP_31_XOP_MTSR 210 #define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIE 306 #define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBIE 434 #define OP_31_XOP_SLBIA 498 -#define OP_31_XOP_MFSR 595 #define OP_31_XOP_MFSRIN 659 -#define OP_31_XOP_DCBA 758 #define OP_31_XOP_SLBMFEV 851 #define OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 @@ -45,20 +42,6 @@ /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 -#define OP_LFS 48 -#define OP_LFD 50 -#define OP_STFS 52 -#define OP_STFD 54 - -#define SPRN_GQR0 912 -#define SPRN_GQR1 913 -#define SPRN_GQR2 914 -#define SPRN_GQR3 915 -#define SPRN_GQR4 916 -#define SPRN_GQR5 917 -#define SPRN_GQR6 918 -#define SPRN_GQR7 919 - int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -97,18 +80,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_MTMSR: kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); break; - case OP_31_XOP_MFSR: - { - int srnum; - - srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); - if (vcpu->arch.mmu.mfsrin) { - u32 sr; - sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); - kvmppc_set_gpr(vcpu, get_rt(inst), sr); - } - break; - } case OP_31_XOP_MFSRIN: { int srnum; @@ -121,11 +92,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, } break; } - case OP_31_XOP_MTSR: - vcpu->arch.mmu.mtsrin(vcpu, - (inst >> 16) & 0xf, - kvmppc_get_gpr(vcpu, get_rs(inst))); - break; case OP_31_XOP_MTSRIN: vcpu->arch.mmu.mtsrin(vcpu, (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, @@ -184,17 +150,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, get_rt(inst), t); } break; - case OP_31_XOP_DCBA: - /* Gets treated as NOP */ - break; case OP_31_XOP_DCBZ: { ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); ulong ra = 0; - ulong addr, vaddr; + ulong addr; u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; - u32 dsisr; - int r; if (get_ra(inst)) ra = kvmppc_get_gpr(vcpu, get_ra(inst)); @@ -202,25 +163,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, addr = (ra + rb) & ~31ULL; if (!(vcpu->arch.msr & MSR_SF)) addr &= 0xffffffff; - vaddr = addr; - - r = kvmppc_st(vcpu, &addr, 32, zeros, true); - if ((r == -ENOENT) || (r == -EPERM)) { - *advance = 0; - vcpu->arch.dear = vaddr; - vcpu->arch.fault_dear = vaddr; - - dsisr = DSISR_ISSTORE; - if (r == -ENOENT) - dsisr |= DSISR_NOHPTE; - else if (r == -EPERM) - dsisr |= DSISR_PROTFAULT; - - to_book3s(vcpu)->dsisr = dsisr; - vcpu->arch.fault_dsisr = dsisr; + if (kvmppc_st(vcpu, addr, 32, zeros)) { + vcpu->arch.dear = addr; + vcpu->arch.fault_dear = addr; + to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | + DSISR_ISSTORE; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); + kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL); } break; @@ -233,9 +184,6 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; } - if (emulated == EMULATE_FAIL) - emulated = kvmppc_emulate_paired_single(run, vcpu); - return emulated; } @@ -259,34 +207,6 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, } } -static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_bat *bat; - - switch (sprn) { - case SPRN_IBAT0U ... SPRN_IBAT3L: - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; - break; - case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; - break; - case SPRN_DBAT0U ... SPRN_DBAT3L: - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; - break; - case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; - break; - default: - BUG(); - } - - if (sprn % 2) - return bat->raw >> 32; - else - return bat->raw; -} - static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); @@ -297,13 +217,13 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; break; case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; + bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; break; case SPRN_DBAT0U ... SPRN_DBAT3L: bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; break; case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; + bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; break; default: BUG(); @@ -338,7 +258,6 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) /* BAT writes happen so rarely that we're ok to flush * everything here */ kvmppc_mmu_pte_flush(vcpu, 0, 0); - kvmppc_mmu_flush_segments(vcpu); break; case SPRN_HID0: to_book3s(vcpu)->hid[0] = spr_val; @@ -349,29 +268,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_HID2: to_book3s(vcpu)->hid[2] = spr_val; break; - case SPRN_HID2_GEKKO: - to_book3s(vcpu)->hid[2] = spr_val; - /* HID2.PSE controls paired single on gekko */ - switch (vcpu->arch.pvr) { - case 0x00080200: /* lonestar 2.0 */ - case 0x00088202: /* lonestar 2.2 */ - case 0x70000100: /* gekko 1.0 */ - case 0x00080100: /* gekko 2.0 */ - case 0x00083203: /* gekko 2.3a */ - case 0x00083213: /* gekko 2.3b */ - case 0x00083204: /* gekko 2.4 */ - case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ - if (spr_val & (1 << 29)) { /* HID2.PSE */ - vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; - kvmppc_giveup_ext(vcpu, MSR_FP); - } else { - vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; - } - break; - } - break; case SPRN_HID4: - case SPRN_HID4_GEKKO: to_book3s(vcpu)->hid[4] = spr_val; break; case SPRN_HID5: @@ -381,30 +278,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_GQR0: - case SPRN_GQR1: - case SPRN_GQR2: - case SPRN_GQR3: - case SPRN_GQR4: - case SPRN_GQR5: - case SPRN_GQR6: - case SPRN_GQR7: - to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; - break; case SPRN_ICTC: case SPRN_THRM1: case SPRN_THRM2: case SPRN_THRM3: case SPRN_CTRLF: case SPRN_CTRLT: - case SPRN_L2CR: - case SPRN_MMCR0_GEKKO: - case SPRN_MMCR1_GEKKO: - case SPRN_PMC1_GEKKO: - case SPRN_PMC2_GEKKO: - case SPRN_PMC3_GEKKO: - case SPRN_PMC4_GEKKO: - case SPRN_WPAR_GEKKO: break; default: printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); @@ -422,12 +301,6 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) int emulated = EMULATE_DONE; switch (sprn) { - case SPRN_IBAT0U ... SPRN_IBAT3L: - case SPRN_IBAT4U ... SPRN_IBAT7L: - case SPRN_DBAT0U ... SPRN_DBAT3L: - case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); - break; case SPRN_SDR1: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; @@ -447,40 +320,19 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); break; case SPRN_HID2: - case SPRN_HID2_GEKKO: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); break; case SPRN_HID4: - case SPRN_HID4_GEKKO: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); break; case SPRN_HID5: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); break; - case SPRN_GQR0: - case SPRN_GQR1: - case SPRN_GQR2: - case SPRN_GQR3: - case SPRN_GQR4: - case SPRN_GQR5: - case SPRN_GQR6: - case SPRN_GQR7: - kvmppc_set_gpr(vcpu, rt, - to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); - break; case SPRN_THRM1: case SPRN_THRM2: case SPRN_THRM3: case SPRN_CTRLF: case SPRN_CTRLT: - case SPRN_L2CR: - case SPRN_MMCR0_GEKKO: - case SPRN_MMCR1_GEKKO: - case SPRN_PMC1_GEKKO: - case SPRN_PMC2_GEKKO: - case SPRN_PMC3_GEKKO: - case SPRN_PMC4_GEKKO: - case SPRN_WPAR_GEKKO: kvmppc_set_gpr(vcpu, rt, 0); break; default: @@ -494,73 +346,3 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) -{ - u32 dsisr = 0; - - /* - * This is what the spec says about DSISR bits (not mentioned = 0): - * - * 12:13 [DS] Set to bits 30:31 - * 15:16 [X] Set to bits 29:30 - * 17 [X] Set to bit 25 - * [D/DS] Set to bit 5 - * 18:21 [X] Set to bits 21:24 - * [D/DS] Set to bits 1:4 - * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) - * 27:31 Set to bits 11:15 (RA) - */ - - switch (get_op(inst)) { - /* D-form */ - case OP_LFS: - case OP_LFD: - case OP_STFD: - case OP_STFS: - dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ - dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ - break; - /* X-form */ - case 31: - dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ - dsisr |= (inst << 8) & 0x04000; /* bit 17 */ - dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ - break; - default: - printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); - break; - } - - dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ - - return dsisr; -} - -ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) -{ - ulong dar = 0; - ulong ra; - - switch (get_op(inst)) { - case OP_LFS: - case OP_LFD: - case OP_STFD: - case OP_STFS: - ra = get_ra(inst); - if (ra) - dar = kvmppc_get_gpr(vcpu, ra); - dar += (s32)((s16)inst); - break; - case 31: - ra = get_ra(inst); - if (ra) - dar = kvmppc_get_gpr(vcpu, ra); - dar += kvmppc_get_gpr(vcpu, get_rb(inst)); - break; - default: - printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); - break; - } - - return dar; -} diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S index faca87610d65..c1584d0cbce8 100644 --- a/arch/powerpc/kvm/book3s_64_interrupts.S +++ b/arch/powerpc/kvm/book3s_64_interrupts.S @@ -171,7 +171,7 @@ kvmppc_handler_highmem: std r3, VCPU_PC(r7) std r4, VCPU_SHADOW_SRR1(r7) std r5, VCPU_FAULT_DEAR(r7) - stw r6, VCPU_FAULT_DSISR(r7) + std r6, VCPU_FAULT_DSISR(r7) ld r5, VCPU_HFLAGS(r7) rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index a01e9c5a3fc7..f2899b297ffd 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -257,9 +257,16 @@ map_again: if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ +#ifdef USE_SECONDARY hash = ~hash; - vflags ^= HPTE_V_SECONDARY; attempt++; + if (attempt % 2) + vflags = HPTE_V_SECONDARY; + else + vflags = 0; +#else + attempt = 2; +#endif goto map_again; } else { int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); @@ -270,13 +277,6 @@ map_again: (rflags & HPTE_R_N) ? '-' : 'x', orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); - /* The ppc_md code may give us a secondary entry even though we - asked for a primary. Fix up. */ - if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { - hash = ~hash; - hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - } - pte->slot = hpteg + (ret & 7); pte->host_va = va; pte->pte = *orig_pte; diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S index bd08535fcdc8..c83c60ad96c5 100644 --- a/arch/powerpc/kvm/book3s_64_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S @@ -164,15 +164,24 @@ _GLOBAL(kvmppc_rmcall) #define define_load_up(what) \ \ _GLOBAL(kvmppc_load_up_ ## what); \ - stdu r1, -INT_FRAME_SIZE(r1); \ + subi r1, r1, INT_FRAME_SIZE; \ mflr r3; \ std r3, _LINK(r1); \ + mfmsr r4; \ + std r31, GPR3(r1); \ + mr r31, r4; \ + li r5, MSR_DR; \ + oris r5, r5, MSR_EE@h; \ + andc r4, r4, r5; \ + mtmsr r4; \ \ bl .load_up_ ## what; \ \ + mtmsr r31; \ ld r3, _LINK(r1); \ - mtlr r3; \ + ld r31, GPR3(r1); \ addi r1, r1, INT_FRAME_SIZE; \ + mtlr r3; \ blr define_load_up(fpu) diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 091967907954..35b762722187 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -145,7 +145,7 @@ slb_do_enter: lwz r11, (PACA_KVM_CR)(r13) mtcr r11 - lwz r11, (PACA_KVM_XER)(r13) + ld r11, (PACA_KVM_XER)(r13) mtxer r11 ld r11, (PACA_KVM_R11)(r13) diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c deleted file mode 100644 index 7a27bac8c44a..000000000000 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ /dev/null @@ -1,1289 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright Novell Inc 2010 - * - * Authors: Alexander Graf <agraf@suse.de> - */ - -#include <asm/kvm.h> -#include <asm/kvm_ppc.h> -#include <asm/disassemble.h> -#include <asm/kvm_book3s.h> -#include <asm/kvm_fpu.h> -#include <asm/reg.h> -#include <asm/cacheflush.h> -#include <linux/vmalloc.h> - -/* #define DEBUG */ - -#ifdef DEBUG -#define dprintk printk -#else -#define dprintk(...) do { } while(0); -#endif - -#define OP_LFS 48 -#define OP_LFSU 49 -#define OP_LFD 50 -#define OP_LFDU 51 -#define OP_STFS 52 -#define OP_STFSU 53 -#define OP_STFD 54 -#define OP_STFDU 55 -#define OP_PSQ_L 56 -#define OP_PSQ_LU 57 -#define OP_PSQ_ST 60 -#define OP_PSQ_STU 61 - -#define OP_31_LFSX 535 -#define OP_31_LFSUX 567 -#define OP_31_LFDX 599 -#define OP_31_LFDUX 631 -#define OP_31_STFSX 663 -#define OP_31_STFSUX 695 -#define OP_31_STFX 727 -#define OP_31_STFUX 759 -#define OP_31_LWIZX 887 -#define OP_31_STFIWX 983 - -#define OP_59_FADDS 21 -#define OP_59_FSUBS 20 -#define OP_59_FSQRTS 22 -#define OP_59_FDIVS 18 -#define OP_59_FRES 24 -#define OP_59_FMULS 25 -#define OP_59_FRSQRTES 26 -#define OP_59_FMSUBS 28 -#define OP_59_FMADDS 29 -#define OP_59_FNMSUBS 30 -#define OP_59_FNMADDS 31 - -#define OP_63_FCMPU 0 -#define OP_63_FCPSGN 8 -#define OP_63_FRSP 12 -#define OP_63_FCTIW 14 -#define OP_63_FCTIWZ 15 -#define OP_63_FDIV 18 -#define OP_63_FADD 21 -#define OP_63_FSQRT 22 -#define OP_63_FSEL 23 -#define OP_63_FRE 24 -#define OP_63_FMUL 25 -#define OP_63_FRSQRTE 26 -#define OP_63_FMSUB 28 -#define OP_63_FMADD 29 -#define OP_63_FNMSUB 30 -#define OP_63_FNMADD 31 -#define OP_63_FCMPO 32 -#define OP_63_MTFSB1 38 // XXX -#define OP_63_FSUB 20 -#define OP_63_FNEG 40 -#define OP_63_MCRFS 64 -#define OP_63_MTFSB0 70 -#define OP_63_FMR 72 -#define OP_63_MTFSFI 134 -#define OP_63_FABS 264 -#define OP_63_MFFS 583 -#define OP_63_MTFSF 711 - -#define OP_4X_PS_CMPU0 0 -#define OP_4X_PSQ_LX 6 -#define OP_4XW_PSQ_STX 7 -#define OP_4A_PS_SUM0 10 -#define OP_4A_PS_SUM1 11 -#define OP_4A_PS_MULS0 12 -#define OP_4A_PS_MULS1 13 -#define OP_4A_PS_MADDS0 14 -#define OP_4A_PS_MADDS1 15 -#define OP_4A_PS_DIV 18 -#define OP_4A_PS_SUB 20 -#define OP_4A_PS_ADD 21 -#define OP_4A_PS_SEL 23 -#define OP_4A_PS_RES 24 -#define OP_4A_PS_MUL 25 -#define OP_4A_PS_RSQRTE 26 -#define OP_4A_PS_MSUB 28 -#define OP_4A_PS_MADD 29 -#define OP_4A_PS_NMSUB 30 -#define OP_4A_PS_NMADD 31 -#define OP_4X_PS_CMPO0 32 -#define OP_4X_PSQ_LUX 38 -#define OP_4XW_PSQ_STUX 39 -#define OP_4X_PS_NEG 40 -#define OP_4X_PS_CMPU1 64 -#define OP_4X_PS_MR 72 -#define OP_4X_PS_CMPO1 96 -#define OP_4X_PS_NABS 136 -#define OP_4X_PS_ABS 264 -#define OP_4X_PS_MERGE00 528 -#define OP_4X_PS_MERGE01 560 -#define OP_4X_PS_MERGE10 592 -#define OP_4X_PS_MERGE11 624 - -#define SCALAR_NONE 0 -#define SCALAR_HIGH (1 << 0) -#define SCALAR_LOW (1 << 1) -#define SCALAR_NO_PS0 (1 << 2) -#define SCALAR_NO_PS1 (1 << 3) - -#define GQR_ST_TYPE_MASK 0x00000007 -#define GQR_ST_TYPE_SHIFT 0 -#define GQR_ST_SCALE_MASK 0x00003f00 -#define GQR_ST_SCALE_SHIFT 8 -#define GQR_LD_TYPE_MASK 0x00070000 -#define GQR_LD_TYPE_SHIFT 16 -#define GQR_LD_SCALE_MASK 0x3f000000 -#define GQR_LD_SCALE_SHIFT 24 - -#define GQR_QUANTIZE_FLOAT 0 -#define GQR_QUANTIZE_U8 4 -#define GQR_QUANTIZE_U16 5 -#define GQR_QUANTIZE_S8 6 -#define GQR_QUANTIZE_S16 7 - -#define FPU_LS_SINGLE 0 -#define FPU_LS_DOUBLE 1 -#define FPU_LS_SINGLE_LOW 2 - -static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) -{ - struct thread_struct t; - - t.fpscr.val = vcpu->arch.fpscr; - cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); -} - -static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) -{ - u64 dsisr; - - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); - vcpu->arch.dear = eaddr; - /* Page Fault */ - dsisr = kvmppc_set_field(0, 33, 33, 1); - if (is_store) - to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); -} - -static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, - int rs, ulong addr, int ls_type) -{ - int emulated = EMULATE_FAIL; - struct thread_struct t; - int r; - char tmp[8]; - int len = sizeof(u32); - - if (ls_type == FPU_LS_DOUBLE) - len = sizeof(u64); - - t.fpscr.val = vcpu->arch.fpscr; - - /* read from memory */ - r = kvmppc_ld(vcpu, &addr, len, tmp, true); - vcpu->arch.paddr_accessed = addr; - - if (r < 0) { - kvmppc_inject_pf(vcpu, addr, false); - goto done_load; - } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); - goto done_load; - } - - emulated = EMULATE_DONE; - - /* put in registers */ - switch (ls_type) { - case FPU_LS_SINGLE: - cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); - vcpu->arch.qpr[rs] = *((u32*)tmp); - break; - case FPU_LS_DOUBLE: - vcpu->arch.fpr[rs] = *((u64*)tmp); - break; - } - - dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, - addr, len); - -done_load: - return emulated; -} - -static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - int rs, ulong addr, int ls_type) -{ - int emulated = EMULATE_FAIL; - struct thread_struct t; - int r; - char tmp[8]; - u64 val; - int len; - - t.fpscr.val = vcpu->arch.fpscr; - - switch (ls_type) { - case FPU_LS_SINGLE: - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); - val = *((u32*)tmp); - len = sizeof(u32); - break; - case FPU_LS_SINGLE_LOW: - *((u32*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs] & 0xffffffff; - len = sizeof(u32); - break; - case FPU_LS_DOUBLE: - *((u64*)tmp) = vcpu->arch.fpr[rs]; - val = vcpu->arch.fpr[rs]; - len = sizeof(u64); - break; - default: - val = 0; - len = 0; - } - - r = kvmppc_st(vcpu, &addr, len, tmp, true); - vcpu->arch.paddr_accessed = addr; - if (r < 0) { - kvmppc_inject_pf(vcpu, addr, true); - } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_store(run, vcpu, val, len, 1); - } else { - emulated = EMULATE_DONE; - } - - dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", - val, addr, len); - - return emulated; -} - -static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, - int rs, ulong addr, bool w, int i) -{ - int emulated = EMULATE_FAIL; - struct thread_struct t; - int r; - float one = 1.0; - u32 tmp[2]; - - t.fpscr.val = vcpu->arch.fpscr; - - /* read from memory */ - if (w) { - r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); - memcpy(&tmp[1], &one, sizeof(u32)); - } else { - r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); - } - vcpu->arch.paddr_accessed = addr; - if (r < 0) { - kvmppc_inject_pf(vcpu, addr, false); - goto done_load; - } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); - vcpu->arch.qpr[rs] = tmp[1]; - goto done_load; - } else if (r == EMULATE_DO_MMIO) { - emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); - goto done_load; - } - - emulated = EMULATE_DONE; - - /* put in registers */ - cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); - vcpu->arch.qpr[rs] = tmp[1]; - - dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], - tmp[1], addr, w ? 4 : 8); - -done_load: - return emulated; -} - -static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - int rs, ulong addr, bool w, int i) -{ - int emulated = EMULATE_FAIL; - struct thread_struct t; - int r; - u32 tmp[2]; - int len = w ? sizeof(u32) : sizeof(u64); - - t.fpscr.val = vcpu->arch.fpscr; - - cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); - tmp[1] = vcpu->arch.qpr[rs]; - - r = kvmppc_st(vcpu, &addr, len, tmp, true); - vcpu->arch.paddr_accessed = addr; - if (r < 0) { - kvmppc_inject_pf(vcpu, addr, true); - } else if ((r == EMULATE_DO_MMIO) && w) { - emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); - } else if (r == EMULATE_DO_MMIO) { - u64 val = ((u64)tmp[0] << 32) | tmp[1]; - emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); - } else { - emulated = EMULATE_DONE; - } - - dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", - tmp[0], tmp[1], addr, len); - - return emulated; -} - -/* - * Cuts out inst bits with ordering according to spec. - * That means the leftmost bit is zero. All given bits are included. - */ -static inline u32 inst_get_field(u32 inst, int msb, int lsb) -{ - return kvmppc_get_field(inst, msb + 32, lsb + 32); -} - -/* - * Replaces inst bits with ordering according to spec. - */ -static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) -{ - return kvmppc_set_field(inst, msb + 32, lsb + 32, value); -} - -bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) -{ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return false; - - switch (get_op(inst)) { - case OP_PSQ_L: - case OP_PSQ_LU: - case OP_PSQ_ST: - case OP_PSQ_STU: - case OP_LFS: - case OP_LFSU: - case OP_LFD: - case OP_LFDU: - case OP_STFS: - case OP_STFSU: - case OP_STFD: - case OP_STFDU: - return true; - case 4: - /* X form */ - switch (inst_get_field(inst, 21, 30)) { - case OP_4X_PS_CMPU0: - case OP_4X_PSQ_LX: - case OP_4X_PS_CMPO0: - case OP_4X_PSQ_LUX: - case OP_4X_PS_NEG: - case OP_4X_PS_CMPU1: - case OP_4X_PS_MR: - case OP_4X_PS_CMPO1: - case OP_4X_PS_NABS: - case OP_4X_PS_ABS: - case OP_4X_PS_MERGE00: - case OP_4X_PS_MERGE01: - case OP_4X_PS_MERGE10: - case OP_4X_PS_MERGE11: - return true; - } - /* XW form */ - switch (inst_get_field(inst, 25, 30)) { - case OP_4XW_PSQ_STX: - case OP_4XW_PSQ_STUX: - return true; - } - /* A form */ - switch (inst_get_field(inst, 26, 30)) { - case OP_4A_PS_SUM1: - case OP_4A_PS_SUM0: - case OP_4A_PS_MULS0: - case OP_4A_PS_MULS1: - case OP_4A_PS_MADDS0: - case OP_4A_PS_MADDS1: - case OP_4A_PS_DIV: - case OP_4A_PS_SUB: - case OP_4A_PS_ADD: - case OP_4A_PS_SEL: - case OP_4A_PS_RES: - case OP_4A_PS_MUL: - case OP_4A_PS_RSQRTE: - case OP_4A_PS_MSUB: - case OP_4A_PS_MADD: - case OP_4A_PS_NMSUB: - case OP_4A_PS_NMADD: - return true; - } - break; - case 59: - switch (inst_get_field(inst, 21, 30)) { - case OP_59_FADDS: - case OP_59_FSUBS: - case OP_59_FDIVS: - case OP_59_FRES: - case OP_59_FRSQRTES: - return true; - } - switch (inst_get_field(inst, 26, 30)) { - case OP_59_FMULS: - case OP_59_FMSUBS: - case OP_59_FMADDS: - case OP_59_FNMSUBS: - case OP_59_FNMADDS: - return true; - } - break; - case 63: - switch (inst_get_field(inst, 21, 30)) { - case OP_63_MTFSB0: - case OP_63_MTFSB1: - case OP_63_MTFSF: - case OP_63_MTFSFI: - case OP_63_MCRFS: - case OP_63_MFFS: - case OP_63_FCMPU: - case OP_63_FCMPO: - case OP_63_FNEG: - case OP_63_FMR: - case OP_63_FABS: - case OP_63_FRSP: - case OP_63_FDIV: - case OP_63_FADD: - case OP_63_FSUB: - case OP_63_FCTIW: - case OP_63_FCTIWZ: - case OP_63_FRSQRTE: - case OP_63_FCPSGN: - return true; - } - switch (inst_get_field(inst, 26, 30)) { - case OP_63_FMUL: - case OP_63_FSEL: - case OP_63_FMSUB: - case OP_63_FMADD: - case OP_63_FNMSUB: - case OP_63_FNMADD: - return true; - } - break; - case 31: - switch (inst_get_field(inst, 21, 30)) { - case OP_31_LFSX: - case OP_31_LFSUX: - case OP_31_LFDX: - case OP_31_LFDUX: - case OP_31_STFSX: - case OP_31_STFSUX: - case OP_31_STFX: - case OP_31_STFUX: - case OP_31_STFIWX: - return true; - } - break; - } - - return false; -} - -static int get_d_signext(u32 inst) -{ - int d = inst & 0x8ff; - - if (d & 0x800) - return -(d & 0x7ff); - - return (d & 0x7ff); -} - -static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, - int reg_out, int reg_in1, int reg_in2, - int reg_in3, int scalar, - void (*func)(struct thread_struct *t, - u32 *dst, u32 *src1, - u32 *src2, u32 *src3)) -{ - u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; - u32 ps0_out; - u32 ps0_in1, ps0_in2, ps0_in3; - u32 ps1_in1, ps1_in2, ps1_in3; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; - - /* RC */ - WARN_ON(rc); - - /* PS0 */ - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); - cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); - - if (scalar & SCALAR_LOW) - ps0_in2 = qpr[reg_in2]; - - func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); - - dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", - ps0_in1, ps0_in2, ps0_in3, ps0_out); - - if (!(scalar & SCALAR_NO_PS0)) - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); - - /* PS1 */ - ps1_in1 = qpr[reg_in1]; - ps1_in2 = qpr[reg_in2]; - ps1_in3 = qpr[reg_in3]; - - if (scalar & SCALAR_HIGH) - ps1_in2 = ps0_in2; - - if (!(scalar & SCALAR_NO_PS1)) - func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); - - dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", - ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); - - return EMULATE_DONE; -} - -static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, - int reg_out, int reg_in1, int reg_in2, - int scalar, - void (*func)(struct thread_struct *t, - u32 *dst, u32 *src1, - u32 *src2)) -{ - u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; - u32 ps0_out; - u32 ps0_in1, ps0_in2; - u32 ps1_out; - u32 ps1_in1, ps1_in2; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; - - /* RC */ - WARN_ON(rc); - - /* PS0 */ - cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); - - if (scalar & SCALAR_LOW) - ps0_in2 = qpr[reg_in2]; - else - cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); - - func(&t, &ps0_out, &ps0_in1, &ps0_in2); - - if (!(scalar & SCALAR_NO_PS0)) { - dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", - ps0_in1, ps0_in2, ps0_out); - - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); - } - - /* PS1 */ - ps1_in1 = qpr[reg_in1]; - ps1_in2 = qpr[reg_in2]; - - if (scalar & SCALAR_HIGH) - ps1_in2 = ps0_in2; - - func(&t, &ps1_out, &ps1_in1, &ps1_in2); - - if (!(scalar & SCALAR_NO_PS1)) { - qpr[reg_out] = ps1_out; - - dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", - ps1_in1, ps1_in2, qpr[reg_out]); - } - - return EMULATE_DONE; -} - -static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, - int reg_out, int reg_in, - void (*func)(struct thread_struct *t, - u32 *dst, u32 *src1)) -{ - u32 *qpr = vcpu->arch.qpr; - u64 *fpr = vcpu->arch.fpr; - u32 ps0_out, ps0_in; - u32 ps1_in; - struct thread_struct t; - t.fpscr.val = vcpu->arch.fpscr; - - /* RC */ - WARN_ON(rc); - - /* PS0 */ - cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); - func(&t, &ps0_out, &ps0_in); - - dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", - ps0_in, ps0_out); - - cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); - - /* PS1 */ - ps1_in = qpr[reg_in]; - func(&t, &qpr[reg_out], &ps1_in); - - dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", - ps1_in, qpr[reg_out]); - - return EMULATE_DONE; -} - -int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) -{ - u32 inst = vcpu->arch.last_inst; - enum emulation_result emulated = EMULATE_DONE; - - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; - u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; - u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; - u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; - - bool rcomp = (inst & 1) ? true : false; - u32 cr = kvmppc_get_cr(vcpu); - struct thread_struct t; -#ifdef DEBUG - int i; -#endif - - t.fpscr.val = vcpu->arch.fpscr; - - if (!kvmppc_inst_is_paired_single(vcpu, inst)) - return EMULATE_FAIL; - - if (!(vcpu->arch.msr & MSR_FP)) { - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); - return EMULATE_AGAIN; - } - - kvmppc_giveup_ext(vcpu, MSR_FP); - preempt_disable(); - enable_kernel_fp(); - /* Do we need to clear FE0 / FE1 here? Don't think so. */ - -#ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { - u32 f; - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); - dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", - i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); - } -#endif - - switch (get_op(inst)) { - case OP_PSQ_L: - { - ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; - bool w = inst_get_field(inst, 16, 16) ? true : false; - int i = inst_get_field(inst, 17, 19); - - addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); - break; - } - case OP_PSQ_LU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra); - bool w = inst_get_field(inst, 16, 16) ? true : false; - int i = inst_get_field(inst, 17, 19); - - addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_PSQ_ST: - { - ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; - bool w = inst_get_field(inst, 16, 16) ? true : false; - int i = inst_get_field(inst, 17, 19); - - addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); - break; - } - case OP_PSQ_STU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra); - bool w = inst_get_field(inst, 16, 16) ? true : false; - int i = inst_get_field(inst, 17, 19); - - addr += get_d_signext(inst); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case 4: - /* X form */ - switch (inst_get_field(inst, 21, 30)) { - case OP_4X_PS_CMPU0: - /* XXX */ - emulated = EMULATE_FAIL; - break; - case OP_4X_PSQ_LX: - { - ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; - bool w = inst_get_field(inst, 21, 21) ? true : false; - int i = inst_get_field(inst, 22, 24); - - addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); - break; - } - case OP_4X_PS_CMPO0: - /* XXX */ - emulated = EMULATE_FAIL; - break; - case OP_4X_PSQ_LUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra); - bool w = inst_get_field(inst, 21, 21) ? true : false; - int i = inst_get_field(inst, 22, 24); - - addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_4X_PS_NEG: - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - vcpu->arch.qpr[ax_rd] ^= 0x80000000; - break; - case OP_4X_PS_CMPU1: - /* XXX */ - emulated = EMULATE_FAIL; - break; - case OP_4X_PS_MR: - WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - break; - case OP_4X_PS_CMPO1: - /* XXX */ - emulated = EMULATE_FAIL; - break; - case OP_4X_PS_NABS: - WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - vcpu->arch.qpr[ax_rd] |= 0x80000000; - break; - case OP_4X_PS_ABS: - WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; - vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - vcpu->arch.qpr[ax_rd] &= ~0x80000000; - break; - case OP_4X_PS_MERGE00: - WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - cvt_df((double*)&vcpu->arch.fpr[ax_rb], - (float*)&vcpu->arch.qpr[ax_rd], &t); - break; - case OP_4X_PS_MERGE01: - WARN_ON(rcomp); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - break; - case OP_4X_PS_MERGE10: - WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], - (double*)&vcpu->arch.fpr[ax_rd], &t); - /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ - cvt_df((double*)&vcpu->arch.fpr[ax_rb], - (float*)&vcpu->arch.qpr[ax_rd], &t); - break; - case OP_4X_PS_MERGE11: - WARN_ON(rcomp); - /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ - cvt_fd((float*)&vcpu->arch.qpr[ax_ra], - (double*)&vcpu->arch.fpr[ax_rd], &t); - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; - break; - } - /* XW form */ - switch (inst_get_field(inst, 25, 30)) { - case OP_4XW_PSQ_STX: - { - ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; - bool w = inst_get_field(inst, 21, 21) ? true : false; - int i = inst_get_field(inst, 22, 24); - - addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); - break; - } - case OP_4XW_PSQ_STUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra); - bool w = inst_get_field(inst, 21, 21) ? true : false; - int i = inst_get_field(inst, 22, 24); - - addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - } - /* A form */ - switch (inst_get_field(inst, 26, 30)) { - case OP_4A_PS_SUM1: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); - vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; - break; - case OP_4A_PS_SUM0: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); - vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; - break; - case OP_4A_PS_MULS0: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); - break; - case OP_4A_PS_MULS1: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); - break; - case OP_4A_PS_MADDS0: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); - break; - case OP_4A_PS_MADDS1: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); - break; - case OP_4A_PS_DIV: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); - break; - case OP_4A_PS_SUB: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); - break; - case OP_4A_PS_ADD: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rb, SCALAR_NONE, fps_fadds); - break; - case OP_4A_PS_SEL: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); - break; - case OP_4A_PS_RES: - emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, - ax_rb, fps_fres); - break; - case OP_4A_PS_MUL: - emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); - break; - case OP_4A_PS_RSQRTE: - emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, - ax_rb, fps_frsqrte); - break; - case OP_4A_PS_MSUB: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); - break; - case OP_4A_PS_MADD: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); - break; - case OP_4A_PS_NMSUB: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); - break; - case OP_4A_PS_NMADD: - emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, - ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); - break; - } - break; - - /* Real FPU operations */ - - case OP_LFS: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, - FPU_LS_SINGLE); - break; - } - case OP_LFSU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, - FPU_LS_SINGLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_LFD: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, - FPU_LS_DOUBLE); - break; - } - case OP_LFDU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, - FPU_LS_DOUBLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_STFS: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, - FPU_LS_SINGLE); - break; - } - case OP_STFSU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, - FPU_LS_SINGLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_STFD: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, - FPU_LS_DOUBLE); - break; - } - case OP_STFDU: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, - FPU_LS_DOUBLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case 31: - switch (inst_get_field(inst, 21, 30)) { - case OP_31_LFSX: - { - ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; - - addr += kvmppc_get_gpr(vcpu, ax_rb); - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, - addr, FPU_LS_SINGLE); - break; - } - case OP_31_LFSUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, - addr, FPU_LS_SINGLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_31_LFDX: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, - addr, FPU_LS_DOUBLE); - break; - } - case OP_31_LFDUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, - addr, FPU_LS_DOUBLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_31_STFSX: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, - addr, FPU_LS_SINGLE); - break; - } - case OP_31_STFSUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, - addr, FPU_LS_SINGLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_31_STFX: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, - addr, FPU_LS_DOUBLE); - break; - } - case OP_31_STFUX: - { - ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, - addr, FPU_LS_DOUBLE); - - if (emulated == EMULATE_DONE) - kvmppc_set_gpr(vcpu, ax_ra, addr); - break; - } - case OP_31_STFIWX: - { - ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + - kvmppc_get_gpr(vcpu, ax_rb); - - emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, - addr, - FPU_LS_SINGLE_LOW); - break; - } - break; - } - break; - case 59: - switch (inst_get_field(inst, 21, 30)) { - case OP_59_FADDS: - fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FSUBS: - fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FDIVS: - fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FRES: - fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FRSQRTES: - fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - } - switch (inst_get_field(inst, 26, 30)) { - case OP_59_FMULS: - fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FMSUBS: - fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FMADDS: - fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FNMSUBS: - fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_59_FNMADDS: - fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - } - break; - case 63: - switch (inst_get_field(inst, 21, 30)) { - case OP_63_MTFSB0: - case OP_63_MTFSB1: - case OP_63_MCRFS: - case OP_63_MTFSFI: - /* XXX need to implement */ - break; - case OP_63_MFFS: - /* XXX missing CR */ - *fpr_d = vcpu->arch.fpscr; - break; - case OP_63_MTFSF: - /* XXX missing fm bits */ - /* XXX missing CR */ - vcpu->arch.fpscr = *fpr_b; - break; - case OP_63_FCMPU: - { - u32 tmp_cr; - u32 cr0_mask = 0xf0000000; - u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - - fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); - cr &= ~(cr0_mask >> cr_shift); - cr |= (cr & cr0_mask) >> cr_shift; - break; - } - case OP_63_FCMPO: - { - u32 tmp_cr; - u32 cr0_mask = 0xf0000000; - u32 cr_shift = inst_get_field(inst, 6, 8) * 4; - - fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); - cr &= ~(cr0_mask >> cr_shift); - cr |= (cr & cr0_mask) >> cr_shift; - break; - } - case OP_63_FNEG: - fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - break; - case OP_63_FMR: - *fpr_d = *fpr_b; - break; - case OP_63_FABS: - fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - break; - case OP_63_FCPSGN: - fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - break; - case OP_63_FDIV: - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - break; - case OP_63_FADD: - fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - break; - case OP_63_FSUB: - fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); - break; - case OP_63_FCTIW: - fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - break; - case OP_63_FCTIWZ: - fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - break; - case OP_63_FRSP: - fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - kvmppc_sync_qpr(vcpu, ax_rd); - break; - case OP_63_FRSQRTE: - { - double one = 1.0f; - - /* fD = sqrt(fB) */ - fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); - /* fD = 1.0f / fD */ - fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); - break; - } - } - switch (inst_get_field(inst, 26, 30)) { - case OP_63_FMUL: - fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); - break; - case OP_63_FSEL: - fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - break; - case OP_63_FMSUB: - fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - break; - case OP_63_FMADD: - fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - break; - case OP_63_FNMSUB: - fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - break; - case OP_63_FNMADD: - fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); - break; - } - break; - } - -#ifdef DEBUG - for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { - u32 f; - cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); - dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); - } -#endif - - if (rcomp) - kvmppc_set_cr(vcpu, cr); - - preempt_enable(); - - return emulated; -} diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index dbb5d6842a51..cb72a65f4ecc 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,12 +38,10 @@ #define OP_31_XOP_LBZX 87 #define OP_31_XOP_STWX 151 #define OP_31_XOP_STBX 215 -#define OP_31_XOP_LBZUX 119 #define OP_31_XOP_STBUX 247 #define OP_31_XOP_LHZX 279 #define OP_31_XOP_LHZUX 311 #define OP_31_XOP_MFSPR 339 -#define OP_31_XOP_LHAX 343 #define OP_31_XOP_STHX 407 #define OP_31_XOP_STHUX 439 #define OP_31_XOP_MTSPR 467 @@ -64,8 +62,6 @@ #define OP_STBU 39 #define OP_LHZ 40 #define OP_LHZU 41 -#define OP_LHA 42 -#define OP_LHAU 43 #define OP_STH 44 #define OP_STHU 45 @@ -175,19 +171,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; - case OP_31_XOP_LBZUX: - rt = get_rt(inst); - ra = get_ra(inst); - rb = get_rb(inst); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, ea); - break; - case OP_31_XOP_STWX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -217,11 +200,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, rs, ea); break; - case OP_31_XOP_LHAX: - rt = get_rt(inst); - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - case OP_31_XOP_LHZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); @@ -472,18 +450,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); break; - case OP_LHA: - rt = get_rt(inst); - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - ra = get_ra(inst); - rt = get_rt(inst); - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); - break; - case OP_STH: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -506,9 +472,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) if (emulated == EMULATE_FAIL) { emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); - if (emulated == EMULATE_AGAIN) { - advance = 0; - } else if (emulated == EMULATE_FAIL) { + if (emulated == EMULATE_FAIL) { advance = 0; printk(KERN_ERR "Couldn't emulate instruction 0x%08x " "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S deleted file mode 100644 index 2b340a3eee90..000000000000 --- a/arch/powerpc/kvm/fpu.S +++ /dev/null @@ -1,273 +0,0 @@ -/* - * FPU helper code to use FPU operations from inside the kernel - * - * Copyright (C) 2010 Alexander Graf (agraf@suse.de) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <asm/reg.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/pgtable.h> -#include <asm/cputable.h> -#include <asm/cache.h> -#include <asm/thread_info.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> - -/* Instructions operating on single parameters */ - -/* - * Single operation with one input operand - * - * R3 = (double*)&fpscr - * R4 = (short*)&result - * R5 = (short*)¶m1 - */ -#define FPS_ONE_IN(name) \ -_GLOBAL(fps_ ## name); \ - lfd 0,0(r3); /* load up fpscr value */ \ - MTFSF_L(0); \ - lfs 0,0(r5); \ - \ - name 0,0; \ - \ - stfs 0,0(r4); \ - mffs 0; \ - stfd 0,0(r3); /* save new fpscr value */ \ - blr - -/* - * Single operation with two input operands - * - * R3 = (double*)&fpscr - * R4 = (short*)&result - * R5 = (short*)¶m1 - * R6 = (short*)¶m2 - */ -#define FPS_TWO_IN(name) \ -_GLOBAL(fps_ ## name); \ - lfd 0,0(r3); /* load up fpscr value */ \ - MTFSF_L(0); \ - lfs 0,0(r5); \ - lfs 1,0(r6); \ - \ - name 0,0,1; \ - \ - stfs 0,0(r4); \ - mffs 0; \ - stfd 0,0(r3); /* save new fpscr value */ \ - blr - -/* - * Single operation with three input operands - * - * R3 = (double*)&fpscr - * R4 = (short*)&result - * R5 = (short*)¶m1 - * R6 = (short*)¶m2 - * R7 = (short*)¶m3 - */ -#define FPS_THREE_IN(name) \ -_GLOBAL(fps_ ## name); \ - lfd 0,0(r3); /* load up fpscr value */ \ - MTFSF_L(0); \ - lfs 0,0(r5); \ - lfs 1,0(r6); \ - lfs 2,0(r7); \ - \ - name 0,0,1,2; \ - \ - stfs 0,0(r4); \ - mffs 0; \ - stfd 0,0(r3); /* save new fpscr value */ \ - blr - -FPS_ONE_IN(fres) -FPS_ONE_IN(frsqrte) -FPS_ONE_IN(fsqrts) -FPS_TWO_IN(fadds) -FPS_TWO_IN(fdivs) -FPS_TWO_IN(fmuls) -FPS_TWO_IN(fsubs) -FPS_THREE_IN(fmadds) -FPS_THREE_IN(fmsubs) -FPS_THREE_IN(fnmadds) -FPS_THREE_IN(fnmsubs) -FPS_THREE_IN(fsel) - - -/* Instructions operating on double parameters */ - -/* - * Beginning of double instruction processing - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - * R6 = (double*)¶m1 - * R7 = (double*)¶m2 [load_two] - * R8 = (double*)¶m3 [load_three] - * LR = instruction call function - */ -fpd_load_three: - lfd 2,0(r8) /* load param3 */ -fpd_load_two: - lfd 1,0(r7) /* load param2 */ -fpd_load_one: - lfd 0,0(r6) /* load param1 */ -fpd_load_none: - lfd 3,0(r3) /* load up fpscr value */ - MTFSF_L(3) - lwz r6, 0(r4) /* load cr */ - mtcr r6 - blr - -/* - * End of double instruction processing - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - * LR = caller of instruction call function - */ -fpd_return: - mfcr r6 - stfd 0,0(r5) /* save result */ - mffs 0 - stfd 0,0(r3) /* save new fpscr value */ - stw r6,0(r4) /* save new cr value */ - blr - -/* - * Double operation with no input operand - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - */ -#define FPD_NONE_IN(name) \ -_GLOBAL(fpd_ ## name); \ - mflr r12; \ - bl fpd_load_none; \ - mtlr r12; \ - \ - name. 0; /* call instruction */ \ - b fpd_return - -/* - * Double operation with one input operand - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - * R6 = (double*)¶m1 - */ -#define FPD_ONE_IN(name) \ -_GLOBAL(fpd_ ## name); \ - mflr r12; \ - bl fpd_load_one; \ - mtlr r12; \ - \ - name. 0,0; /* call instruction */ \ - b fpd_return - -/* - * Double operation with two input operands - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - * R6 = (double*)¶m1 - * R7 = (double*)¶m2 - * R8 = (double*)¶m3 - */ -#define FPD_TWO_IN(name) \ -_GLOBAL(fpd_ ## name); \ - mflr r12; \ - bl fpd_load_two; \ - mtlr r12; \ - \ - name. 0,0,1; /* call instruction */ \ - b fpd_return - -/* - * CR Double operation with two input operands - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)¶m1 - * R6 = (double*)¶m2 - * R7 = (double*)¶m3 - */ -#define FPD_TWO_IN_CR(name) \ -_GLOBAL(fpd_ ## name); \ - lfd 1,0(r6); /* load param2 */ \ - lfd 0,0(r5); /* load param1 */ \ - lfd 3,0(r3); /* load up fpscr value */ \ - MTFSF_L(3); \ - lwz r6, 0(r4); /* load cr */ \ - mtcr r6; \ - \ - name 0,0,1; /* call instruction */ \ - mfcr r6; \ - mffs 0; \ - stfd 0,0(r3); /* save new fpscr value */ \ - stw r6,0(r4); /* save new cr value */ \ - blr - -/* - * Double operation with three input operands - * - * R3 = (double*)&fpscr - * R4 = (u32*)&cr - * R5 = (double*)&result - * R6 = (double*)¶m1 - * R7 = (double*)¶m2 - * R8 = (double*)¶m3 - */ -#define FPD_THREE_IN(name) \ -_GLOBAL(fpd_ ## name); \ - mflr r12; \ - bl fpd_load_three; \ - mtlr r12; \ - \ - name. 0,0,1,2; /* call instruction */ \ - b fpd_return - -FPD_ONE_IN(fsqrts) -FPD_ONE_IN(frsqrtes) -FPD_ONE_IN(fres) -FPD_ONE_IN(frsp) -FPD_ONE_IN(fctiw) -FPD_ONE_IN(fctiwz) -FPD_ONE_IN(fsqrt) -FPD_ONE_IN(fre) -FPD_ONE_IN(frsqrte) -FPD_ONE_IN(fneg) -FPD_ONE_IN(fabs) -FPD_TWO_IN(fadds) -FPD_TWO_IN(fsubs) -FPD_TWO_IN(fdivs) -FPD_TWO_IN(fmuls) -FPD_TWO_IN_CR(fcmpu) -FPD_TWO_IN(fcpsgn) -FPD_TWO_IN(fdiv) -FPD_TWO_IN(fadd) -FPD_TWO_IN(fmul) -FPD_TWO_IN_CR(fcmpo) -FPD_TWO_IN(fsub) -FPD_THREE_IN(fmsubs) -FPD_THREE_IN(fmadds) -FPD_THREE_IN(fnmsubs) -FPD_THREE_IN(fnmadds) -FPD_THREE_IN(fsel) -FPD_THREE_IN(fmsub) -FPD_THREE_IN(fmadd) -FPD_THREE_IN(fnmsub) -FPD_THREE_IN(fnmadd) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ffbe4cac5b15..297fcd2ff7d0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -148,10 +148,6 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_PPC_SEGSTATE: - case KVM_CAP_PPC_PAIRED_SINGLES: - case KVM_CAP_PPC_UNSET_IRQ: - case KVM_CAP_ENABLE_CAP: - case KVM_CAP_PPC_OSI: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -197,17 +193,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; vcpu = kvmppc_core_vcpu_create(kvm, id); - if (!IS_ERR(vcpu)) - kvmppc_create_vcpu_debugfs(vcpu, id); + kvmppc_create_vcpu_debugfs(vcpu, id); return vcpu; } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { - /* Make sure we're not using the vcpu anymore */ - hrtimer_cancel(&vcpu->arch.dec_timer); - tasklet_kill(&vcpu->arch.tasklet); - kvmppc_remove_vcpu_debugfs(vcpu); kvmppc_core_vcpu_free(vcpu); } @@ -287,7 +278,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - u64 gpr; + ulong gpr; if (run->mmio.len > sizeof(gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -296,7 +287,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->arch.mmio_is_bigendian) { switch (run->mmio.len) { - case 8: gpr = *(u64 *)run->mmio.data; break; case 4: gpr = *(u32 *)run->mmio.data; break; case 2: gpr = *(u16 *)run->mmio.data; break; case 1: gpr = *(u8 *)run->mmio.data; break; @@ -310,43 +300,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } - if (vcpu->arch.mmio_sign_extend) { - switch (run->mmio.len) { -#ifdef CONFIG_PPC64 - case 4: - gpr = (s64)(s32)gpr; - break; -#endif - case 2: - gpr = (s64)(s16)gpr; - break; - case 1: - gpr = (s64)(s8)gpr; - break; - } - } - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); - - switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { - case KVM_REG_GPR: - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); - break; - case KVM_REG_FPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; - break; -#ifdef CONFIG_PPC_BOOK3S - case KVM_REG_QPR: - vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; - break; - case KVM_REG_FQPR: - vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; - vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; - break; -#endif - default: - BUG(); - } } int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -365,25 +319,12 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmio_is_bigendian = is_bigendian; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 0; - vcpu->arch.mmio_sign_extend = 0; return EMULATE_DO_MMIO; } -/* Same as above, but sign extends */ -int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int rt, unsigned int bytes, int is_bigendian) -{ - int r; - - r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); - vcpu->arch.mmio_sign_extend = 1; - - return r; -} - int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u64 val, unsigned int bytes, int is_bigendian) + u32 val, unsigned int bytes, int is_bigendian) { void *data = run->mmio.data; @@ -401,7 +342,6 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Store the value at the lowest bytes in 'data'. */ if (is_bigendian) { switch (bytes) { - case 8: *(u64 *)data = val; break; case 4: *(u32 *)data = val; break; case 2: *(u16 *)data = val; break; case 1: *(u8 *)data = val; break; @@ -436,13 +376,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->arch.dcr_is_write) kvmppc_complete_dcr_load(vcpu, run); vcpu->arch.dcr_needed = 0; - } else if (vcpu->arch.osi_needed) { - u64 *gprs = run->osi.gprs; - int i; - - for (i = 0; i < 32; i++) - kvmppc_set_gpr(vcpu, i, gprs[i]); - vcpu->arch.osi_needed = 0; } kvmppc_core_deliver_interrupts(vcpu); @@ -463,10 +396,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq == KVM_INTERRUPT_UNSET) - kvmppc_core_dequeue_external(vcpu, irq); - else - kvmppc_core_queue_external(vcpu, irq); + kvmppc_core_queue_external(vcpu, irq); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -476,27 +406,6 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) return 0; } -static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, - struct kvm_enable_cap *cap) -{ - int r; - - if (cap->flags) - return -EINVAL; - - switch (cap->cap) { - case KVM_CAP_PPC_OSI: - r = 0; - vcpu->arch.osi_enabled = true; - break; - default: - r = -EINVAL; - break; - } - - return r; -} - int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -525,15 +434,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; } - case KVM_ENABLE_CAP: - { - struct kvm_enable_cap cap; - r = -EFAULT; - if (copy_from_user(&cap, argp, sizeof(cap))) - goto out; - r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); - break; - } default: r = -EINVAL; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ee7c713686ce..49292869a5cd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -341,13 +341,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) - goto out_free_sie_block; + goto out_free_cpu; VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); return vcpu; -out_free_sie_block: - free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: kfree(vcpu); out_nomem: diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index ff90055c7f0b..f46b79f6c16c 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -21,7 +21,6 @@ #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM #define __KVM_HAVE_VCPU_EVENTS -#define __KVM_HAVE_DEBUGREGS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -258,11 +257,6 @@ struct kvm_reinject_control { /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 -#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 - -/* Interrupt shadow states */ -#define KVM_X86_SHADOW_INT_MOV_SS 0x01 -#define KVM_X86_SHADOW_INT_STI 0x02 /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { @@ -277,7 +271,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 shadow; + __u8 pad; } interrupt; struct { __u8 injected; @@ -290,13 +284,4 @@ struct kvm_vcpu_events { __u32 reserved[10]; }; -/* for KVM_GET/SET_DEBUGREGS */ -struct kvm_debugregs { - __u64 db[4]; - __u64 dr6; - __u64 dr7; - __u64 flags; - __u64 reserved[9]; -}; - #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a1319c82050e..7a6f54fa13ba 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -11,8 +11,6 @@ #ifndef _ASM_X86_KVM_X86_EMULATE_H #define _ASM_X86_KVM_X86_EMULATE_H -#include <asm/desc_defs.h> - struct x86_emulate_ctxt; /* @@ -65,15 +63,6 @@ struct x86_emulate_ops { unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); /* - * write_std: Write bytes of standard (non-emulated/special) memory. - * Used for descriptor writing. - * @addr: [IN ] Linear address to which to write. - * @val: [OUT] Value write to memory, zero-extended to 'u_long'. - * @bytes: [IN ] Number of bytes to write to memory. - */ - int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); - /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch. * @addr: [IN ] Linear address from which to read. @@ -120,23 +109,6 @@ struct x86_emulate_ops { unsigned int bytes, struct kvm_vcpu *vcpu); - int (*pio_in_emulated)(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - int (*pio_out_emulated)(int size, unsigned short port, const void *val, - unsigned int count, struct kvm_vcpu *vcpu); - - bool (*get_cached_descriptor)(struct desc_struct *desc, - int seg, struct kvm_vcpu *vcpu); - void (*set_cached_descriptor)(struct desc_struct *desc, - int seg, struct kvm_vcpu *vcpu); - u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); - void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); - void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); - ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); - void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); - int (*cpl)(struct kvm_vcpu *vcpu); - void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); }; /* Type, address-of, and value of an instruction's operand. */ @@ -152,12 +124,6 @@ struct fetch_cache { unsigned long end; }; -struct read_cache { - u8 data[1024]; - unsigned long pos; - unsigned long end; -}; - struct decode_cache { u8 twobyte; u8 b; @@ -173,7 +139,7 @@ struct decode_cache { u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; - unsigned long eip; + unsigned long eip, eip_orig; /* modrm */ u8 modrm; u8 modrm_mod; @@ -185,15 +151,16 @@ struct decode_cache { void *modrm_ptr; unsigned long modrm_val; struct fetch_cache fetch; - struct read_cache io_read; }; +#define X86_SHADOW_INT_MOV_SS 1 +#define X86_SHADOW_INT_STI 2 + struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; unsigned long eflags; - unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; u32 cs_base; @@ -201,7 +168,6 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; - bool restart; /* restart string instruction after writeback */ /* decode cache */ struct decode_cache decode; }; @@ -228,8 +194,5 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 tss_selector, int reason); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 26c629a062db..06d9e79ca37d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -224,9 +224,14 @@ struct kvm_pv_mmu_op_buffer { struct kvm_pio_request { unsigned long count; + int cur_count; + gva_t guest_gva; int in; int port; int size; + int string; + int down; + int rep; }; /* @@ -357,8 +362,8 @@ struct kvm_vcpu_arch { u64 *mce_banks; /* used for guest single stepping over the given code position */ + u16 singlestep_cs; unsigned long singlestep_rip; - /* fields used by HYPER-V emulation */ u64 hv_vapic; }; @@ -384,7 +389,6 @@ struct kvm_arch { unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; - atomic_t invlpg_counter; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. @@ -457,6 +461,11 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; +struct descriptor_table { + u16 limit; + unsigned long base; +} __attribute__((packed)); + struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -494,10 +503,10 @@ struct kvm_x86_ops { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); - void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); - void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); @@ -578,14 +587,23 @@ int emulate_instruction(struct kvm_vcpu *vcpu, void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags); +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, + unsigned long *rflags); void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, + int size, unsigned port); +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, + int size, unsigned long count, int down, + gva_t address, int rep, unsigned port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); @@ -631,6 +649,8 @@ int emulator_write_emulated(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu); +unsigned long segment_base(u16 selector); + void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes, @@ -655,6 +675,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); void kvm_disable_tdp(void); +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); bool kvm_check_iopl(struct kvm_vcpu *vcpu); @@ -703,6 +724,23 @@ static inline void kvm_load_ldt(u16 sel) asm("lldt %0" : : "rm"(sel)); } +static inline void kvm_get_idt(struct descriptor_table *table) +{ + asm("sidt %0" : "=m"(*table)); +} + +static inline void kvm_get_gdt(struct descriptor_table *table) +{ + asm("sgdt %0" : "=m"(*table)); +} + +static inline unsigned long kvm_read_tr_base(void) +{ + u16 tr; + asm("str %0" : "=g"(tr)); + return segment_base(tr); +} + #ifdef CONFIG_X86_64 static inline unsigned long read_msr(unsigned long msr) { @@ -788,6 +826,4 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); -bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); - #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index b26a38d85356..38638cd2fa4c 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -115,10 +115,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) -#define SVM_VM_CR_VALID_MASK 0x001fULL -#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL -#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL - struct __attribute__ ((__packed__)) vmcb_seg { u16 selector; u16 attrib; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 64c9854f0458..4dade6ac0827 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -33,7 +33,6 @@ #include <asm/kvm_emulate.h> #include "x86.h" -#include "tss.h" /* * Opcode effective-address decode tables. @@ -51,8 +50,6 @@ #define DstReg (2<<1) /* Register operand. */ #define DstMem (3<<1) /* Memory operand. */ #define DstAcc (4<<1) /* Destination Accumulator */ -#define DstDI (5<<1) /* Destination is in ES:(E)DI */ -#define DstMem64 (6<<1) /* 64bit memory operand */ #define DstMask (7<<1) /* Source operand type. */ #define SrcNone (0<<4) /* No source operand. */ @@ -66,7 +63,6 @@ #define SrcOne (7<<4) /* Implied '1' */ #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ #define SrcImmU (9<<4) /* Immediate operand, unsigned */ -#define SrcSI (0xa<<4) /* Source is in the DS:RSI */ #define SrcMask (0xf<<4) /* Generic ModRM decode. */ #define ModRM (1<<8) @@ -89,9 +85,6 @@ #define Src2ImmByte (2<<29) #define Src2One (3<<29) #define Src2Imm16 (4<<29) -#define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be - in memory and second argument is located - immediately after the first one in memory. */ #define Src2Mask (7<<29) enum { @@ -154,8 +147,8 @@ static u32 opcode_table[256] = { 0, 0, 0, 0, /* 0x68 - 0x6F */ SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, - DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ - SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, @@ -180,12 +173,12 @@ static u32 opcode_table[256] = { /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, - ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, - ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, + ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, + ByteOp | ImplicitOps | String, ImplicitOps | String, /* 0xA8 - 0xAF */ - 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, - ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, - ByteOp | DstDI | String, DstDI | String, + 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, + ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, + ByteOp | ImplicitOps | String, ImplicitOps | String, /* 0xB0 - 0xB7 */ ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, @@ -211,13 +204,13 @@ static u32 opcode_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xE7 */ 0, 0, 0, 0, - ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, - ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, + ByteOp | SrcImmUByte, SrcImmUByte, + ByteOp | SrcImmUByte, SrcImmUByte, /* 0xE8 - 0xEF */ SrcImm | Stack, SrcImm | ImplicitOps, SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, - SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, - SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, + SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, @@ -350,8 +343,7 @@ static u32 group_table[] = { [Group5*8] = DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, SrcMem | ModRM | Stack, 0, - SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, - SrcMem | ModRM | Stack, 0, + SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, [Group7*8] = 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, SrcNone | ModRM | DstMem | Mov, 0, @@ -361,14 +353,14 @@ static u32 group_table[] = { DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, [Group9*8] = - 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, + 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, }; static u32 group2_table[] = { [Group7*8] = - SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, + SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, SrcNone | ModRM | DstMem | Mov, 0, - SrcMem16 | ModRM | Mov | Priv, 0, + SrcMem16 | ModRM | Mov, 0, [Group9*8] = 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -570,7 +562,7 @@ static u32 group2_table[] = { #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ - if (rc != X86EMUL_CONTINUE) \ + if (rc != 0) \ goto done; \ (_eip) += (_size); \ (_type)_x; \ @@ -655,31 +647,31 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, if (linear < fc->start || linear >= fc->end) { size = min(15UL, PAGE_SIZE - offset_in_page(linear)); rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); - if (rc != X86EMUL_CONTINUE) + if (rc) return rc; fc->start = linear; fc->end = linear + size; } *dest = fc->data[linear - fc->start]; - return X86EMUL_CONTINUE; + return 0; } static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, void *dest, unsigned size) { - int rc; + int rc = 0; /* x86 instructions are limited to 15 bytes. */ - if (eip + size - ctxt->eip > 15) + if (eip + size - ctxt->decode.eip_orig > 15) return X86EMUL_UNHANDLEABLE; eip += ctxt->cs_base; while (size--) { rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); - if (rc != X86EMUL_CONTINUE) + if (rc) return rc; } - return X86EMUL_CONTINUE; + return 0; } /* @@ -710,7 +702,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, *address = 0; rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt->vcpu, NULL); - if (rc != X86EMUL_CONTINUE) + if (rc) return rc; rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt->vcpu, NULL); @@ -790,7 +782,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; u8 sib; int index_reg = 0, base_reg = 0, scale; - int rc = X86EMUL_CONTINUE; + int rc = 0; if (c->rex_prefix) { c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ @@ -903,7 +895,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = X86EMUL_CONTINUE; + int rc = 0; switch (c->ad_bytes) { case 2: @@ -924,17 +916,14 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = X86EMUL_CONTINUE; + int rc = 0; int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, group; - - /* we cannot decode insn before we complete previous rep insn */ - WARN_ON(ctxt->restart); - /* Shadow copy of register state. Committed on successful emulation. */ + memset(c, 0, sizeof(struct decode_cache)); - c->eip = ctxt->eip; + c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); @@ -1026,6 +1015,11 @@ done_prefixes: } } + if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { + kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); + return -1; + } + if (c->d & Group) { group = c->d & GroupMask; c->modrm = insn_fetch(u8, 1, c->eip); @@ -1052,7 +1046,7 @@ done_prefixes: rc = decode_modrm(ctxt, ops); else if (c->d & MemAbs) rc = decode_abs(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; if (!c->has_seg_override) @@ -1063,10 +1057,6 @@ done_prefixes: if (c->ad_bytes != 8) c->modrm_ea = (u32)c->modrm_ea; - - if (c->rip_relative) - c->modrm_ea += c->eip; - /* * Decode and fetch the source operand: register, memory * or immediate. @@ -1101,8 +1091,6 @@ done_prefixes: break; } c->src.type = OP_MEM; - c->src.ptr = (unsigned long *)c->modrm_ea; - c->src.val = 0; break; case SrcImm: case SrcImmU: @@ -1151,14 +1139,6 @@ done_prefixes: c->src.bytes = 1; c->src.val = 1; break; - case SrcSI: - c->src.type = OP_MEM; - c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->src.ptr = (unsigned long *) - register_address(c, seg_override_base(ctxt, c), - c->regs[VCPU_REGS_RSI]); - c->src.val = 0; - break; } /* @@ -1188,12 +1168,6 @@ done_prefixes: c->src2.bytes = 1; c->src2.val = 1; break; - case Src2Mem16: - c->src2.type = OP_MEM; - c->src2.bytes = 2; - c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); - c->src2.val = 0; - break; } /* Decode and fetch the destination operand: register or memory. */ @@ -1206,7 +1180,6 @@ done_prefixes: c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstMem: - case DstMem64: if ((c->d & ModRM) && c->modrm_mod == 3) { c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.type = OP_REG; @@ -1215,24 +1188,12 @@ done_prefixes: break; } c->dst.type = OP_MEM; - c->dst.ptr = (unsigned long *)c->modrm_ea; - if ((c->d & DstMask) == DstMem64) - c->dst.bytes = 8; - else - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.val = 0; - if (c->d & BitOp) { - unsigned long mask = ~(c->dst.bytes * 8 - 1); - - c->dst.ptr = (void *)c->dst.ptr + - (c->src.val & mask) / 8; - } break; case DstAcc: c->dst.type = OP_REG; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.bytes = c->op_bytes; c->dst.ptr = &c->regs[VCPU_REGS_RAX]; - switch (c->dst.bytes) { + switch (c->op_bytes) { case 1: c->dst.val = *(u8 *)c->dst.ptr; break; @@ -1242,248 +1203,18 @@ done_prefixes: case 4: c->dst.val = *(u32 *)c->dst.ptr; break; - case 8: - c->dst.val = *(u64 *)c->dst.ptr; - break; } c->dst.orig_val = c->dst.val; break; - case DstDI: - c->dst.type = OP_MEM; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *) - register_address(c, es_base(ctxt), - c->regs[VCPU_REGS_RDI]); - c->dst.val = 0; - break; } + if (c->rip_relative) + c->modrm_ea += c->eip; + done: return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; } -static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - unsigned int size, unsigned short port, - void *dest) -{ - struct read_cache *rc = &ctxt->decode.io_read; - - if (rc->pos == rc->end) { /* refill pio read ahead */ - struct decode_cache *c = &ctxt->decode; - unsigned int in_page, n; - unsigned int count = c->rep_prefix ? - address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; - in_page = (ctxt->eflags & EFLG_DF) ? - offset_in_page(c->regs[VCPU_REGS_RDI]) : - PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); - n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, - count); - if (n == 0) - n = 1; - rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) - return 0; - rc->end = n * size; - } - - memcpy(dest, rc->data + rc->pos, size); - rc->pos += size; - return 1; -} - -static u32 desc_limit_scaled(struct desc_struct *desc) -{ - u32 limit = get_desc_limit(desc); - - return desc->g ? (limit << 12) | 0xfff : limit; -} - -static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 selector, struct desc_ptr *dt) -{ - if (selector & 1 << 2) { - struct desc_struct desc; - memset (dt, 0, sizeof *dt); - if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) - return; - - dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ - dt->address = get_desc_base(&desc); - } else - ops->get_gdt(dt, ctxt->vcpu); -} - -/* allowed just for 8 bytes segments */ -static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 selector, struct desc_struct *desc) -{ - struct desc_ptr dt; - u16 index = selector >> 3; - int ret; - u32 err; - ulong addr; - - get_descriptor_table_ptr(ctxt, ops, selector, &dt); - - if (dt.size < index * 8 + 7) { - kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; - } - addr = dt.address + index * 8; - ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); - if (ret == X86EMUL_PROPAGATE_FAULT) - kvm_inject_page_fault(ctxt->vcpu, addr, err); - - return ret; -} - -/* allowed just for 8 bytes segments */ -static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 selector, struct desc_struct *desc) -{ - struct desc_ptr dt; - u16 index = selector >> 3; - u32 err; - ulong addr; - int ret; - - get_descriptor_table_ptr(ctxt, ops, selector, &dt); - - if (dt.size < index * 8 + 7) { - kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; - } - - addr = dt.address + index * 8; - ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); - if (ret == X86EMUL_PROPAGATE_FAULT) - kvm_inject_page_fault(ctxt->vcpu, addr, err); - - return ret; -} - -static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 selector, int seg) -{ - struct desc_struct seg_desc; - u8 dpl, rpl, cpl; - unsigned err_vec = GP_VECTOR; - u32 err_code = 0; - bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ - int ret; - - memset(&seg_desc, 0, sizeof seg_desc); - - if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) - || ctxt->mode == X86EMUL_MODE_REAL) { - /* set real mode segment descriptor */ - set_desc_base(&seg_desc, selector << 4); - set_desc_limit(&seg_desc, 0xffff); - seg_desc.type = 3; - seg_desc.p = 1; - seg_desc.s = 1; - goto load; - } - - /* NULL selector is not valid for TR, CS and SS */ - if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) - && null_selector) - goto exception; - - /* TR should be in GDT only */ - if (seg == VCPU_SREG_TR && (selector & (1 << 2))) - goto exception; - - if (null_selector) /* for NULL selector skip all following checks */ - goto load; - - ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); - if (ret != X86EMUL_CONTINUE) - return ret; - - err_code = selector & 0xfffc; - err_vec = GP_VECTOR; - - /* can't load system descriptor into segment selecor */ - if (seg <= VCPU_SREG_GS && !seg_desc.s) - goto exception; - - if (!seg_desc.p) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - - rpl = selector & 3; - dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt->vcpu); - - switch (seg) { - case VCPU_SREG_SS: - /* - * segment is not a writable data segment or segment - * selector's RPL != CPL or segment selector's RPL != CPL - */ - if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) - goto exception; - break; - case VCPU_SREG_CS: - if (!(seg_desc.type & 8)) - goto exception; - - if (seg_desc.type & 4) { - /* conforming */ - if (dpl > cpl) - goto exception; - } else { - /* nonconforming */ - if (rpl > cpl || dpl != cpl) - goto exception; - } - /* CS(RPL) <- CPL */ - selector = (selector & 0xfffc) | cpl; - break; - case VCPU_SREG_TR: - if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) - goto exception; - break; - case VCPU_SREG_LDTR: - if (seg_desc.s || seg_desc.type != 2) - goto exception; - break; - default: /* DS, ES, FS, or GS */ - /* - * segment is not a data or readable code segment or - * ((segment is a data or nonconforming code segment) - * and (both RPL and CPL > DPL)) - */ - if ((seg_desc.type & 0xa) == 0x8 || - (((seg_desc.type & 0xc) != 0xc) && - (rpl > dpl && cpl > dpl))) - goto exception; - break; - } - - if (seg_desc.s) { - /* mark segment as accessed */ - seg_desc.type |= 1; - ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); - if (ret != X86EMUL_CONTINUE) - return ret; - } -load: - ops->set_segment_selector(selector, seg, ctxt->vcpu); - ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); - return X86EMUL_CONTINUE; -exception: - kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); - return X86EMUL_PROPAGATE_FAULT; -} - static inline void emulate_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -1520,7 +1251,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt->vcpu); + int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); rc = emulate_pop(ctxt, ops, &val, len); if (rc != X86EMUL_CONTINUE) @@ -1575,10 +1306,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int rc; rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) return rc; - rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); return rc; } @@ -1601,7 +1332,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = X86EMUL_CONTINUE; + int rc = 0; int reg = VCPU_REGS_RDI; while (reg >= VCPU_REGS_RAX) { @@ -1612,7 +1343,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, } rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) break; --reg; } @@ -1623,8 +1354,12 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + int rc; - return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); + if (rc != 0) + return rc; + return 0; } static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) @@ -1660,6 +1395,7 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; + int rc = 0; switch (c->modrm_reg) { case 0 ... 1: /* test */ @@ -1672,9 +1408,11 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, emulate_1op("neg", c->dst, ctxt->eflags); break; default: - return 0; + DPRINTF("Cannot emulate %02x\n", c->b); + rc = X86EMUL_UNHANDLEABLE; + break; } - return 1; + return rc; } static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, @@ -1704,14 +1442,20 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, emulate_push(ctxt); break; } - return X86EMUL_CONTINUE; + return 0; } static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) + struct x86_emulate_ops *ops, + unsigned long memop) { struct decode_cache *c = &ctxt->decode; - u64 old = c->dst.orig_val; + u64 old, new; + int rc; + + rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + return rc; if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { @@ -1719,13 +1463,17 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; + } else { - c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | + new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | (u32) c->regs[VCPU_REGS_RBX]; + rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + return rc; ctxt->eflags |= EFLG_ZF; } - return X86EMUL_CONTINUE; + return 0; } static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, @@ -1736,14 +1484,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, unsigned long cs; rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc) return rc; - rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); + rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); return rc; } @@ -1796,7 +1544,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, default: break; } - return X86EMUL_CONTINUE; + return 0; } static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) @@ -1850,11 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) u64 msr_data; /* syscall is not available in real mode */ - if (ctxt->mode == X86EMUL_MODE_REAL || - ctxt->mode == X86EMUL_MODE_VM86) { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - return X86EMUL_PROPAGATE_FAULT; - } + if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) + return X86EMUL_UNHANDLEABLE; setup_syscalls_segments(ctxt, &cs, &ss); @@ -1904,16 +1649,14 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) { kvm_inject_gp(ctxt->vcpu, 0); - return X86EMUL_PROPAGATE_FAULT; + return X86EMUL_UNHANDLEABLE; } /* XXX sysenter/sysexit have not been tested in 64bit mode. * Therefore, we inject an #UD. */ - if (ctxt->mode == X86EMUL_MODE_PROT64) { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - return X86EMUL_PROPAGATE_FAULT; - } + if (ctxt->mode == X86EMUL_MODE_PROT64) + return X86EMUL_UNHANDLEABLE; setup_syscalls_segments(ctxt, &cs, &ss); @@ -1968,7 +1711,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) { kvm_inject_gp(ctxt->vcpu, 0); - return X86EMUL_PROPAGATE_FAULT; + return X86EMUL_UNHANDLEABLE; } setup_syscalls_segments(ctxt, &cs, &ss); @@ -2013,8 +1756,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) { int iopl; if (ctxt->mode == X86EMUL_MODE_REAL) @@ -2022,7 +1764,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt->vcpu) > iopl; + return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -2059,403 +1801,22 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 port, u16 len) { - if (emulator_bad_iopl(ctxt, ops)) + if (emulator_bad_iopl(ctxt)) if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) return false; return true; } -static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - int seg) -{ - struct desc_struct desc; - if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) - return get_desc_base(&desc); - else - return ~0; -} - -static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - struct tss_segment_16 *tss) -{ - struct decode_cache *c = &ctxt->decode; - - tss->ip = c->eip; - tss->flag = ctxt->eflags; - tss->ax = c->regs[VCPU_REGS_RAX]; - tss->cx = c->regs[VCPU_REGS_RCX]; - tss->dx = c->regs[VCPU_REGS_RDX]; - tss->bx = c->regs[VCPU_REGS_RBX]; - tss->sp = c->regs[VCPU_REGS_RSP]; - tss->bp = c->regs[VCPU_REGS_RBP]; - tss->si = c->regs[VCPU_REGS_RSI]; - tss->di = c->regs[VCPU_REGS_RDI]; - - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); -} - -static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - struct tss_segment_16 *tss) -{ - struct decode_cache *c = &ctxt->decode; - int ret; - - c->eip = tss->ip; - ctxt->eflags = tss->flag | 2; - c->regs[VCPU_REGS_RAX] = tss->ax; - c->regs[VCPU_REGS_RCX] = tss->cx; - c->regs[VCPU_REGS_RDX] = tss->dx; - c->regs[VCPU_REGS_RBX] = tss->bx; - c->regs[VCPU_REGS_RSP] = tss->sp; - c->regs[VCPU_REGS_RBP] = tss->bp; - c->regs[VCPU_REGS_RSI] = tss->si; - c->regs[VCPU_REGS_RDI] = tss->di; - - /* - * SDM says that segment selectors are loaded before segment - * descriptors - */ - ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - - /* - * Now load segment descriptors. If fault happenes at this stage - * it is handled in a context of new task - */ - ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); - if (ret != X86EMUL_CONTINUE) - return ret; - - return X86EMUL_CONTINUE; -} - -static int task_switch_16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 tss_selector, u16 old_tss_sel, - ulong old_tss_base, struct desc_struct *new_desc) -{ - struct tss_segment_16 tss_seg; - int ret; - u32 err, new_tss_base = get_desc_base(new_desc); - - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); - return ret; - } - - save_state_to_tss16(ctxt, ops, &tss_seg); - - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); - return ret; - } - - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); - return ret; - } - - if (old_tss_sel != 0xffff) { - tss_seg.prev_task_link = old_tss_sel; - - ret = ops->write_std(new_tss_base, - &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link, - ctxt->vcpu, &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); - return ret; - } - } - - return load_state_from_tss16(ctxt, ops, &tss_seg); -} - -static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - struct tss_segment_32 *tss) -{ - struct decode_cache *c = &ctxt->decode; - - tss->cr3 = ops->get_cr(3, ctxt->vcpu); - tss->eip = c->eip; - tss->eflags = ctxt->eflags; - tss->eax = c->regs[VCPU_REGS_RAX]; - tss->ecx = c->regs[VCPU_REGS_RCX]; - tss->edx = c->regs[VCPU_REGS_RDX]; - tss->ebx = c->regs[VCPU_REGS_RBX]; - tss->esp = c->regs[VCPU_REGS_RSP]; - tss->ebp = c->regs[VCPU_REGS_RBP]; - tss->esi = c->regs[VCPU_REGS_RSI]; - tss->edi = c->regs[VCPU_REGS_RDI]; - - tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); - tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); - tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); - tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); - tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); - tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); - tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); -} - -static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - struct tss_segment_32 *tss) -{ - struct decode_cache *c = &ctxt->decode; - int ret; - - ops->set_cr(3, tss->cr3, ctxt->vcpu); - c->eip = tss->eip; - ctxt->eflags = tss->eflags | 2; - c->regs[VCPU_REGS_RAX] = tss->eax; - c->regs[VCPU_REGS_RCX] = tss->ecx; - c->regs[VCPU_REGS_RDX] = tss->edx; - c->regs[VCPU_REGS_RBX] = tss->ebx; - c->regs[VCPU_REGS_RSP] = tss->esp; - c->regs[VCPU_REGS_RBP] = tss->ebp; - c->regs[VCPU_REGS_RSI] = tss->esi; - c->regs[VCPU_REGS_RDI] = tss->edi; - - /* - * SDM says that segment selectors are loaded before segment - * descriptors - */ - ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); - ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); - ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); - ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); - ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); - ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); - ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); - - /* - * Now load segment descriptors. If fault happenes at this stage - * it is handled in a context of new task - */ - ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); - if (ret != X86EMUL_CONTINUE) - return ret; - - return X86EMUL_CONTINUE; -} - -static int task_switch_32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 tss_selector, u16 old_tss_sel, - ulong old_tss_base, struct desc_struct *new_desc) -{ - struct tss_segment_32 tss_seg; - int ret; - u32 err, new_tss_base = get_desc_base(new_desc); - - ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); - return ret; - } - - save_state_to_tss32(ctxt, ops, &tss_seg); - - ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); - return ret; - } - - ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, - &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); - return ret; - } - - if (old_tss_sel != 0xffff) { - tss_seg.prev_task_link = old_tss_sel; - - ret = ops->write_std(new_tss_base, - &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link, - ctxt->vcpu, &err); - if (ret == X86EMUL_PROPAGATE_FAULT) { - /* FIXME: need to provide precise fault address */ - kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); - return ret; - } - } - - return load_state_from_tss32(ctxt, ops, &tss_seg); -} - -static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 tss_selector, int reason) -{ - struct desc_struct curr_tss_desc, next_tss_desc; - int ret; - u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); - ulong old_tss_base = - get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); - u32 desc_limit; - - /* FIXME: old_tss_base == ~0 ? */ - - ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); - if (ret != X86EMUL_CONTINUE) - return ret; - ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); - if (ret != X86EMUL_CONTINUE) - return ret; - - /* FIXME: check that next_tss_desc is tss */ - - if (reason != TASK_SWITCH_IRET) { - if ((tss_selector & 3) > next_tss_desc.dpl || - ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { - kvm_inject_gp(ctxt->vcpu, 0); - return X86EMUL_PROPAGATE_FAULT; - } - } - - desc_limit = desc_limit_scaled(&next_tss_desc); - if (!next_tss_desc.p || - ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || - desc_limit < 0x2b)) { - kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, - tss_selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; - } - - if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { - curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ - write_segment_descriptor(ctxt, ops, old_tss_sel, - &curr_tss_desc); - } - - if (reason == TASK_SWITCH_IRET) - ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; - - /* set back link to prev task only if NT bit is set in eflags - note that old_tss_sel is not used afetr this point */ - if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) - old_tss_sel = 0xffff; - - if (next_tss_desc.type & 8) - ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, - old_tss_base, &next_tss_desc); - else - ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, - old_tss_base, &next_tss_desc); - - if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) - ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; - - if (reason != TASK_SWITCH_IRET) { - next_tss_desc.type |= (1 << 1); /* set busy flag */ - write_segment_descriptor(ctxt, ops, tss_selector, - &next_tss_desc); - } - - ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); - ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); - ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); - - return ret; -} - -int emulator_task_switch(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - u16 tss_selector, int reason) -{ - struct decode_cache *c = &ctxt->decode; - int rc; - - memset(c, 0, sizeof(struct decode_cache)); - c->eip = ctxt->eip; - memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); - - rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason); - - if (rc == X86EMUL_CONTINUE) { - memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); - kvm_rip_write(ctxt->vcpu, c->eip); - } - - return rc; -} - -static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, - int reg, struct operand *op) -{ - struct decode_cache *c = &ctxt->decode; - int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; - - register_address_increment(c, &c->regs[reg], df * op->bytes); - op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); -} - int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { + unsigned long memop = 0; u64 msr_data; + unsigned long saved_eip = 0; struct decode_cache *c = &ctxt->decode; - int rc = X86EMUL_CONTINUE; - int saved_dst_type = c->dst.type; + unsigned int port; + int io_dir_in; + int rc = 0; ctxt->interruptibility = 0; @@ -2465,30 +1826,26 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) */ memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); - - if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; - } + saved_eip = c->eip; /* LOCK prefix is allowed only with some instructions */ - if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { + if (c->lock_prefix && !(c->d & Lock)) { kvm_queue_exception(ctxt->vcpu, UD_VECTOR); goto done; } /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { + if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } + if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) + memop = c->modrm_ea; + if (c->rep_prefix && (c->d & String)) { - ctxt->restart = true; /* All REP prefixes have the same first termination condition */ - if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { - string_done: - ctxt->restart = false; + if (c->regs[VCPU_REGS_RCX] == 0) { kvm_rip_write(ctxt->vcpu, c->eip); goto done; } @@ -2500,18 +1857,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) * - if REPNE/REPNZ and ZF = 1 then done */ if ((c->b == 0xa6) || (c->b == 0xa7) || - (c->b == 0xae) || (c->b == 0xaf)) { + (c->b == 0xae) || (c->b == 0xaf)) { if ((c->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) - goto string_done; + ((ctxt->eflags & EFLG_ZF) == 0)) { + kvm_rip_write(ctxt->vcpu, c->eip); + goto done; + } if ((c->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) - goto string_done; + ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { + kvm_rip_write(ctxt->vcpu, c->eip); + goto done; + } } - c->eip = ctxt->eip; + c->regs[VCPU_REGS_RCX]--; + c->eip = kvm_rip_read(ctxt->vcpu); } if (c->src.type == OP_MEM) { + c->src.ptr = (unsigned long *)memop; + c->src.val = 0; rc = ops->read_emulated((unsigned long)c->src.ptr, &c->src.val, c->src.bytes, @@ -2521,25 +1885,29 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.orig_val = c->src.val; } - if (c->src2.type == OP_MEM) { - rc = ops->read_emulated((unsigned long)c->src2.ptr, - &c->src2.val, - c->src2.bytes, - ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - } - if ((c->d & DstMask) == ImplicitOps) goto special_insn; - if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { - /* optimisation - avoid slow emulated read if Mov */ - rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, - c->dst.bytes, ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; + if (c->dst.type == OP_MEM) { + c->dst.ptr = (unsigned long *)memop; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.val = 0; + if (c->d & BitOp) { + unsigned long mask = ~(c->dst.bytes * 8 - 1); + + c->dst.ptr = (void *)c->dst.ptr + + (c->src.val & mask) / 8; + } + if (!(c->d & Mov)) { + /* optimisation - avoid slow emulated read */ + rc = ops->read_emulated((unsigned long)c->dst.ptr, + &c->dst.val, + c->dst.bytes, + ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + } } c->dst.orig_val = c->dst.val; @@ -2558,7 +1926,7 @@ special_insn: break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x08 ... 0x0d: @@ -2577,7 +1945,7 @@ special_insn: break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x18 ... 0x1d: @@ -2589,7 +1957,7 @@ special_insn: break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x20 ... 0x25: @@ -2620,7 +1988,7 @@ special_insn: case 0x58 ... 0x5f: /* pop reg */ pop_instruction: rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x60: /* pusha */ @@ -2628,7 +1996,7 @@ special_insn: break; case 0x61: /* popa */ rc = emulate_popa(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x63: /* movsxd */ @@ -2642,29 +2010,47 @@ special_insn: break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ - c->dst.bytes = min(c->dst.bytes, 4u); if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], - c->dst.bytes)) { + (c->d & ByteOp) ? 1 : c->op_bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - if (!pio_in_emulated(ctxt, ops, c->dst.bytes, - c->regs[VCPU_REGS_RDX], &c->dst.val)) - goto done; /* IO is needed, skip writeback */ - break; + if (kvm_emulate_pio_string(ctxt->vcpu, + 1, + (c->d & ByteOp) ? 1 : c->op_bytes, + c->rep_prefix ? + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, + (ctxt->eflags & EFLG_DF), + register_address(c, es_base(ctxt), + c->regs[VCPU_REGS_RDI]), + c->rep_prefix, + c->regs[VCPU_REGS_RDX]) == 0) { + c->eip = saved_eip; + return -1; + } + return 0; case 0x6e: /* outsb */ case 0x6f: /* outsw/outsd */ - c->src.bytes = min(c->src.bytes, 4u); if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], - c->src.bytes)) { + (c->d & ByteOp) ? 1 : c->op_bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], - &c->src.val, 1, ctxt->vcpu); - - c->dst.type = OP_NONE; /* nothing to writeback */ - break; + if (kvm_emulate_pio_string(ctxt->vcpu, + 0, + (c->d & ByteOp) ? 1 : c->op_bytes, + c->rep_prefix ? + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, + (ctxt->eflags & EFLG_DF), + register_address(c, + seg_override_base(ctxt, c), + c->regs[VCPU_REGS_RSI]), + c->rep_prefix, + c->regs[VCPU_REGS_RDX]) == 0) { + c->eip = saved_eip; + return -1; + } + return 0; case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); @@ -2721,11 +2107,12 @@ special_insn: case 0x8c: { /* mov r/m, sreg */ struct kvm_segment segreg; - if (c->modrm_reg <= VCPU_SREG_GS) + if (c->modrm_reg <= 5) kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); else { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; + printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", + c->modrm); + goto cannot_emulate; } c->dst.val = segreg.selector; break; @@ -2745,16 +2132,16 @@ special_insn: } if (c->modrm_reg == VCPU_SREG_SS) - toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); + toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); - rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); + rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); c->dst.type = OP_NONE; /* Disable writeback. */ break; } case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0x90: /* nop / xchg r8,rax */ @@ -2788,16 +2175,89 @@ special_insn: c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; break; case 0xa4 ... 0xa5: /* movs */ - goto mov; + c->dst.type = OP_MEM; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.ptr = (unsigned long *)register_address(c, + es_base(ctxt), + c->regs[VCPU_REGS_RDI]); + rc = ops->read_emulated(register_address(c, + seg_override_base(ctxt, c), + c->regs[VCPU_REGS_RSI]), + &c->dst.val, + c->dst.bytes, ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + register_address_increment(c, &c->regs[VCPU_REGS_RSI], + (ctxt->eflags & EFLG_DF) ? -c->dst.bytes + : c->dst.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RDI], + (ctxt->eflags & EFLG_DF) ? -c->dst.bytes + : c->dst.bytes); + break; case 0xa6 ... 0xa7: /* cmps */ + c->src.type = OP_NONE; /* Disable writeback. */ + c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->src.ptr = (unsigned long *)register_address(c, + seg_override_base(ctxt, c), + c->regs[VCPU_REGS_RSI]); + rc = ops->read_emulated((unsigned long)c->src.ptr, + &c->src.val, + c->src.bytes, + ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + c->dst.type = OP_NONE; /* Disable writeback. */ + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.ptr = (unsigned long *)register_address(c, + es_base(ctxt), + c->regs[VCPU_REGS_RDI]); + rc = ops->read_emulated((unsigned long)c->dst.ptr, + &c->dst.val, + c->dst.bytes, + ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); - goto cmp; + + emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); + + register_address_increment(c, &c->regs[VCPU_REGS_RSI], + (ctxt->eflags & EFLG_DF) ? -c->src.bytes + : c->src.bytes); + register_address_increment(c, &c->regs[VCPU_REGS_RDI], + (ctxt->eflags & EFLG_DF) ? -c->dst.bytes + : c->dst.bytes); + + break; case 0xaa ... 0xab: /* stos */ + c->dst.type = OP_MEM; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.ptr = (unsigned long *)register_address(c, + es_base(ctxt), + c->regs[VCPU_REGS_RDI]); c->dst.val = c->regs[VCPU_REGS_RAX]; + register_address_increment(c, &c->regs[VCPU_REGS_RDI], + (ctxt->eflags & EFLG_DF) ? -c->dst.bytes + : c->dst.bytes); break; case 0xac ... 0xad: /* lods */ - goto mov; + c->dst.type = OP_REG; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; + rc = ops->read_emulated(register_address(c, + seg_override_base(ctxt, c), + c->regs[VCPU_REGS_RSI]), + &c->dst.val, + c->dst.bytes, + ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + register_address_increment(c, &c->regs[VCPU_REGS_RSI], + (ctxt->eflags & EFLG_DF) ? -c->dst.bytes + : c->dst.bytes); + break; case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; @@ -2817,7 +2277,7 @@ special_insn: break; case 0xcb: /* ret far */ rc = emulate_ret_far(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; break; case 0xd0 ... 0xd1: /* Grp2 */ @@ -2830,10 +2290,14 @@ special_insn: break; case 0xe4: /* inb */ case 0xe5: /* in */ - goto do_io_in; + port = c->src.val; + io_dir_in = 1; + goto do_io; case 0xe6: /* outb */ case 0xe7: /* out */ - goto do_io_out; + port = c->src.val; + io_dir_in = 0; + goto do_io; case 0xe8: /* call (near) */ { long int rel = c->src.val; c->src.val = (unsigned long) c->eip; @@ -2844,9 +2308,8 @@ special_insn: case 0xe9: /* jmp rel */ goto jmp; case 0xea: /* jmp far */ - jump_far: - if (load_segment_descriptor(ctxt, ops, c->src2.val, - VCPU_SREG_CS)) + if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, + VCPU_SREG_CS)) goto done; c->eip = c->src.val; @@ -2858,29 +2321,25 @@ special_insn: break; case 0xec: /* in al,dx */ case 0xed: /* in (e/r)ax,dx */ - c->src.val = c->regs[VCPU_REGS_RDX]; - do_io_in: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { - kvm_inject_gp(ctxt->vcpu, 0); - goto done; - } - if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, - &c->dst.val)) - goto done; /* IO is needed */ - break; + port = c->regs[VCPU_REGS_RDX]; + io_dir_in = 1; + goto do_io; case 0xee: /* out al,dx */ case 0xef: /* out (e/r)ax,dx */ - c->src.val = c->regs[VCPU_REGS_RDX]; - do_io_out: - c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { + port = c->regs[VCPU_REGS_RDX]; + io_dir_in = 0; + do_io: + if (!emulator_io_permited(ctxt, ops, port, + (c->d & ByteOp) ? 1 : c->op_bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, - ctxt->vcpu); - c->dst.type = OP_NONE; /* Disable writeback. */ + if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, + (c->d & ByteOp) ? 1 : c->op_bytes, + port) != 0) { + c->eip = saved_eip; + goto cannot_emulate; + } break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; @@ -2891,15 +2350,16 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf6 ... 0xf7: /* Grp3 */ - if (!emulate_grp3(ctxt, ops)) - goto cannot_emulate; + rc = emulate_grp3(ctxt, ops); + if (rc != 0) + goto done; break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xfa: /* cli */ - if (emulator_bad_iopl(ctxt, ops)) + if (emulator_bad_iopl(ctxt)) kvm_inject_gp(ctxt->vcpu, 0); else { ctxt->eflags &= ~X86_EFLAGS_IF; @@ -2907,10 +2367,10 @@ special_insn: } break; case 0xfb: /* sti */ - if (emulator_bad_iopl(ctxt, ops)) + if (emulator_bad_iopl(ctxt)) kvm_inject_gp(ctxt->vcpu, 0); else { - toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); + toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); ctxt->eflags |= X86_EFLAGS_IF; c->dst.type = OP_NONE; /* Disable writeback. */ } @@ -2923,55 +2383,28 @@ special_insn: ctxt->eflags |= EFLG_DF; c->dst.type = OP_NONE; /* Disable writeback. */ break; - case 0xfe: /* Grp4 */ - grp45: + case 0xfe ... 0xff: /* Grp4/Grp5 */ rc = emulate_grp45(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; - case 0xff: /* Grp5 */ - if (c->modrm_reg == 5) - goto jump_far; - goto grp45; } writeback: rc = writeback(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; - /* - * restore dst type in case the decoding will be reused - * (happens for string instruction ) - */ - c->dst.type = saved_dst_type; - - if ((c->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, - &c->src); - - if ((c->d & DstMask) == DstDI) - string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); - - if (c->rep_prefix && (c->d & String)) { - struct read_cache *rc = &ctxt->decode.io_read; - register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); - /* - * Re-enter guest when pio read ahead buffer is empty or, - * if it is not used, after each 1024 iteration. - */ - if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || - (rc->end != 0 && rc->end == rc->pos)) - ctxt->restart = false; - } - /* Commit shadow register state. */ memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(ctxt->vcpu, c->eip); - ops->set_rflags(ctxt->vcpu, ctxt->eflags); done: - return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; + if (rc == X86EMUL_UNHANDLEABLE) { + c->eip = saved_eip; + return -1; + } + return 0; twobyte_insn: switch (c->b) { @@ -2985,18 +2418,18 @@ twobyte_insn: goto cannot_emulate; rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; /* Let the processor re-execute the fixed hypercall */ - c->eip = ctxt->eip; + c->eip = kvm_rip_read(ctxt->vcpu); /* Disable writeback. */ c->dst.type = OP_NONE; break; case 2: /* lgdt */ rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; realmode_lgdt(ctxt->vcpu, size, address); /* Disable writeback. */ @@ -3007,7 +2440,7 @@ twobyte_insn: switch (c->modrm_rm) { case 1: rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; break; default: @@ -3017,7 +2450,7 @@ twobyte_insn: rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, c->op_bytes); - if (rc != X86EMUL_CONTINUE) + if (rc) goto done; realmode_lidt(ctxt->vcpu, size, address); } @@ -3026,18 +2459,15 @@ twobyte_insn: break; case 4: /* smsw */ c->dst.bytes = 2; - c->dst.val = ops->get_cr(0, ctxt->vcpu); + c->dst.val = realmode_get_cr(ctxt->vcpu, 0); break; case 6: /* lmsw */ - ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | - (c->src.val & 0x0f), ctxt->vcpu); + realmode_lmsw(ctxt->vcpu, (u16)c->src.val, + &ctxt->eflags); c->dst.type = OP_NONE; break; - case 5: /* not defined */ - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, c->modrm_ea); + emulate_invlpg(ctxt->vcpu, memop); /* Disable writeback. */ c->dst.type = OP_NONE; break; @@ -3063,54 +2493,54 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x20: /* mov cr, reg */ - switch (c->modrm_reg) { - case 1: - case 5 ... 7: - case 9 ... 15: - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; - } - c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); + if (c->modrm_mod != 3) + goto cannot_emulate; + c->regs[c->modrm_rm] = + realmode_get_cr(ctxt->vcpu, c->modrm_reg); c->dst.type = OP_NONE; /* no writeback */ break; case 0x21: /* mov from dr to reg */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; - } - emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); + if (c->modrm_mod != 3) + goto cannot_emulate; + rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); + if (rc) + goto cannot_emulate; c->dst.type = OP_NONE; /* no writeback */ break; case 0x22: /* mov reg, cr */ - ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); + if (c->modrm_mod != 3) + goto cannot_emulate; + realmode_set_cr(ctxt->vcpu, + c->modrm_reg, c->modrm_val, &ctxt->eflags); c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && - (c->modrm_reg == 4 || c->modrm_reg == 5)) { - kvm_queue_exception(ctxt->vcpu, UD_VECTOR); - goto done; - } - emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); + if (c->modrm_mod != 3) + goto cannot_emulate; + rc = emulator_set_dr(ctxt, c->modrm_reg, + c->regs[c->modrm_rm]); + if (rc) + goto cannot_emulate; c->dst.type = OP_NONE; /* no writeback */ break; case 0x30: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { + rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); + if (rc) { kvm_inject_gp(ctxt->vcpu, 0); - goto done; + c->eip = kvm_rip_read(ctxt->vcpu); } rc = X86EMUL_CONTINUE; c->dst.type = OP_NONE; break; case 0x32: /* rdmsr */ - if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { + rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); + if (rc) { kvm_inject_gp(ctxt->vcpu, 0); - goto done; + c->eip = kvm_rip_read(ctxt->vcpu); } else { c->regs[VCPU_REGS_RAX] = (u32)msr_data; c->regs[VCPU_REGS_RDX] = msr_data >> 32; @@ -3147,7 +2577,7 @@ twobyte_insn: break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0xa3: @@ -3166,7 +2596,7 @@ twobyte_insn: break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); - if (rc != X86EMUL_CONTINUE) + if (rc != 0) goto done; break; case 0xab: @@ -3238,14 +2668,16 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops); - if (rc != X86EMUL_CONTINUE) + rc = emulate_grp9(ctxt, ops, memop); + if (rc != 0) goto done; + c->dst.type = OP_NONE; break; } goto writeback; cannot_emulate: DPRINTF("Cannot emulate %02x\n", c->b); + c->eip = saved_eip; return -1; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 93825ff3338f..a790fa128a9f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -33,29 +33,6 @@ #include <linux/kvm_host.h> #include "trace.h" -static void pic_lock(struct kvm_pic *s) - __acquires(&s->lock) -{ - raw_spin_lock(&s->lock); -} - -static void pic_unlock(struct kvm_pic *s) - __releases(&s->lock) -{ - bool wakeup = s->wakeup_needed; - struct kvm_vcpu *vcpu; - - s->wakeup_needed = false; - - raw_spin_unlock(&s->lock); - - if (wakeup) { - vcpu = s->kvm->bsp_vcpu; - if (vcpu) - kvm_vcpu_kick(vcpu); - } -} - static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -68,19 +45,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) * Other interrupt may be delivered to PIC while lock is dropped but * it should be safe since PIC state is already updated at this stage. */ - pic_unlock(s->pics_state); + raw_spin_unlock(&s->pics_state->lock); kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); - pic_lock(s->pics_state); + raw_spin_lock(&s->pics_state->lock); } void kvm_pic_clear_isr_ack(struct kvm *kvm) { struct kvm_pic *s = pic_irqchip(kvm); - pic_lock(s); + raw_spin_lock(&s->lock); s->pics[0].isr_ack = 0xff; s->pics[1].isr_ack = 0xff; - pic_unlock(s); + raw_spin_unlock(&s->lock); } /* @@ -181,9 +158,9 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { - pic_lock(s); + raw_spin_lock(&s->lock); pic_update_irq(s); - pic_unlock(s); + raw_spin_unlock(&s->lock); } int kvm_pic_set_irq(void *opaque, int irq, int level) @@ -191,14 +168,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) struct kvm_pic *s = opaque; int ret = -1; - pic_lock(s); + raw_spin_lock(&s->lock); if (irq >= 0 && irq < PIC_NUM_PINS) { ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, s->pics[irq >> 3].imr, ret == 0); } - pic_unlock(s); + raw_spin_unlock(&s->lock); return ret; } @@ -228,7 +205,7 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); - pic_lock(s); + raw_spin_lock(&s->lock); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -253,7 +230,7 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); - pic_unlock(s); + raw_spin_unlock(&s->lock); return intno; } @@ -467,7 +444,7 @@ static int picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return 0; } - pic_lock(s); + raw_spin_lock(&s->lock); switch (addr) { case 0x20: case 0x21: @@ -480,7 +457,7 @@ static int picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } - pic_unlock(s); + raw_spin_unlock(&s->lock); return 0; } @@ -497,7 +474,7 @@ static int picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return 0; } - pic_lock(s); + raw_spin_lock(&s->lock); switch (addr) { case 0x20: case 0x21: @@ -511,7 +488,7 @@ static int picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; - pic_unlock(s); + raw_spin_unlock(&s->lock); return 0; } @@ -528,7 +505,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - s->wakeup_needed = true; + kvm_vcpu_kick(vcpu); } } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index cd1f362f413d..34b15915754d 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -63,7 +63,6 @@ struct kvm_kpic_state { struct kvm_pic { raw_spinlock_t lock; - bool wakeup_needed; unsigned pending_acks; struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 64bc6ea78d90..55c7524dda54 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h @@ -10,7 +10,9 @@ struct kvm_timer { }; struct kvm_timer_ops { - bool (*is_periodic)(struct kvm_timer *); + bool (*is_periodic)(struct kvm_timer *); }; + enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); + diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2b3d92d2aaf4..48aeee8eefb0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -148,6 +148,7 @@ module_param(oos_shadow, bool, 0644); #include <trace/events/kvm.h> +#undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS #include "mmutrace.h" @@ -326,6 +327,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; + set_page_private(page, 0); cache->objects[cache->nobjs++] = page_address(page); } return 0; @@ -1329,8 +1331,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role = vcpu->arch.mmu.base_role; role.level = level; role.direct = direct; - if (role.direct) - role.glevels = 0; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -2296,19 +2296,13 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) /* no rsvd bits for 2 level 4K page table entries */ context->rsvd_bits_mask[0][1] = 0; context->rsvd_bits_mask[0][0] = 0; - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; - - if (!is_pse(vcpu)) { - context->rsvd_bits_mask[1][1] = 0; - break; - } - if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT32E_ROOT_LEVEL: context->rsvd_bits_mask[0][2] = @@ -2321,7 +2315,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | @@ -2339,7 +2333,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; } } @@ -2565,11 +2559,36 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - u64 gpte) + const u8 *new, int bytes) { gfn_t gfn; + int r; + u64 gpte = 0; pfn_t pfn; + if (bytes != 4 && bytes != 8) + return; + + /* + * Assume that the pte write on a page table of the same type + * as the current vcpu paging mode. This is nearly always true + * (might be false while changing modes). Note it is verified later + * by update_pte(). + */ + if (is_pae(vcpu)) { + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + if ((bytes == 4) && (gpa % 4 == 0)) { + r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); + if (r) + return; + memcpy((void *)&gpte + (gpa % 8), new, 4); + } else if ((bytes == 8) && (gpa % 8 == 0)) { + memcpy((void *)&gpte, new, 8); + } + } else { + if ((bytes == 4) && (gpa % 4 == 0)) + memcpy((void *)&gpte, new, 4); + } if (!is_present_gpte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -2618,46 +2637,10 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int flooded = 0; int npte; int r; - int invlpg_counter; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - - invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); - - /* - * Assume that the pte write on a page table of the same type - * as the current vcpu paging mode. This is nearly always true - * (might be false while changing modes). Note it is verified later - * by update_pte(). - */ - if ((is_pae(vcpu) && bytes == 4) || !new) { - /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ - if (is_pae(vcpu)) { - gpa &= ~(gpa_t)7; - bytes = 8; - } - r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); - if (r) - gentry = 0; - new = (const u8 *)&gentry; - } - - switch (bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; - } - - mmu_guess_page_from_pte_write(vcpu, gpa, gentry); + mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); spin_lock(&vcpu->kvm->mmu_lock); - if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) - gentry = 0; kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; @@ -2721,11 +2704,20 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, continue; } spte = &sp->spt[page_offset / sizeof(*spte)]; + if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { + gentry = 0; + r = kvm_read_guest_atomic(vcpu->kvm, + gpa & ~(u64)(pte_size - 1), + &gentry, pte_size); + new = (const void *)&gentry; + if (r < 0) + new = NULL; + } while (npte--) { entry = *spte; mmu_pte_write_zap_pte(vcpu, sp, spte); - if (gentry) - mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); + if (new) + mmu_pte_write_new_pte(vcpu, sp, spte, new); mmu_pte_write_flush_tlb(vcpu, entry, *spte); ++spte; } diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 1fe956ab7617..3e4a5c6ca2a9 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -6,6 +6,8 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE mmutrace #define KVM_MMU_PAGE_FIELDS \ __field(__u64, gfn) \ @@ -214,10 +216,5 @@ TRACE_EVENT( #endif /* _TRACE_KVMMMU_H */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE mmutrace - /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 067797a72768..81eab9a50e6a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -258,17 +258,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pt_element_t gpte; unsigned pte_access; pfn_t pfn; - u64 new_spte; gpte = *(const pt_element_t *)pte; if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { - if (!is_present_gpte(gpte)) { - if (page->unsync) - new_spte = shadow_trap_nonpresent_pte; - else - new_spte = shadow_notrap_nonpresent_pte; - __set_spte(spte, new_spte); - } + if (!is_present_gpte(gpte)) + __set_spte(spte, shadow_notrap_nonpresent_pte); return; } pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); @@ -463,7 +457,6 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; - gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -477,10 +470,6 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { - struct kvm_mmu_page *sp = page_header(__pa(sptep)); - - pte_gpa = (sp->gfn << PAGE_SHIFT); - pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -498,17 +487,7 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); - - atomic_inc(&vcpu->kvm->arch.invlpg_counter); - spin_unlock(&vcpu->kvm->mmu_lock); - - if (pte_gpa == -1) - return; - - if (mmu_topup_memory_caches(vcpu)) - return; - kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e9f79619e185..445c59411ed0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -47,7 +47,6 @@ MODULE_LICENSE("GPL"); #define SVM_FEATURE_NPT (1 << 0) #define SVM_FEATURE_LBRV (1 << 1) #define SVM_FEATURE_SVML (1 << 2) -#define SVM_FEATURE_NRIP (1 << 3) #define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ @@ -71,7 +70,6 @@ struct kvm_vcpu; struct nested_state { struct vmcb *hsave; u64 hsave_msr; - u64 vm_cr_msr; u64 vmcb; /* These are the merged vectors */ @@ -79,7 +77,6 @@ struct nested_state { /* gpa pointers to the real vectors */ u64 vmcb_msrpm; - u64 vmcb_iopm; /* A VMEXIT is required but not yet emulated */ bool exit_required; @@ -94,9 +91,6 @@ struct nested_state { }; -#define MSRPM_OFFSETS 16 -static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; - struct vcpu_svm { struct kvm_vcpu vcpu; struct vmcb *vmcb; @@ -116,39 +110,13 @@ struct vcpu_svm { struct nested_state nested; bool nmi_singlestep; - - unsigned int3_injected; - unsigned long int3_rip; -}; - -#define MSR_INVALID 0xffffffffU - -static struct svm_direct_access_msrs { - u32 index; /* Index of the MSR */ - bool always; /* True if intercept is always on */ -} direct_access_msrs[] = { - { .index = MSR_K6_STAR, .always = true }, - { .index = MSR_IA32_SYSENTER_CS, .always = true }, -#ifdef CONFIG_X86_64 - { .index = MSR_GS_BASE, .always = true }, - { .index = MSR_FS_BASE, .always = true }, - { .index = MSR_KERNEL_GS_BASE, .always = true }, - { .index = MSR_LSTAR, .always = true }, - { .index = MSR_CSTAR, .always = true }, - { .index = MSR_SYSCALL_MASK, .always = true }, -#endif - { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, - { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, - { .index = MSR_IA32_LASTINTFROMIP, .always = false }, - { .index = MSR_IA32_LASTINTTOIP, .always = false }, - { .index = MSR_INVALID, .always = false }, }; /* enable NPT for AMD64 and X86 with PAE */ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static bool npt_enabled = true; #else -static bool npt_enabled; +static bool npt_enabled = false; #endif static int npt = 1; @@ -161,7 +129,6 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); static int nested_svm_exit_handled(struct vcpu_svm *svm); -static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); @@ -196,8 +163,8 @@ static unsigned long iopm_base; struct kvm_ldttss_desc { u16 limit0; u16 base0; - unsigned base1:8, type:5, dpl:2, p:1; - unsigned limit1:4, zero0:3, g:1, base2:8; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; u32 base3; u32 zero1; } __attribute__((packed)); @@ -227,27 +194,6 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define MSRS_RANGE_SIZE 2048 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) -static u32 svm_msrpm_offset(u32 msr) -{ - u32 offset; - int i; - - for (i = 0; i < NUM_MSR_MAPS; i++) { - if (msr < msrpm_ranges[i] || - msr >= msrpm_ranges[i] + MSRS_IN_RANGE) - continue; - - offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ - offset += (i * MSRS_RANGE_SIZE); /* add range offset */ - - /* Now we have the u8 offset - but need the u32 offset */ - return offset / 4; - } - - /* MSR not in any range */ - return MSR_INVALID; -} - #define MAX_INST_SIZE 15 static inline u32 svm_has(u32 feat) @@ -267,7 +213,7 @@ static inline void stgi(void) static inline void invlpga(unsigned long addr, u32 asid) { - asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); + asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); } static inline void force_new_asid(struct kvm_vcpu *vcpu) @@ -289,6 +235,23 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) vcpu->arch.efer = efer; } +static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, + bool has_error_code, u32 error_code) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* If we are within a nested VM we'd better #VMEXIT and let the + guest handle the exception */ + if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) + return; + + svm->vmcb->control.event_inj = nr + | SVM_EVTINJ_VALID + | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) + | SVM_EVTINJ_TYPE_EXEPT; + svm->vmcb->control.event_inj_err = error_code; +} + static int is_external_interrupt(u32 info) { info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; @@ -301,7 +264,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) u32 ret = 0; if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) - ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; + ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; return ret & mask; } @@ -334,41 +297,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm_set_interrupt_shadow(vcpu, 0); } -static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - /* - * If we are within a nested VM we'd better #VMEXIT and let the guest - * handle the exception - */ - if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) - return; - - if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { - unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); - - /* - * For guest debugging where we have to reinject #BP if some - * INT3 is guest-owned: - * Emulate nRIP by moving RIP forward. Will fail if injection - * raises a fault that is not intercepted. Still better than - * failing in all cases. - */ - skip_emulated_instruction(&svm->vcpu); - rip = kvm_rip_read(&svm->vcpu); - svm->int3_rip = rip + svm->vmcb->save.cs.base; - svm->int3_injected = rip - old_rip; - } - - svm->vmcb->control.event_inj = nr - | SVM_EVTINJ_VALID - | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) - | SVM_EVTINJ_TYPE_EXEPT; - svm->vmcb->control.event_inj_err = error_code; -} - static int has_svm(void) { const char *msg; @@ -391,7 +319,7 @@ static int svm_hardware_enable(void *garbage) struct svm_cpu_data *sd; uint64_t efer; - struct desc_ptr gdt_descr; + struct descriptor_table gdt_descr; struct desc_struct *gdt; int me = raw_smp_processor_id(); @@ -416,8 +344,8 @@ static int svm_hardware_enable(void *garbage) sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; - native_store_gdt(&gdt_descr); - gdt = (struct desc_struct *)gdt_descr.address; + kvm_get_gdt(&gdt_descr); + gdt = (struct desc_struct *)gdt_descr.base; sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); wrmsrl(MSR_EFER, efer | EFER_SVME); @@ -463,98 +391,42 @@ err_1: } -static bool valid_msr_intercept(u32 index) -{ - int i; - - for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) - if (direct_access_msrs[i].index == index) - return true; - - return false; -} - static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { - u8 bit_read, bit_write; - unsigned long tmp; - u32 offset; - - /* - * If this warning triggers extend the direct_access_msrs list at the - * beginning of the file - */ - WARN_ON(!valid_msr_intercept(msr)); - - offset = svm_msrpm_offset(msr); - bit_read = 2 * (msr & 0x0f); - bit_write = 2 * (msr & 0x0f) + 1; - tmp = msrpm[offset]; - - BUG_ON(offset == MSR_INVALID); - - read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); - write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); - - msrpm[offset] = tmp; -} - -static void svm_vcpu_init_msrpm(u32 *msrpm) -{ int i; - memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); - - for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { - if (!direct_access_msrs[i].always) - continue; - - set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); - } -} - -static void add_msr_offset(u32 offset) -{ - int i; - - for (i = 0; i < MSRPM_OFFSETS; ++i) { - - /* Offset already in list? */ - if (msrpm_offsets[i] == offset) + for (i = 0; i < NUM_MSR_MAPS; i++) { + if (msr >= msrpm_ranges[i] && + msr < msrpm_ranges[i] + MSRS_IN_RANGE) { + u32 msr_offset = (i * MSRS_IN_RANGE + msr - + msrpm_ranges[i]) * 2; + + u32 *base = msrpm + (msr_offset / 32); + u32 msr_shift = msr_offset % 32; + u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); + *base = (*base & ~(0x3 << msr_shift)) | + (mask << msr_shift); return; - - /* Slot used by another offset? */ - if (msrpm_offsets[i] != MSR_INVALID) - continue; - - /* Add offset to list */ - msrpm_offsets[i] = offset; - - return; + } } - - /* - * If this BUG triggers the msrpm_offsets table has an overflow. Just - * increase MSRPM_OFFSETS in this case. - */ BUG(); } -static void init_msrpm_offsets(void) +static void svm_vcpu_init_msrpm(u32 *msrpm) { - int i; - - memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); - - for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { - u32 offset; - - offset = svm_msrpm_offset(direct_access_msrs[i].index); - BUG_ON(offset == MSR_INVALID); + memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); - add_msr_offset(offset); - } +#ifdef CONFIG_X86_64 + set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); + set_msr_interception(msrpm, MSR_LSTAR, 1, 1); + set_msr_interception(msrpm, MSR_CSTAR, 1, 1); + set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); +#endif + set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); + set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); } static void svm_enable_lbrv(struct vcpu_svm *svm) @@ -595,8 +467,6 @@ static __init int svm_hardware_setup(void) memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; - init_msrpm_offsets(); - if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); @@ -653,7 +523,7 @@ static void init_seg(struct vmcb_seg *seg) { seg->selector = 0; seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | - SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ + SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ seg->limit = 0xffff; seg->base = 0; } @@ -673,16 +543,16 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vcpu.fpu_active = 1; - control->intercept_cr_read = INTERCEPT_CR0_MASK | + control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | INTERCEPT_CR4_MASK; - control->intercept_cr_write = INTERCEPT_CR0_MASK | + control->intercept_cr_write = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | INTERCEPT_CR4_MASK | INTERCEPT_CR8_MASK; - control->intercept_dr_read = INTERCEPT_DR0_MASK | + control->intercept_dr_read = INTERCEPT_DR0_MASK | INTERCEPT_DR1_MASK | INTERCEPT_DR2_MASK | INTERCEPT_DR3_MASK | @@ -691,7 +561,7 @@ static void init_vmcb(struct vcpu_svm *svm) INTERCEPT_DR6_MASK | INTERCEPT_DR7_MASK; - control->intercept_dr_write = INTERCEPT_DR0_MASK | + control->intercept_dr_write = INTERCEPT_DR0_MASK | INTERCEPT_DR1_MASK | INTERCEPT_DR2_MASK | INTERCEPT_DR3_MASK | @@ -705,7 +575,7 @@ static void init_vmcb(struct vcpu_svm *svm) (1 << MC_VECTOR); - control->intercept = (1ULL << INTERCEPT_INTR) | + control->intercept = (1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_NMI) | (1ULL << INTERCEPT_SMI) | (1ULL << INTERCEPT_SELECTIVE_CR0) | @@ -766,8 +636,7 @@ static void init_vmcb(struct vcpu_svm *svm) save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; - /* - * This is the guest-visible cr0 value. + /* This is the guest-visible cr0 value. * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. */ svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; @@ -837,30 +706,30 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_svm; - err = -ENOMEM; page = alloc_page(GFP_KERNEL); - if (!page) + if (!page) { + err = -ENOMEM; goto uninit; + } + err = -ENOMEM; msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) - goto free_page1; + goto uninit; nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!nested_msrpm_pages) - goto free_page2; + goto uninit; + + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); hsave_page = alloc_page(GFP_KERNEL); if (!hsave_page) - goto free_page3; - + goto uninit; svm->nested.hsave = page_address(hsave_page); - svm->msrpm = page_address(msrpm_pages); - svm_vcpu_init_msrpm(svm->msrpm); - svm->nested.msrpm = page_address(nested_msrpm_pages); - svm_vcpu_init_msrpm(svm->nested.msrpm); svm->vmcb = page_address(page); clear_page(svm->vmcb); @@ -875,12 +744,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) return &svm->vcpu; -free_page3: - __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); -free_page2: - __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); -free_page1: - __free_page(page); uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: @@ -1014,8 +877,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - /* - * AMD's VMCB does not have an explicit unusable field, so emulate it + /* AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ var->unusable = !var->present || (var->type == 0); @@ -1051,8 +913,7 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->type |= 0x1; break; case VCPU_SREG_SS: - /* - * On AMD CPUs sometimes the DB bit in the segment + /* On AMD CPUs sometimes the DB bit in the segment * descriptor is left as 1, although the whole segment has * been made unusable. Clear it here to pass an Intel VMX * entry check when cross vendor migrating. @@ -1070,36 +931,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) return save->cpl; } -static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); - dt->size = svm->vmcb->save.idtr.limit; - dt->address = svm->vmcb->save.idtr.base; + dt->limit = svm->vmcb->save.idtr.limit; + dt->base = svm->vmcb->save.idtr.base; } -static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.idtr.limit = dt->size; - svm->vmcb->save.idtr.base = dt->address ; + svm->vmcb->save.idtr.limit = dt->limit; + svm->vmcb->save.idtr.base = dt->base ; } -static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); - dt->size = svm->vmcb->save.gdtr.limit; - dt->address = svm->vmcb->save.gdtr.base; + dt->limit = svm->vmcb->save.gdtr.limit; + dt->base = svm->vmcb->save.gdtr.base; } -static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.gdtr.limit = dt->size; - svm->vmcb->save.gdtr.base = dt->address ; + svm->vmcb->save.gdtr.limit = dt->limit; + svm->vmcb->save.gdtr.base = dt->base ; } static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -1112,7 +973,6 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void update_cr0_intercept(struct vcpu_svm *svm) { - struct vmcb *vmcb = svm->vmcb; ulong gcr0 = svm->vcpu.arch.cr0; u64 *hcr0 = &svm->vmcb->save.cr0; @@ -1124,25 +984,11 @@ static void update_cr0_intercept(struct vcpu_svm *svm) if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { - vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; - vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; - if (is_nested(svm)) { - struct vmcb *hsave = svm->nested.hsave; - - hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; - hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; - vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; - vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; - } + svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; + svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; } else { svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; - if (is_nested(svm)) { - struct vmcb *hsave = svm->nested.hsave; - - hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; - hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; - } } } @@ -1150,27 +996,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm)) { - /* - * We are here because we run in nested mode, the host kvm - * intercepts cr0 writes but the l1 hypervisor does not. - * But the L1 hypervisor may intercept selective cr0 writes. - * This needs to be checked here. - */ - unsigned long old, new; - - /* Remove bits that would trigger a real cr0 write intercept */ - old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; - new = cr0 & SVM_CR0_SELECTIVE_MASK; - - if (old == new) { - /* cr0 write with ts and mp unchanged */ - svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) - return; - } - } - #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -1404,7 +1229,7 @@ static int db_interception(struct vcpu_svm *svm) } if (svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; @@ -1438,22 +1263,7 @@ static int ud_interception(struct vcpu_svm *svm) static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u32 excp; - - if (is_nested(svm)) { - u32 h_excp, n_excp; - - h_excp = svm->nested.hsave->control.intercept_exceptions; - n_excp = svm->nested.intercept_exceptions; - h_excp &= ~(1 << NM_VECTOR); - excp = h_excp | n_excp; - } else { - excp = svm->vmcb->control.intercept_exceptions; - excp &= ~(1 << NM_VECTOR); - } - - svm->vmcb->control.intercept_exceptions = excp; - + svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); svm->vcpu.fpu_active = 1; update_cr0_intercept(svm); } @@ -1494,23 +1304,29 @@ static int shutdown_interception(struct vcpu_svm *svm) static int io_interception(struct vcpu_svm *svm) { - struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; + + svm->next_rip = svm->vmcb->control.exit_info_2; + string = (io_info & SVM_IOIO_STR_MASK) != 0; - in = (io_info & SVM_IOIO_TYPE_MASK) != 0; - if (string || in) - return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); + if (string) { + if (emulate_instruction(&svm->vcpu, + 0, 0, 0) == EMULATE_DO_MMIO) + return 0; + return 1; + } + + in = (io_info & SVM_IOIO_TYPE_MASK) != 0; port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - svm->next_rip = svm->vmcb->control.exit_info_2; - skip_emulated_instruction(&svm->vcpu); - return kvm_fast_pio_out(vcpu, size, port); + skip_emulated_instruction(&svm->vcpu); + return kvm_emulate_pio(&svm->vcpu, in, size, port); } static int nmi_interception(struct vcpu_svm *svm) @@ -1563,8 +1379,6 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code) { - int vmexit; - if (!is_nested(svm)) return 0; @@ -1573,28 +1387,21 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_info_1 = error_code; svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - vmexit = nested_svm_intercept(svm); - if (vmexit == NESTED_EXIT_DONE) - svm->nested.exit_required = true; - - return vmexit; + return nested_svm_exit_handled(svm); } -/* This function returns true if it is save to enable the irq window */ -static inline bool nested_svm_intr(struct vcpu_svm *svm) +static inline int nested_svm_intr(struct vcpu_svm *svm) { if (!is_nested(svm)) - return true; + return 0; if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return true; + return 0; if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) - return false; + return 0; - svm->vmcb->control.exit_code = SVM_EXIT_INTR; - svm->vmcb->control.exit_info_1 = 0; - svm->vmcb->control.exit_info_2 = 0; + svm->vmcb->control.exit_code = SVM_EXIT_INTR; if (svm->nested.intercept & 1ULL) { /* @@ -1605,40 +1412,21 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) */ svm->nested.exit_required = true; trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); - return false; + return 1; } - return true; -} - -/* This function returns true if it is save to enable the nmi window */ -static inline bool nested_svm_nmi(struct vcpu_svm *svm) -{ - if (!is_nested(svm)) - return true; - - if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) - return true; - - svm->vmcb->control.exit_code = SVM_EXIT_NMI; - svm->nested.exit_required = true; - - return false; + return 0; } -static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) +static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) { struct page *page; - might_sleep(); - page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); if (is_error_page(page)) goto error; - *_page = page; - - return kmap(page); + return kmap_atomic(page, idx); error: kvm_release_page_clean(page); @@ -1647,55 +1435,61 @@ error: return NULL; } -static void nested_svm_unmap(struct page *page) -{ - kunmap(page); - kvm_release_page_dirty(page); -} - -static int nested_svm_intercept_ioio(struct vcpu_svm *svm) +static void nested_svm_unmap(void *addr, enum km_type idx) { - unsigned port; - u8 val, bit; - u64 gpa; - - if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) - return NESTED_EXIT_HOST; + struct page *page; - port = svm->vmcb->control.exit_info_1 >> 16; - gpa = svm->nested.vmcb_iopm + (port / 8); - bit = port % 8; - val = 0; + if (!addr) + return; - if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) - val &= (1 << bit); + page = kmap_atomic_to_page(addr); - return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; + kunmap_atomic(addr, idx); + kvm_release_page_dirty(page); } -static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) +static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) { - u32 offset, msr, value; - int write, mask; + u32 param = svm->vmcb->control.exit_info_1 & 1; + u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + bool ret = false; + u32 t0, t1; + u8 *msrpm; if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) - return NESTED_EXIT_HOST; + return false; - msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; - offset = svm_msrpm_offset(msr); - write = svm->vmcb->control.exit_info_1 & 1; - mask = 1 << ((2 * (msr & 0xf)) + write); + msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); - if (offset == MSR_INVALID) - return NESTED_EXIT_DONE; + if (!msrpm) + goto out; + + switch (msr) { + case 0 ... 0x1fff: + t0 = (msr * 2) % 8; + t1 = msr / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + msr - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 0xc0011fff: + t0 = (16384 + msr - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + ret = true; + goto out; + } - /* Offset is in 32 bit units but need in 8 bit units */ - offset *= 4; + ret = msrpm[t1] & ((1 << param) << t0); - if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) - return NESTED_EXIT_DONE; +out: + nested_svm_unmap(msrpm, KM_USER0); - return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; + return ret; } static int nested_svm_exit_special(struct vcpu_svm *svm) @@ -1706,19 +1500,16 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) case SVM_EXIT_INTR: case SVM_EXIT_NMI: return NESTED_EXIT_HOST; - case SVM_EXIT_NPF: /* For now we are always handling NPFs when using them */ + case SVM_EXIT_NPF: if (npt_enabled) return NESTED_EXIT_HOST; break; + /* When we're shadowing, trap PFs */ case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs */ if (!npt_enabled) return NESTED_EXIT_HOST; break; - case SVM_EXIT_EXCP_BASE + NM_VECTOR: - nm_interception(svm); - break; default: break; } @@ -1729,7 +1520,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) /* * If this function returns true, this #vmexit was already handled */ -static int nested_svm_intercept(struct vcpu_svm *svm) +static int nested_svm_exit_handled(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; int vmexit = NESTED_EXIT_HOST; @@ -1738,9 +1529,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm) case SVM_EXIT_MSR: vmexit = nested_svm_exit_handled_msr(svm); break; - case SVM_EXIT_IOIO: - vmexit = nested_svm_intercept_ioio(svm); - break; case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); if (svm->nested.intercept_cr_read & cr_bits) @@ -1778,17 +1566,9 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } } - return vmexit; -} - -static int nested_svm_exit_handled(struct vcpu_svm *svm) -{ - int vmexit; - - vmexit = nested_svm_intercept(svm); - - if (vmexit == NESTED_EXIT_DONE) + if (vmexit == NESTED_EXIT_DONE) { nested_svm_vmexit(svm); + } return vmexit; } @@ -1830,7 +1610,6 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; - struct page *page; trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, vmcb->control.exit_info_1, @@ -1838,13 +1617,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_int_info, vmcb->control.exit_int_info_err); - nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); if (!nested_vmcb) return 1; - /* Exit nested SVM mode */ - svm->nested.vmcb = 0; - /* Give the current vmcb to the guest */ disable_gif(svm); @@ -1854,13 +1630,9 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.ds = vmcb->save.ds; nested_vmcb->save.gdtr = vmcb->save.gdtr; nested_vmcb->save.idtr = vmcb->save.idtr; - nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); if (npt_enabled) nested_vmcb->save.cr3 = vmcb->save.cr3; - else - nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; nested_vmcb->save.cr2 = vmcb->save.cr2; - nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = vmcb->save.rflags; nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; @@ -1932,7 +1704,10 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; - nested_svm_unmap(page); + /* Exit nested SVM mode */ + svm->nested.vmcb = 0; + + nested_svm_unmap(nested_vmcb, KM_USER0); kvm_mmu_reset_context(&svm->vcpu); kvm_mmu_load(&svm->vcpu); @@ -1942,34 +1717,20 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { - /* - * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is omptimized in that it only merges the parts where - * the kvm msr permission bitmap may contain zero bits - */ + u32 *nested_msrpm; int i; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) - return true; - - for (i = 0; i < MSRPM_OFFSETS; i++) { - u32 value, p; - u64 offset; - - if (msrpm_offsets[i] == 0xffffffff) - break; - - p = msrpm_offsets[i]; - offset = svm->nested.vmcb_msrpm + (p * 4); + nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); + if (!nested_msrpm) + return false; - if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) - return false; - - svm->nested.msrpm[p] = svm->msrpm[p] | value; - } + for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) + svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); + nested_svm_unmap(nested_msrpm, KM_USER0); + return true; } @@ -1978,34 +1739,26 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; - struct page *page; - u64 vmcb_gpa; - vmcb_gpa = svm->vmcb->save.rax; - - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); if (!nested_vmcb) return false; - trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, + /* nested_vmcb is our indicator if nested SVM is activated */ + svm->nested.vmcb = svm->vmcb->save.rax; + + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, nested_vmcb->save.rip, nested_vmcb->control.int_ctl, nested_vmcb->control.event_inj, nested_vmcb->control.nested_ctl); - trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, - nested_vmcb->control.intercept_cr_write, - nested_vmcb->control.intercept_exceptions, - nested_vmcb->control.intercept); - /* Clear internal status */ kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); - /* - * Save the old vmcb, so we don't need to pick what we save, but can - * restore everything when a VMEXIT occurs - */ + /* Save the old vmcb, so we don't need to pick what we save, but + can restore everything when a VMEXIT occurs */ hsave->save.es = vmcb->save.es; hsave->save.cs = vmcb->save.cs; hsave->save.ss = vmcb->save.ss; @@ -2045,17 +1798,14 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (npt_enabled) { svm->vmcb->save.cr3 = nested_vmcb->save.cr3; svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; - } else + } else { kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); - - /* Guest paging mode is active - reset mmu */ - kvm_mmu_reset_context(&svm->vcpu); - + kvm_mmu_reset_context(&svm->vcpu); + } svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); - /* In case we don't even reach vcpu_run, the fields are not updated */ svm->vmcb->save.rax = nested_vmcb->save.rax; svm->vmcb->save.rsp = nested_vmcb->save.rsp; @@ -2064,8 +1814,22 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; - svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; + /* We don't want a nested guest to be more powerful than the guest, + so all intercepts are ORed */ + svm->vmcb->control.intercept_cr_read |= + nested_vmcb->control.intercept_cr_read; + svm->vmcb->control.intercept_cr_write |= + nested_vmcb->control.intercept_cr_write; + svm->vmcb->control.intercept_dr_read |= + nested_vmcb->control.intercept_dr_read; + svm->vmcb->control.intercept_dr_write |= + nested_vmcb->control.intercept_dr_write; + svm->vmcb->control.intercept_exceptions |= + nested_vmcb->control.intercept_exceptions; + + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + + svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; /* cache intercepts */ svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; @@ -2082,40 +1846,13 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; - if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { - /* We only want the cr8 intercept bits of the guest */ - svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; - svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; - } - - /* - * We don't want a nested guest to be more powerful than the guest, so - * all intercepts are ORed - */ - svm->vmcb->control.intercept_cr_read |= - nested_vmcb->control.intercept_cr_read; - svm->vmcb->control.intercept_cr_write |= - nested_vmcb->control.intercept_cr_write; - svm->vmcb->control.intercept_dr_read |= - nested_vmcb->control.intercept_dr_read; - svm->vmcb->control.intercept_dr_write |= - nested_vmcb->control.intercept_dr_write; - svm->vmcb->control.intercept_exceptions |= - nested_vmcb->control.intercept_exceptions; - - svm->vmcb->control.intercept |= nested_vmcb->control.intercept; - - svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; - nested_svm_unmap(page); - - /* nested_vmcb is our indicator if nested SVM is activated */ - svm->nested.vmcb = vmcb_gpa; + nested_svm_unmap(nested_vmcb, KM_USER0); enable_gif(svm); @@ -2141,7 +1878,6 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; - struct page *page; if (nested_svm_check_permissions(svm)) return 1; @@ -2149,12 +1885,12 @@ static int vmload_interception(struct vcpu_svm *svm) svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); if (!nested_vmcb) return 1; nested_svm_vmloadsave(nested_vmcb, svm->vmcb); - nested_svm_unmap(page); + nested_svm_unmap(nested_vmcb, KM_USER0); return 1; } @@ -2162,7 +1898,6 @@ static int vmload_interception(struct vcpu_svm *svm) static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; - struct page *page; if (nested_svm_check_permissions(svm)) return 1; @@ -2170,12 +1905,12 @@ static int vmsave_interception(struct vcpu_svm *svm) svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); if (!nested_vmcb) return 1; nested_svm_vmloadsave(svm->vmcb, nested_vmcb); - nested_svm_unmap(page); + nested_svm_unmap(nested_vmcb, KM_USER0); return 1; } @@ -2405,11 +2140,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_SYSENTER_ESP: *data = svm->sysenter_esp; break; - /* - * Nobody will change the following 5 values in the VMCB so we can - * safely return them on rdmsr. They will always be 0 until LBRV is - * implemented. - */ + /* Nobody will change the following 5 values in the VMCB so + we can safely return them on rdmsr. They will always be 0 + until LBRV is implemented. */ case MSR_IA32_DEBUGCTLMSR: *data = svm->vmcb->save.dbgctl; break; @@ -2429,7 +2162,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) *data = svm->nested.hsave_msr; break; case MSR_VM_CR: - *data = svm->nested.vm_cr_msr; + *data = 0; break; case MSR_IA32_UCODE_REV: *data = 0x01000065; @@ -2459,31 +2192,6 @@ static int rdmsr_interception(struct vcpu_svm *svm) return 1; } -static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) -{ - struct vcpu_svm *svm = to_svm(vcpu); - int svm_dis, chg_mask; - - if (data & ~SVM_VM_CR_VALID_MASK) - return 1; - - chg_mask = SVM_VM_CR_VALID_MASK; - - if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) - chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); - - svm->nested.vm_cr_msr &= ~chg_mask; - svm->nested.vm_cr_msr |= (data & chg_mask); - - svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; - - /* check for svm_disable while efer.svme is set */ - if (svm_dis && (vcpu->arch.efer & EFER_SVME)) - return 1; - - return 0; -} - static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2550,7 +2258,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) svm->nested.hsave_msr = data; break; case MSR_VM_CR: - return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE: pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; @@ -2614,16 +2321,16 @@ static int pause_interception(struct vcpu_svm *svm) } static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { - [SVM_EXIT_READ_CR0] = emulate_on_interception, - [SVM_EXIT_READ_CR3] = emulate_on_interception, - [SVM_EXIT_READ_CR4] = emulate_on_interception, - [SVM_EXIT_READ_CR8] = emulate_on_interception, + [SVM_EXIT_READ_CR0] = emulate_on_interception, + [SVM_EXIT_READ_CR3] = emulate_on_interception, + [SVM_EXIT_READ_CR4] = emulate_on_interception, + [SVM_EXIT_READ_CR8] = emulate_on_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = emulate_on_interception, - [SVM_EXIT_WRITE_CR3] = emulate_on_interception, - [SVM_EXIT_WRITE_CR4] = emulate_on_interception, - [SVM_EXIT_WRITE_CR8] = cr8_write_interception, - [SVM_EXIT_READ_DR0] = emulate_on_interception, + [SVM_EXIT_WRITE_CR0] = emulate_on_interception, + [SVM_EXIT_WRITE_CR3] = emulate_on_interception, + [SVM_EXIT_WRITE_CR4] = emulate_on_interception, + [SVM_EXIT_WRITE_CR8] = cr8_write_interception, + [SVM_EXIT_READ_DR0] = emulate_on_interception, [SVM_EXIT_READ_DR1] = emulate_on_interception, [SVM_EXIT_READ_DR2] = emulate_on_interception, [SVM_EXIT_READ_DR3] = emulate_on_interception, @@ -2642,14 +2349,15 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, - [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, - [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, - [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, - [SVM_EXIT_INTR] = intr_interception, + [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, + [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, + /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, @@ -2657,7 +2365,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPGA] = invlpga_interception, - [SVM_EXIT_IOIO] = io_interception, + [SVM_EXIT_IOIO] = io_interception, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, @@ -2680,7 +2388,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, vcpu); + trace_kvm_exit(exit_code, svm->vmcb->save.rip); if (unlikely(svm->nested.exit_required)) { nested_svm_vmexit(svm); @@ -2798,9 +2506,6 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) - return; - if (irr == -1) return; @@ -2858,13 +2563,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - /* - * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes - * 1, because that's a separate STGI/VMRUN intercept. The next time we - * get that intercept, this function will be called again though and - * we'll get the vintr intercept. - */ - if (gif_set(svm) && nested_svm_intr(svm)) { + nested_svm_intr(svm); + + /* In case GIF=0 we can't rely on the CPU to tell us when + * GIF becomes 1, because that's a separate STGI/VMRUN intercept. + * The next time we get that intercept, this function will be + * called again though and we'll get the vintr intercept. */ + if (gif_set(svm)) { svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -2878,15 +2583,12 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ - /* - * Something prevents NMI from been injected. Single step over possible - * problem (IRET or exception injection or interrupt shadow) - */ - if (gif_set(svm) && nested_svm_nmi(svm)) { - svm->nmi_singlestep = true; - svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); - } + /* Something prevents NMI from been injected. Single step over + possible problem (IRET or exception injection or interrupt + shadow) */ + svm->nmi_singlestep = true; + svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); + update_db_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -2907,9 +2609,6 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) - return; - if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; kvm_set_cr8(vcpu, cr8); @@ -2921,9 +2620,6 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) - return; - cr8 = kvm_get_cr8(vcpu); svm->vmcb->control.int_ctl &= ~V_TPR_MASK; svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; @@ -2934,9 +2630,6 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) u8 vector; int type; u32 exitintinfo = svm->vmcb->control.exit_int_info; - unsigned int3_injected = svm->int3_injected; - - svm->int3_injected = 0; if (svm->vcpu.arch.hflags & HF_IRET_MASK) svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); @@ -2956,21 +2649,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) svm->vcpu.arch.nmi_injected = true; break; case SVM_EXITINTINFO_TYPE_EXEPT: + /* In case of software exception do not reinject an exception + vector, but re-execute and instruction instead */ if (is_nested(svm)) break; - /* - * In case of software exceptions, do not reinject the vector, - * but re-execute the instruction instead. Rewind RIP first - * if we emulated INT3 before. - */ - if (kvm_exception_is_soft(vector)) { - if (vector == BP_VECTOR && int3_injected && - kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) - kvm_rip_write(&svm->vcpu, - kvm_rip_read(&svm->vcpu) - - int3_injected); + if (kvm_exception_is_soft(vector)) break; - } if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { u32 err = svm->vmcb->control.exit_int_info_err; kvm_queue_exception_e(&svm->vcpu, vector, err); @@ -3191,24 +2875,24 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) } static const struct trace_print_flags svm_exit_reasons_str[] = { - { SVM_EXIT_READ_CR0, "read_cr0" }, - { SVM_EXIT_READ_CR3, "read_cr3" }, - { SVM_EXIT_READ_CR4, "read_cr4" }, - { SVM_EXIT_READ_CR8, "read_cr8" }, - { SVM_EXIT_WRITE_CR0, "write_cr0" }, - { SVM_EXIT_WRITE_CR3, "write_cr3" }, - { SVM_EXIT_WRITE_CR4, "write_cr4" }, - { SVM_EXIT_WRITE_CR8, "write_cr8" }, - { SVM_EXIT_READ_DR0, "read_dr0" }, - { SVM_EXIT_READ_DR1, "read_dr1" }, - { SVM_EXIT_READ_DR2, "read_dr2" }, - { SVM_EXIT_READ_DR3, "read_dr3" }, - { SVM_EXIT_WRITE_DR0, "write_dr0" }, - { SVM_EXIT_WRITE_DR1, "write_dr1" }, - { SVM_EXIT_WRITE_DR2, "write_dr2" }, - { SVM_EXIT_WRITE_DR3, "write_dr3" }, - { SVM_EXIT_WRITE_DR5, "write_dr5" }, - { SVM_EXIT_WRITE_DR7, "write_dr7" }, + { SVM_EXIT_READ_CR0, "read_cr0" }, + { SVM_EXIT_READ_CR3, "read_cr3" }, + { SVM_EXIT_READ_CR4, "read_cr4" }, + { SVM_EXIT_READ_CR8, "read_cr8" }, + { SVM_EXIT_WRITE_CR0, "write_cr0" }, + { SVM_EXIT_WRITE_CR3, "write_cr3" }, + { SVM_EXIT_WRITE_CR4, "write_cr4" }, + { SVM_EXIT_WRITE_CR8, "write_cr8" }, + { SVM_EXIT_READ_DR0, "read_dr0" }, + { SVM_EXIT_READ_DR1, "read_dr1" }, + { SVM_EXIT_READ_DR2, "read_dr2" }, + { SVM_EXIT_READ_DR3, "read_dr3" }, + { SVM_EXIT_WRITE_DR0, "write_dr0" }, + { SVM_EXIT_WRITE_DR1, "write_dr1" }, + { SVM_EXIT_WRITE_DR2, "write_dr2" }, + { SVM_EXIT_WRITE_DR3, "write_dr3" }, + { SVM_EXIT_WRITE_DR5, "write_dr5" }, + { SVM_EXIT_WRITE_DR7, "write_dr7" }, { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, @@ -3257,10 +2941,8 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; - if (is_nested(svm)) - svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; update_cr0_intercept(svm); + svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; } static struct kvm_x86_ops svm_x86_ops = { diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index 4ddadb1a5ffe..eea40439066c 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -12,8 +12,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) /* * There is a race window between reading and incrementing, but we do * not care about potentially loosing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. + * case anyway. */ if (ktimer->reinject || !atomic_read(&ktimer->pending)) { atomic_inc(&ktimer->pending); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 32c912c40bf8..6ad30a29f044 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -5,6 +5,8 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_PATH arch/x86/kvm +#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -182,8 +184,8 @@ TRACE_EVENT(kvm_apic, * Tracepoint for kvm guest exit: */ TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), - TP_ARGS(exit_reason, vcpu), + TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), + TP_ARGS(exit_reason, guest_rip), TP_STRUCT__entry( __field( unsigned int, exit_reason ) @@ -192,7 +194,7 @@ TRACE_EVENT(kvm_exit, TP_fast_assign( __entry->exit_reason = exit_reason; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = guest_rip; ), TP_printk("reason %s rip 0x%lx", @@ -219,38 +221,6 @@ TRACE_EVENT(kvm_inj_virq, TP_printk("irq %u", __entry->irq) ); -#define EXS(x) { x##_VECTOR, "#" #x } - -#define kvm_trace_sym_exc \ - EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ - EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) - -/* - * Tracepoint for kvm interrupt injection: - */ -TRACE_EVENT(kvm_inj_exception, - TP_PROTO(unsigned exception, bool has_error, unsigned error_code), - TP_ARGS(exception, has_error, error_code), - - TP_STRUCT__entry( - __field( u8, exception ) - __field( u8, has_error ) - __field( u32, error_code ) - ), - - TP_fast_assign( - __entry->exception = exception; - __entry->has_error = has_error; - __entry->error_code = error_code; - ), - - TP_printk("%s (0x%x)", - __print_symbolic(__entry->exception, kvm_trace_sym_exc), - /* FIXME: don't print error_code if not present */ - __entry->has_error ? __entry->error_code : 0) -); - /* * Tracepoint for page fault. */ @@ -443,34 +413,12 @@ TRACE_EVENT(kvm_nested_vmrun, ), TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " - "event_inj: 0x%08x npt: %s", + "event_inj: 0x%08x npt: %s\n", __entry->rip, __entry->vmcb, __entry->nested_rip, __entry->int_ctl, __entry->event_inj, __entry->npt ? "on" : "off") ); -TRACE_EVENT(kvm_nested_intercepts, - TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), - TP_ARGS(cr_read, cr_write, exceptions, intercept), - - TP_STRUCT__entry( - __field( __u16, cr_read ) - __field( __u16, cr_write ) - __field( __u32, exceptions ) - __field( __u64, intercept ) - ), - - TP_fast_assign( - __entry->cr_read = cr_read; - __entry->cr_write = cr_write; - __entry->exceptions = exceptions; - __entry->intercept = intercept; - ), - - TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", - __entry->cr_read, __entry->cr_write, __entry->exceptions, - __entry->intercept) -); /* * Tracepoint for #VMEXIT while nested */ @@ -499,7 +447,7 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info_err = exit_int_info_err; ), TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", __entry->rip, ftrace_print_symbols_seq(p, __entry->exit_code, kvm_x86_ops->exit_reasons_str), @@ -534,7 +482,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, ), TP_printk("reason: %s ext_inf1: 0x%016llx " - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", ftrace_print_symbols_seq(p, __entry->exit_code, kvm_x86_ops->exit_reasons_str), __entry->exit_info1, __entry->exit_info2, @@ -556,7 +504,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, __entry->rip = rip ), - TP_printk("rip: 0x%016llx", __entry->rip) + TP_printk("rip: 0x%016llx\n", __entry->rip) ); /* @@ -578,7 +526,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", __entry->rip, __entry->asid, __entry->address) ); @@ -599,16 +547,11 @@ TRACE_EVENT(kvm_skinit, __entry->slb = slb; ), - TP_printk("rip: 0x%016llx slb: 0x%08x", + TP_printk("rip: 0x%016llx slb: 0x%08x\n", __entry->rip, __entry->slb) ); #endif /* _TRACE_KVM_H */ -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH arch/x86/kvm -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace - /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 753ffc2ed12b..686492ed3079 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -232,56 +232,56 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -static inline bool is_page_fault(u32 intr_info) +static inline int is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); } -static inline bool is_no_device(u32 intr_info) +static inline int is_no_device(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); } -static inline bool is_invalid_opcode(u32 intr_info) +static inline int is_invalid_opcode(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); } -static inline bool is_external_interrupt(u32 intr_info) +static inline int is_external_interrupt(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static inline bool is_machine_check(u32 intr_info) +static inline int is_machine_check(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } -static inline bool cpu_has_vmx_msr_bitmap(void) +static inline int cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; } -static inline bool cpu_has_vmx_tpr_shadow(void) +static inline int cpu_has_vmx_tpr_shadow(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; } -static inline bool vm_need_tpr_shadow(struct kvm *kvm) +static inline int vm_need_tpr_shadow(struct kvm *kvm) { return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); } -static inline bool cpu_has_secondary_exec_ctrls(void) +static inline int cpu_has_secondary_exec_ctrls(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -301,80 +301,80 @@ static inline bool cpu_has_vmx_flexpriority(void) static inline bool cpu_has_vmx_ept_execute_only(void) { - return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; + return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); } static inline bool cpu_has_vmx_eptp_uncacheable(void) { - return vmx_capability.ept & VMX_EPTP_UC_BIT; + return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); } static inline bool cpu_has_vmx_eptp_writeback(void) { - return vmx_capability.ept & VMX_EPTP_WB_BIT; + return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); } static inline bool cpu_has_vmx_ept_2m_page(void) { - return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; + return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); } static inline bool cpu_has_vmx_ept_1g_page(void) { - return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; + return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); } -static inline bool cpu_has_vmx_invept_individual_addr(void) +static inline int cpu_has_vmx_invept_individual_addr(void) { - return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; + return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); } -static inline bool cpu_has_vmx_invept_context(void) +static inline int cpu_has_vmx_invept_context(void) { - return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; + return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); } -static inline bool cpu_has_vmx_invept_global(void) +static inline int cpu_has_vmx_invept_global(void) { - return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; + return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); } -static inline bool cpu_has_vmx_ept(void) +static inline int cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_EPT; } -static inline bool cpu_has_vmx_unrestricted_guest(void) +static inline int cpu_has_vmx_unrestricted_guest(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_UNRESTRICTED_GUEST; } -static inline bool cpu_has_vmx_ple(void) +static inline int cpu_has_vmx_ple(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PAUSE_LOOP_EXITING; } -static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) +static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) { return flexpriority_enabled && irqchip_in_kernel(kvm); } -static inline bool cpu_has_vmx_vpid(void) +static inline int cpu_has_vmx_vpid(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_VPID; } -static inline bool cpu_has_vmx_rdtscp(void) +static inline int cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_RDTSCP; } -static inline bool cpu_has_virtual_nmis(void) +static inline int cpu_has_virtual_nmis(void) { return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; } @@ -598,11 +598,11 @@ static void reload_tss(void) /* * VT restores TR but not its size. Useless. */ - struct desc_ptr gdt; + struct descriptor_table gdt; struct desc_struct *descs; - native_store_gdt(&gdt); - descs = (void *)gdt.address; + kvm_get_gdt(&gdt); + descs = (void *)gdt.base; descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ load_TR_desc(); } @@ -632,43 +632,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) return true; } -static unsigned long segment_base(u16 selector) -{ - struct desc_ptr gdt; - struct desc_struct *d; - unsigned long table_base; - unsigned long v; - - if (!(selector & ~3)) - return 0; - - native_store_gdt(&gdt); - table_base = gdt.address; - - if (selector & 4) { /* from ldt */ - u16 ldt_selector = kvm_read_ldt(); - - if (!(ldt_selector & ~3)) - return 0; - - table_base = segment_base(ldt_selector); - } - d = (struct desc_struct *)(table_base + (selector & ~7)); - v = get_desc_base(d); -#ifdef CONFIG_X86_64 - if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) - v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; -#endif - return v; -} - -static inline unsigned long kvm_read_tr_base(void) -{ - u16 tr; - asm("str %0" : "=g"(tr)); - return segment_base(tr); -} - static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -793,7 +756,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (vcpu->cpu != cpu) { - struct desc_ptr dt; + struct descriptor_table dt; unsigned long sysenter_esp; vcpu->cpu = cpu; @@ -802,8 +765,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. */ vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ - native_store_gdt(&dt); - vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ + kvm_get_gdt(&dt); + vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ @@ -876,9 +839,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) int ret = 0; if (interruptibility & GUEST_INTR_STATE_STI) - ret |= KVM_X86_SHADOW_INT_STI; + ret |= X86_SHADOW_INT_STI; if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= KVM_X86_SHADOW_INT_MOV_SS; + ret |= X86_SHADOW_INT_MOV_SS; return ret & mask; } @@ -890,9 +853,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - if (mask & KVM_X86_SHADOW_INT_MOV_SS) + if (mask & X86_SHADOW_INT_MOV_SS) interruptibility |= GUEST_INTR_STATE_MOV_SS; - else if (mask & KVM_X86_SHADOW_INT_STI) + if (mask & X86_SHADOW_INT_STI) interruptibility |= GUEST_INTR_STATE_STI; if ((interruptibility != interruptibility_old)) @@ -1965,28 +1928,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) *l = (ar >> 13) & 1; } -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->size = vmcs_read32(GUEST_IDTR_LIMIT); - dt->address = vmcs_readl(GUEST_IDTR_BASE); + dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); + dt->base = vmcs_readl(GUEST_IDTR_BASE); } -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - vmcs_write32(GUEST_IDTR_LIMIT, dt->size); - vmcs_writel(GUEST_IDTR_BASE, dt->address); + vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); + vmcs_writel(GUEST_IDTR_BASE, dt->base); } -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - dt->size = vmcs_read32(GUEST_GDTR_LIMIT); - dt->address = vmcs_readl(GUEST_GDTR_BASE); + dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); + dt->base = vmcs_readl(GUEST_GDTR_BASE); } -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) +static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) { - vmcs_write32(GUEST_GDTR_LIMIT, dt->size); - vmcs_writel(GUEST_GDTR_BASE, dt->address); + vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); + vmcs_writel(GUEST_GDTR_BASE, dt->base); } static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) @@ -2365,7 +2328,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) u32 junk; u64 host_pat, tsc_this, tsc_base; unsigned long a; - struct desc_ptr dt; + struct descriptor_table dt; int i; unsigned long kvm_vmx_return; u32 exec_control; @@ -2446,8 +2409,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - native_store_idt(&dt); - vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ + kvm_get_idt(&dt); + vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ @@ -2979,20 +2942,22 @@ static int handle_io(struct kvm_vcpu *vcpu) int size, in, string; unsigned port; + ++vcpu->stat.io_exits; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); string = (exit_qualification & 16) != 0; - in = (exit_qualification & 8) != 0; - ++vcpu->stat.io_exits; - - if (string || in) - return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); + if (string) { + if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) + return 0; + return 1; + } - port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; - skip_emulated_instruction(vcpu); + in = (exit_qualification & 8) != 0; + port = exit_qualification >> 16; - return kvm_fast_pio_out(vcpu, size, port); + skip_emulated_instruction(vcpu); + return kvm_emulate_pio(vcpu, in, size, port); } static void @@ -3604,7 +3569,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu); + trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); /* If guest state is invalid, start emulating */ if (vmx->emulation_required && emulate_invalid_guest_state) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 83d9c254c420..24cd0ee896e9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -41,7 +41,7 @@ #include <linux/srcu.h> #include <linux/slab.h> #include <trace/events/kvm.h> - +#undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS #include "trace.h" @@ -223,6 +223,34 @@ static void drop_user_return_notifiers(void *ignore) kvm_on_user_return(&smsr->urn); } +unsigned long segment_base(u16 selector) +{ + struct descriptor_table gdt; + struct desc_struct *d; + unsigned long table_base; + unsigned long v; + + if (selector == 0) + return 0; + + kvm_get_gdt(&gdt); + table_base = gdt.base; + + if (selector & 4) { /* from ldt */ + u16 ldt_selector = kvm_read_ldt(); + + table_base = segment_base(ldt_selector); + } + d = (struct desc_struct *)(table_base + (selector & ~7)); + v = get_desc_base(d); +#ifdef CONFIG_X86_64 + if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) + v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; +#endif + return v; +} +EXPORT_SYMBOL_GPL(segment_base); + u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { if (irqchip_in_kernel(vcpu->kvm)) @@ -405,6 +433,8 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #ifdef CONFIG_X86_64 if (cr0 & 0xffffffff00000000UL) { + printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", + cr0, kvm_read_cr0(vcpu)); kvm_inject_gp(vcpu, 0); return; } @@ -413,11 +443,14 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) cr0 &= ~CR0_RESERVED_BITS; if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { + printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); kvm_inject_gp(vcpu, 0); return; } if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { + printk(KERN_DEBUG "set_cr0: #GP, set PG flag " + "and a clear PE flag\n"); kvm_inject_gp(vcpu, 0); return; } @@ -428,11 +461,15 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) int cs_db, cs_l; if (!is_pae(vcpu)) { + printk(KERN_DEBUG "set_cr0: #GP, start paging " + "in long mode while PAE is disabled\n"); kvm_inject_gp(vcpu, 0); return; } kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); if (cs_l) { + printk(KERN_DEBUG "set_cr0: #GP, start paging " + "in long mode while CS.L == 1\n"); kvm_inject_gp(vcpu, 0); return; @@ -440,6 +477,8 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } else #endif if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { + printk(KERN_DEBUG "set_cr0: #GP, pdptrs " + "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -447,6 +486,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } kvm_x86_ops->set_cr0(vcpu, cr0); + vcpu->arch.cr0 = cr0; kvm_mmu_reset_context(vcpu); return; @@ -465,23 +505,28 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; if (cr4 & CR4_RESERVED_BITS) { + printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) { + printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " + "in long mode\n"); kvm_inject_gp(vcpu, 0); return; } } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { + printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (cr4 & X86_CR4_VMXE) { + printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); kvm_inject_gp(vcpu, 0); return; } @@ -502,16 +547,21 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu)) { if (cr3 & CR3_L_MODE_RESERVED_BITS) { + printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } } else { if (is_pae(vcpu)) { if (cr3 & CR3_PAE_RESERVED_BITS) { + printk(KERN_DEBUG + "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { + printk(KERN_DEBUG "set_cr3: #GP, pdptrs " + "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -543,6 +593,7 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { + printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); kvm_inject_gp(vcpu, 0); return; } @@ -598,12 +649,15 @@ static u32 emulated_msrs[] = { static void set_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) { + printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", + efer); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { + printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); kvm_inject_gp(vcpu, 0); return; } @@ -613,6 +667,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { + printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); kvm_inject_gp(vcpu, 0); return; } @@ -623,6 +678,7 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { + printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); kvm_inject_gp(vcpu, 0); return; } @@ -911,13 +967,9 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MC0_CTL + 4 * bank_num) { u32 offset = msr - MSR_IA32_MC0_CTL; - /* only 0 or all 1s can be written to IA32_MCi_CTL - * some Linux kernels though clear bit 10 in bank 4 to - * workaround a BIOS/GART TBL issue on AMD K8s, ignore - * this to avoid an uncatched #GP in the guest - */ + /* only 0 or all 1s can be written to IA32_MCi_CTL */ if ((offset & 0x3) == 0 && - data != 0 && (data | (1 << 10)) != ~(u64)0) + data != 0 && data != ~(u64)0) return -1; vcpu->arch.mce_banks[offset] = data; break; @@ -1061,7 +1113,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ - data &= ~(u64)0x100; /* ignore ignne emulation enable */ if (data != 0) { pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", data); @@ -1520,7 +1571,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: case KVM_CAP_PCI_SEGMENT: - case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: r = 1; break; @@ -2073,20 +2123,14 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, { vcpu_load(vcpu); - events->exception.injected = - vcpu->arch.exception.pending && - !kvm_exception_is_soft(vcpu->arch.exception.nr); + events->exception.injected = vcpu->arch.exception.pending; events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; events->exception.error_code = vcpu->arch.exception.error_code; - events->interrupt.injected = - vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; + events->interrupt.injected = vcpu->arch.interrupt.pending; events->interrupt.nr = vcpu->arch.interrupt.nr; - events->interrupt.soft = 0; - events->interrupt.shadow = - kvm_x86_ops->get_interrupt_shadow(vcpu, - KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); + events->interrupt.soft = vcpu->arch.interrupt.soft; events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending; @@ -2095,8 +2139,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR - | KVM_VCPUEVENT_VALID_SHADOW); + | KVM_VCPUEVENT_VALID_SIPI_VECTOR); vcpu_put(vcpu); } @@ -2105,8 +2148,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR - | KVM_VCPUEVENT_VALID_SHADOW)) + | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) return -EINVAL; vcpu_load(vcpu); @@ -2121,9 +2163,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.soft = events->interrupt.soft; if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) kvm_pic_clear_isr_ack(vcpu->kvm); - if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) - kvm_x86_ops->set_interrupt_shadow(vcpu, - events->interrupt.shadow); vcpu->arch.nmi_injected = events->nmi.injected; if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) @@ -2138,36 +2177,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return 0; } -static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, - struct kvm_debugregs *dbgregs) -{ - vcpu_load(vcpu); - - memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - dbgregs->dr6 = vcpu->arch.dr6; - dbgregs->dr7 = vcpu->arch.dr7; - dbgregs->flags = 0; - - vcpu_put(vcpu); -} - -static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, - struct kvm_debugregs *dbgregs) -{ - if (dbgregs->flags) - return -EINVAL; - - vcpu_load(vcpu); - - memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); - vcpu->arch.dr6 = dbgregs->dr6; - vcpu->arch.dr7 = dbgregs->dr7; - - vcpu_put(vcpu); - - return 0; -} - long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2346,29 +2355,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); break; } - case KVM_GET_DEBUGREGS: { - struct kvm_debugregs dbgregs; - - kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); - - r = -EFAULT; - if (copy_to_user(argp, &dbgregs, - sizeof(struct kvm_debugregs))) - break; - r = 0; - break; - } - case KVM_SET_DEBUGREGS: { - struct kvm_debugregs dbgregs; - - r = -EFAULT; - if (copy_from_user(&dbgregs, argp, - sizeof(struct kvm_debugregs))) - break; - - r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); - break; - } default: r = -EINVAL; } @@ -2835,13 +2821,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; - r = -ENXIO; if (irqchip_in_kernel(kvm)) { __s32 status; status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); if (ioctl == KVM_IRQ_LINE_STATUS) { - r = -EFAULT; irq_event.status = status; if (copy_to_user(argp, &irq_event, sizeof irq_event)) @@ -3057,18 +3041,6 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); } -static void kvm_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) -{ - kvm_x86_ops->set_segment(vcpu, var, seg); -} - -void kvm_get_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) -{ - kvm_x86_ops->get_segment(vcpu, var, seg); -} - gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; @@ -3149,17 +3121,14 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); } -static int kvm_write_guest_virt_system(gva_t addr, void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu, - u32 *error) +static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, + struct kvm_vcpu *vcpu, u32 *error) { void *data = val; int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, - PFERR_WRITE_MASK, error); + gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); unsigned offset = addr & (PAGE_SIZE-1); unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; @@ -3182,6 +3151,7 @@ out: return r; } + static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, @@ -3247,8 +3217,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, static int emulator_write_emulated_onepage(unsigned long addr, const void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, - bool mmu_only) + struct kvm_vcpu *vcpu) { gpa_t gpa; u32 error_code; @@ -3264,10 +3233,6 @@ static int emulator_write_emulated_onepage(unsigned long addr, if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) goto mmio; - if (mmu_only) { - kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); - return X86EMUL_CONTINUE; - } if (emulator_write_phys(vcpu, gpa, val, bytes)) return X86EMUL_CONTINUE; @@ -3288,180 +3253,66 @@ mmio: return X86EMUL_CONTINUE; } -int __emulator_write_emulated(unsigned long addr, +int emulator_write_emulated(unsigned long addr, const void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, - bool mmu_only) + struct kvm_vcpu *vcpu) { /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { int rc, now; now = -addr & ~PAGE_MASK; - rc = emulator_write_emulated_onepage(addr, val, now, vcpu, - mmu_only); + rc = emulator_write_emulated_onepage(addr, val, now, vcpu); if (rc != X86EMUL_CONTINUE) return rc; addr += now; val += now; bytes -= now; } - return emulator_write_emulated_onepage(addr, val, bytes, vcpu, - mmu_only); -} - -int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) -{ - return __emulator_write_emulated(addr, val, bytes, vcpu, false); + return emulator_write_emulated_onepage(addr, val, bytes, vcpu); } EXPORT_SYMBOL_GPL(emulator_write_emulated); -#define CMPXCHG_TYPE(t, ptr, old, new) \ - (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) - -#ifdef CONFIG_X86_64 -# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) -#else -# define CMPXCHG64(ptr, old, new) \ - (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) -#endif - static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, unsigned int bytes, struct kvm_vcpu *vcpu) { - gpa_t gpa; - struct page *page; - char *kaddr; - bool exchanged; - + printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); +#ifndef CONFIG_X86_64 /* guests cmpxchg8b have to be emulated atomically */ - if (bytes > 8 || (bytes & (bytes - 1))) - goto emul_write; + if (bytes == 8) { + gpa_t gpa; + struct page *page; + char *kaddr; + u64 val; - gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); - if (gpa == UNMAPPED_GVA || - (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) - goto emul_write; + if (gpa == UNMAPPED_GVA || + (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) + goto emul_write; - if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) - goto emul_write; + if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) + goto emul_write; - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + val = *(u64 *)new; - kaddr = kmap_atomic(page, KM_USER0); - kaddr += offset_in_page(gpa); - switch (bytes) { - case 1: - exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); - break; - case 2: - exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); - break; - case 4: - exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); - break; - case 8: - exchanged = CMPXCHG64(kaddr, old, new); - break; - default: - BUG(); - } - kunmap_atomic(kaddr, KM_USER0); - kvm_release_page_dirty(page); - - if (!exchanged) - return X86EMUL_CMPXCHG_FAILED; - - return __emulator_write_emulated(addr, new, bytes, vcpu, true); + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + kaddr = kmap_atomic(page, KM_USER0); + set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); + kunmap_atomic(kaddr, KM_USER0); + kvm_release_page_dirty(page); + } emul_write: - printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); +#endif return emulator_write_emulated(addr, new, bytes, vcpu); } -static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) -{ - /* TODO: String I/O for in kernel device */ - int r; - - if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, - vcpu->arch.pio.size, pd); - else - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, - vcpu->arch.pio.port, vcpu->arch.pio.size, - pd); - return r; -} - - -static int emulator_pio_in_emulated(int size, unsigned short port, void *val, - unsigned int count, struct kvm_vcpu *vcpu) -{ - if (vcpu->arch.pio.count) - goto data_avail; - - trace_kvm_pio(1, port, size, 1); - - vcpu->arch.pio.port = port; - vcpu->arch.pio.in = 1; - vcpu->arch.pio.count = count; - vcpu->arch.pio.size = size; - - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - data_avail: - memcpy(val, vcpu->arch.pio_data, size * count); - vcpu->arch.pio.count = 0; - return 1; - } - - vcpu->run->exit_reason = KVM_EXIT_IO; - vcpu->run->io.direction = KVM_EXIT_IO_IN; - vcpu->run->io.size = size; - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; - vcpu->run->io.count = count; - vcpu->run->io.port = port; - - return 0; -} - -static int emulator_pio_out_emulated(int size, unsigned short port, - const void *val, unsigned int count, - struct kvm_vcpu *vcpu) -{ - trace_kvm_pio(0, port, size, 1); - - vcpu->arch.pio.port = port; - vcpu->arch.pio.in = 0; - vcpu->arch.pio.count = count; - vcpu->arch.pio.size = size; - - memcpy(vcpu->arch.pio_data, val, size * count); - - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - vcpu->arch.pio.count = 0; - return 1; - } - - vcpu->run->exit_reason = KVM_EXIT_IO; - vcpu->run->io.direction = KVM_EXIT_IO_OUT; - vcpu->run->io.size = size; - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; - vcpu->run->io.count = count; - vcpu->run->io.port = port; - - return 0; -} - static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) { return kvm_x86_ops->get_segment_base(vcpu, seg); @@ -3510,167 +3361,12 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); -static u64 mk_cr_64(u64 curr_cr, u32 new_val) -{ - return (curr_cr & ~((1ULL << 32) - 1)) | new_val; -} - -static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) -{ - unsigned long value; - - switch (cr) { - case 0: - value = kvm_read_cr0(vcpu); - break; - case 2: - value = vcpu->arch.cr2; - break; - case 3: - value = vcpu->arch.cr3; - break; - case 4: - value = kvm_read_cr4(vcpu); - break; - case 8: - value = kvm_get_cr8(vcpu); - break; - default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); - return 0; - } - - return value; -} - -static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) -{ - switch (cr) { - case 0: - kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); - break; - case 2: - vcpu->arch.cr2 = val; - break; - case 3: - kvm_set_cr3(vcpu, val); - break; - case 4: - kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); - break; - case 8: - kvm_set_cr8(vcpu, val & 0xfUL); - break; - default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); - } -} - -static int emulator_get_cpl(struct kvm_vcpu *vcpu) -{ - return kvm_x86_ops->get_cpl(vcpu); -} - -static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) -{ - kvm_x86_ops->get_gdt(vcpu, dt); -} - -static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, - struct kvm_vcpu *vcpu) -{ - struct kvm_segment var; - - kvm_get_segment(vcpu, &var, seg); - - if (var.unusable) - return false; - - if (var.g) - var.limit >>= 12; - set_desc_limit(desc, var.limit); - set_desc_base(desc, (unsigned long)var.base); - desc->type = var.type; - desc->s = var.s; - desc->dpl = var.dpl; - desc->p = var.present; - desc->avl = var.avl; - desc->l = var.l; - desc->d = var.db; - desc->g = var.g; - - return true; -} - -static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, - struct kvm_vcpu *vcpu) -{ - struct kvm_segment var; - - /* needed to preserve selector */ - kvm_get_segment(vcpu, &var, seg); - - var.base = get_desc_base(desc); - var.limit = get_desc_limit(desc); - if (desc->g) - var.limit = (var.limit << 12) | 0xfff; - var.type = desc->type; - var.present = desc->p; - var.dpl = desc->dpl; - var.db = desc->d; - var.s = desc->s; - var.l = desc->l; - var.g = desc->g; - var.avl = desc->avl; - var.present = desc->p; - var.unusable = !var.present; - var.padding = 0; - - kvm_set_segment(vcpu, &var, seg); - return; -} - -static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(vcpu, &kvm_seg, seg); - return kvm_seg.selector; -} - -static void emulator_set_segment_selector(u16 sel, int seg, - struct kvm_vcpu *vcpu) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(vcpu, &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); -} - -static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) -{ - kvm_x86_ops->set_rflags(vcpu, rflags); -} - static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, - .write_std = kvm_write_guest_virt_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, - .pio_in_emulated = emulator_pio_in_emulated, - .pio_out_emulated = emulator_pio_out_emulated, - .get_cached_descriptor = emulator_get_cached_descriptor, - .set_cached_descriptor = emulator_set_cached_descriptor, - .get_segment_selector = emulator_get_segment_selector, - .set_segment_selector = emulator_set_segment_selector, - .get_gdt = emulator_get_gdt, - .get_cr = emulator_get_cr, - .set_cr = emulator_set_cr, - .cpl = emulator_get_cpl, - .set_rflags = emulator_set_rflags, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -3701,14 +3397,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, cache_all_regs(vcpu); vcpu->mmio_is_write = 0; + vcpu->arch.pio.string = 0; if (!(emulation_type & EMULTYPE_NO_DECODE)) { int cs_db, cs_l; kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); - vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) @@ -3760,20 +3456,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } -restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; if (r == 0) kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); - if (vcpu->arch.pio.count) { - if (!vcpu->arch.pio.in) - vcpu->arch.pio.count = 0; + if (vcpu->arch.pio.string) return EMULATE_DO_MMIO; - } - if (r || vcpu->mmio_is_write) { + if ((r || vcpu->mmio_is_write) && run) { run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = vcpu->mmio_phys_addr; memcpy(run->mmio.data, vcpu->mmio_data, 8); @@ -3783,7 +3475,7 @@ restart: if (r) { if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) - goto done; + return EMULATE_DONE; if (!vcpu->mmio_needed) { kvm_report_emulation_failure(vcpu, "mmio"); return EMULATE_FAIL; @@ -3791,31 +3483,214 @@ restart: return EMULATE_DO_MMIO; } + kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + if (vcpu->mmio_is_write) { vcpu->mmio_needed = 0; return EMULATE_DO_MMIO; } -done: - if (vcpu->arch.exception.pending) - vcpu->arch.emulate_ctxt.restart = false; - - if (vcpu->arch.emulate_ctxt.restart) - goto restart; - return EMULATE_DONE; } EXPORT_SYMBOL_GPL(emulate_instruction); -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) +static int pio_copy_data(struct kvm_vcpu *vcpu) { - unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); - int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); - /* do not return to emulator after return from userspace */ - vcpu->arch.pio.count = 0; + void *p = vcpu->arch.pio_data; + gva_t q = vcpu->arch.pio.guest_gva; + unsigned bytes; + int ret; + u32 error_code; + + bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; + if (vcpu->arch.pio.in) + ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); + else + ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); + + if (ret == X86EMUL_PROPAGATE_FAULT) + kvm_inject_page_fault(vcpu, q, error_code); + return ret; } -EXPORT_SYMBOL_GPL(kvm_fast_pio_out); + +int complete_pio(struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->arch.pio; + long delta; + int r; + unsigned long val; + + if (!io->string) { + if (io->in) { + val = kvm_register_read(vcpu, VCPU_REGS_RAX); + memcpy(&val, vcpu->arch.pio_data, io->size); + kvm_register_write(vcpu, VCPU_REGS_RAX, val); + } + } else { + if (io->in) { + r = pio_copy_data(vcpu); + if (r) + goto out; + } + + delta = 1; + if (io->rep) { + delta *= io->cur_count; + /* + * The size of the register should really depend on + * current address size. + */ + val = kvm_register_read(vcpu, VCPU_REGS_RCX); + val -= delta; + kvm_register_write(vcpu, VCPU_REGS_RCX, val); + } + if (io->down) + delta = -delta; + delta *= io->size; + if (io->in) { + val = kvm_register_read(vcpu, VCPU_REGS_RDI); + val += delta; + kvm_register_write(vcpu, VCPU_REGS_RDI, val); + } else { + val = kvm_register_read(vcpu, VCPU_REGS_RSI); + val += delta; + kvm_register_write(vcpu, VCPU_REGS_RSI, val); + } + } +out: + io->count -= io->cur_count; + io->cur_count = 0; + + return 0; +} + +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) +{ + /* TODO: String I/O for in kernel device */ + int r; + + if (vcpu->arch.pio.in) + r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); + else + r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + vcpu->arch.pio.port, vcpu->arch.pio.size, + pd); + return r; +} + +static int pio_string_write(struct kvm_vcpu *vcpu) +{ + struct kvm_pio_request *io = &vcpu->arch.pio; + void *pd = vcpu->arch.pio_data; + int i, r = 0; + + for (i = 0; i < io->cur_count; i++) { + if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + io->port, io->size, pd)) { + r = -EOPNOTSUPP; + break; + } + pd += io->size; + } + return r; +} + +int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) +{ + unsigned long val; + + trace_kvm_pio(!in, port, size, 1); + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = vcpu->arch.pio.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; + vcpu->run->io.port = vcpu->arch.pio.port = port; + vcpu->arch.pio.in = in; + vcpu->arch.pio.string = 0; + vcpu->arch.pio.down = 0; + vcpu->arch.pio.rep = 0; + + if (!vcpu->arch.pio.in) { + val = kvm_register_read(vcpu, VCPU_REGS_RAX); + memcpy(vcpu->arch.pio_data, &val, 4); + } + + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { + complete_pio(vcpu); + return 1; + } + return 0; +} +EXPORT_SYMBOL_GPL(kvm_emulate_pio); + +int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, + int size, unsigned long count, int down, + gva_t address, int rep, unsigned port) +{ + unsigned now, in_page; + int ret = 0; + + trace_kvm_pio(!in, port, size, count); + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; + vcpu->run->io.size = vcpu->arch.pio.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; + vcpu->run->io.port = vcpu->arch.pio.port = port; + vcpu->arch.pio.in = in; + vcpu->arch.pio.string = 1; + vcpu->arch.pio.down = down; + vcpu->arch.pio.rep = rep; + + if (!count) { + kvm_x86_ops->skip_emulated_instruction(vcpu); + return 1; + } + + if (!down) + in_page = PAGE_SIZE - offset_in_page(address); + else + in_page = offset_in_page(address) + size; + now = min(count, (unsigned long)in_page / size); + if (!now) + now = 1; + if (down) { + /* + * String I/O in reverse. Yuck. Kill the guest, fix later. + */ + pr_unimpl(vcpu, "guest string pio down\n"); + kvm_inject_gp(vcpu, 0); + return 1; + } + vcpu->run->io.count = now; + vcpu->arch.pio.cur_count = now; + + if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) + kvm_x86_ops->skip_emulated_instruction(vcpu); + + vcpu->arch.pio.guest_gva = address; + + if (!vcpu->arch.pio.in) { + /* string PIO write */ + ret = pio_copy_data(vcpu); + if (ret == X86EMUL_PROPAGATE_FAULT) + return 1; + if (ret == 0 && !pio_string_write(vcpu)) { + complete_pio(vcpu); + if (vcpu->arch.pio.count == 0) + ret = 1; + } + } + /* no string PIO read support yet */ + + return ret; +} +EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); static void bounce_off(void *info) { @@ -4086,23 +3961,88 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return __emulator_write_emulated(rip, instruction, 3, vcpu, false); + return emulator_write_emulated(rip, instruction, 3, vcpu); +} + +static u64 mk_cr_64(u64 curr_cr, u32 new_val) +{ + return (curr_cr & ~((1ULL << 32) - 1)) | new_val; } void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { - struct desc_ptr dt = { limit, base }; + struct descriptor_table dt = { limit, base }; kvm_x86_ops->set_gdt(vcpu, &dt); } void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { - struct desc_ptr dt = { limit, base }; + struct descriptor_table dt = { limit, base }; kvm_x86_ops->set_idt(vcpu, &dt); } +void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, + unsigned long *rflags) +{ + kvm_lmsw(vcpu, msw); + *rflags = kvm_get_rflags(vcpu); +} + +unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) +{ + unsigned long value; + + switch (cr) { + case 0: + value = kvm_read_cr0(vcpu); + break; + case 2: + value = vcpu->arch.cr2; + break; + case 3: + value = vcpu->arch.cr3; + break; + case 4: + value = kvm_read_cr4(vcpu); + break; + case 8: + value = kvm_get_cr8(vcpu); + break; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); + return 0; + } + + return value; +} + +void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, + unsigned long *rflags) +{ + switch (cr) { + case 0: + kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); + *rflags = kvm_get_rflags(vcpu); + break; + case 2: + vcpu->arch.cr2 = val; + break; + case 3: + kvm_set_cr3(vcpu, val); + break; + case 4: + kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); + break; + case 8: + kvm_set_cr8(vcpu, val & 0xfUL); + break; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); + } +} + static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) { struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; @@ -4282,9 +4222,6 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) { /* try to reinject previous events if any */ if (vcpu->arch.exception.pending) { - trace_kvm_inj_exception(vcpu->arch.exception.nr, - vcpu->arch.exception.has_error_code, - vcpu->arch.exception.error_code); kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); @@ -4545,17 +4482,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (!irqchip_in_kernel(vcpu->kvm)) kvm_set_cr8(vcpu, kvm_run->cr8); - if (vcpu->arch.pio.count || vcpu->mmio_needed || - vcpu->arch.emulate_ctxt.restart) { - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - } + if (vcpu->arch.pio.cur_count) { + r = complete_pio(vcpu); + if (r) + goto out; + } + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); + r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, + EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r == EMULATE_DO_MMIO) { + /* + * Read-modify-write. Back to userspace. + */ r = 0; goto out; } @@ -4638,6 +4582,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } +void kvm_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + kvm_x86_ops->get_segment(vcpu, var, seg); +} + void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { struct kvm_segment cs; @@ -4651,7 +4601,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - struct desc_ptr dt; + struct descriptor_table dt; vcpu_load(vcpu); @@ -4666,11 +4616,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); kvm_x86_ops->get_idt(vcpu, &dt); - sregs->idt.limit = dt.size; - sregs->idt.base = dt.address; + sregs->idt.limit = dt.limit; + sregs->idt.base = dt.base; kvm_x86_ops->get_gdt(vcpu, &dt); - sregs->gdt.limit = dt.size; - sregs->gdt.base = dt.address; + sregs->gdt.limit = dt.limit; + sregs->gdt.base = dt.base; sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; @@ -4709,30 +4659,559 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return 0; } +static void kvm_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + kvm_x86_ops->set_segment(vcpu, var, seg); +} + +static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, + struct kvm_segment *kvm_desct) +{ + kvm_desct->base = get_desc_base(seg_desc); + kvm_desct->limit = get_desc_limit(seg_desc); + if (seg_desc->g) { + kvm_desct->limit <<= 12; + kvm_desct->limit |= 0xfff; + } + kvm_desct->selector = selector; + kvm_desct->type = seg_desc->type; + kvm_desct->present = seg_desc->p; + kvm_desct->dpl = seg_desc->dpl; + kvm_desct->db = seg_desc->d; + kvm_desct->s = seg_desc->s; + kvm_desct->l = seg_desc->l; + kvm_desct->g = seg_desc->g; + kvm_desct->avl = seg_desc->avl; + if (!selector) + kvm_desct->unusable = 1; + else + kvm_desct->unusable = 0; + kvm_desct->padding = 0; +} + +static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, + u16 selector, + struct descriptor_table *dtable) +{ + if (selector & 1 << 2) { + struct kvm_segment kvm_seg; + + kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); + + if (kvm_seg.unusable) + dtable->limit = 0; + else + dtable->limit = kvm_seg.limit; + dtable->base = kvm_seg.base; + } + else + kvm_x86_ops->get_gdt(vcpu, dtable); +} + +/* allowed just for 8 bytes segments */ +static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table dtable; + u16 index = selector >> 3; + int ret; + u32 err; + gva_t addr; + + get_segment_descriptor_dtable(vcpu, selector, &dtable); + + if (dtable.limit < index * 8 + 7) { + kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); + return X86EMUL_PROPAGATE_FAULT; + } + addr = dtable.base + index * 8; + ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), + vcpu, &err); + if (ret == X86EMUL_PROPAGATE_FAULT) + kvm_inject_page_fault(vcpu, addr, err); + + return ret; +} + +/* allowed just for 8 bytes segments */ +static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + struct desc_struct *seg_desc) +{ + struct descriptor_table dtable; + u16 index = selector >> 3; + + get_segment_descriptor_dtable(vcpu, selector, &dtable); + + if (dtable.limit < index * 8 + 7) + return 1; + return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); +} + +static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc) +{ + u32 base_addr = get_desc_base(seg_desc); + + return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); +} + +static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, + struct desc_struct *seg_desc) +{ + u32 base_addr = get_desc_base(seg_desc); + + return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); +} + +static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) +{ + struct kvm_segment kvm_seg; + + kvm_get_segment(vcpu, &kvm_seg, seg); + return kvm_seg.selector; +} + +static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) +{ + struct kvm_segment segvar = { + .base = selector << 4, + .limit = 0xffff, + .selector = selector, + .type = 3, + .present = 1, + .dpl = 3, + .db = 0, + .s = 1, + .l = 0, + .g = 0, + .avl = 0, + .unusable = 0, + }; + kvm_x86_ops->set_segment(vcpu, &segvar, seg); + return X86EMUL_CONTINUE; +} + +static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) +{ + return (seg != VCPU_SREG_LDTR) && + (seg != VCPU_SREG_TR) && + (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); +} + +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) +{ + struct kvm_segment kvm_seg; + struct desc_struct seg_desc; + u8 dpl, rpl, cpl; + unsigned err_vec = GP_VECTOR; + u32 err_code = 0; + bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ + int ret; + + if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) + return kvm_load_realmode_segment(vcpu, selector, seg); + + /* NULL selector is not valid for TR, CS and SS */ + if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) + && null_selector) + goto exception; + + /* TR should be in GDT only */ + if (seg == VCPU_SREG_TR && (selector & (1 << 2))) + goto exception; + + ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); + if (ret) + return ret; + + seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); + + if (null_selector) { /* for NULL selector skip all following checks */ + kvm_seg.unusable = 1; + goto load; + } + + err_code = selector & 0xfffc; + err_vec = GP_VECTOR; + + /* can't load system descriptor into segment selecor */ + if (seg <= VCPU_SREG_GS && !kvm_seg.s) + goto exception; + + if (!kvm_seg.present) { + err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; + goto exception; + } + + rpl = selector & 3; + dpl = kvm_seg.dpl; + cpl = kvm_x86_ops->get_cpl(vcpu); + + switch (seg) { + case VCPU_SREG_SS: + /* + * segment is not a writable data segment or segment + * selector's RPL != CPL or segment selector's RPL != CPL + */ + if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) + goto exception; + break; + case VCPU_SREG_CS: + if (!(kvm_seg.type & 8)) + goto exception; + + if (kvm_seg.type & 4) { + /* conforming */ + if (dpl > cpl) + goto exception; + } else { + /* nonconforming */ + if (rpl > cpl || dpl != cpl) + goto exception; + } + /* CS(RPL) <- CPL */ + selector = (selector & 0xfffc) | cpl; + break; + case VCPU_SREG_TR: + if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) + goto exception; + break; + case VCPU_SREG_LDTR: + if (kvm_seg.s || kvm_seg.type != 2) + goto exception; + break; + default: /* DS, ES, FS, or GS */ + /* + * segment is not a data or readable code segment or + * ((segment is a data or nonconforming code segment) + * and (both RPL and CPL > DPL)) + */ + if ((kvm_seg.type & 0xa) == 0x8 || + (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) + goto exception; + break; + } + + if (!kvm_seg.unusable && kvm_seg.s) { + /* mark segment as accessed */ + kvm_seg.type |= 1; + seg_desc.type |= 1; + save_guest_segment_descriptor(vcpu, selector, &seg_desc); + } +load: + kvm_set_segment(vcpu, &kvm_seg, seg); + return X86EMUL_CONTINUE; +exception: + kvm_queue_exception_e(vcpu, err_vec, err_code); + return X86EMUL_PROPAGATE_FAULT; +} + +static void save_state_to_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + tss->cr3 = vcpu->arch.cr3; + tss->eip = kvm_rip_read(vcpu); + tss->eflags = kvm_get_rflags(vcpu); + tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); + tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); + tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); + tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); + tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); + tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); + tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); + tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); + tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); + tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); + tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); +} + +static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) +{ + struct kvm_segment kvm_seg; + kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_seg.selector = sel; + kvm_set_segment(vcpu, &kvm_seg, seg); +} + +static int load_state_from_tss32(struct kvm_vcpu *vcpu, + struct tss_segment_32 *tss) +{ + kvm_set_cr3(vcpu, tss->cr3); + + kvm_rip_write(vcpu, tss->eip); + kvm_set_rflags(vcpu, tss->eflags | 2); + + kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); + kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); + kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); + kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); + kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); + kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); + kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); + kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); + + /* + * SDM says that segment selectors are loaded before segment + * descriptors + */ + kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); + kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); + kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); + kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); + kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); + kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); + kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); + + /* + * Now load segment descriptors. If fault happenes at this stage + * it is handled in a context of new task + */ + if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) + return 1; + return 0; +} + +static void save_state_to_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + tss->ip = kvm_rip_read(vcpu); + tss->flag = kvm_get_rflags(vcpu); + tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); + tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); + tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); + tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); + tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); + tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); + tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); + tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); + + tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); + tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); + tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); + tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); + tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); +} + +static int load_state_from_tss16(struct kvm_vcpu *vcpu, + struct tss_segment_16 *tss) +{ + kvm_rip_write(vcpu, tss->ip); + kvm_set_rflags(vcpu, tss->flag | 2); + kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); + kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); + kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); + kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); + kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); + kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); + kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); + kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); + + /* + * SDM says that segment selectors are loaded before segment + * descriptors + */ + kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); + kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); + kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); + kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); + kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); + + /* + * Now load segment descriptors. If fault happenes at this stage + * it is handled in a context of new task + */ + if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) + return 1; + + if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) + return 1; + return 0; +} + +static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, + u16 old_tss_sel, u32 old_tss_base, + struct desc_struct *nseg_desc) +{ + struct tss_segment_16 tss_segment_16; + int ret = 0; + + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) + goto out; + + save_state_to_tss16(vcpu, &tss_segment_16); + + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) + goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), + &tss_segment_16, sizeof tss_segment_16)) + goto out; + + if (old_tss_sel != 0xffff) { + tss_segment_16.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr_write(vcpu, nseg_desc), + &tss_segment_16.prev_task_link, + sizeof tss_segment_16.prev_task_link)) + goto out; + } + + if (load_state_from_tss16(vcpu, &tss_segment_16)) + goto out; + + ret = 1; +out: + return ret; +} + +static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, + u16 old_tss_sel, u32 old_tss_base, + struct desc_struct *nseg_desc) +{ + struct tss_segment_32 tss_segment_32; + int ret = 0; + + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) + goto out; + + save_state_to_tss32(vcpu, &tss_segment_32); + + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) + goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), + &tss_segment_32, sizeof tss_segment_32)) + goto out; + + if (old_tss_sel != 0xffff) { + tss_segment_32.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr_write(vcpu, nseg_desc), + &tss_segment_32.prev_task_link, + sizeof tss_segment_32.prev_task_link)) + goto out; + } + + if (load_state_from_tss32(vcpu, &tss_segment_32)) + goto out; + + ret = 1; +out: + return ret; +} + int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) { - int cs_db, cs_l, ret; - cache_all_regs(vcpu); + struct kvm_segment tr_seg; + struct desc_struct cseg_desc; + struct desc_struct nseg_desc; + int ret = 0; + u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); + u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); - vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); - vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); - vcpu->arch.emulate_ctxt.mode = - (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : - (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) - ? X86EMUL_MODE_VM86 : cs_l - ? X86EMUL_MODE_PROT64 : cs_db - ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; + /* FIXME: Handle errors. Failure to read either TSS or their + * descriptors should generate a pagefault. + */ + if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) + goto out; - ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, - tss_selector, reason); + if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) + goto out; + + if (reason != TASK_SWITCH_IRET) { + int cpl; - if (ret == X86EMUL_CONTINUE) - kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + cpl = kvm_x86_ops->get_cpl(vcpu); + if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } + } - return (ret != X86EMUL_CONTINUE); + if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { + kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); + return 1; + } + + if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { + cseg_desc.type &= ~(1 << 1); //clear the B flag + save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); + } + + if (reason == TASK_SWITCH_IRET) { + u32 eflags = kvm_get_rflags(vcpu); + kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); + } + + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; + + if (nseg_desc.type & 8) + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); + else + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); + + if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { + u32 eflags = kvm_get_rflags(vcpu); + kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); + } + + if (reason != TASK_SWITCH_IRET) { + nseg_desc.type |= (1 << 1); + save_guest_segment_descriptor(vcpu, tss_selector, + &nseg_desc); + } + + kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); + seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); + tr_seg.type = 11; + kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); +out: + return ret; } EXPORT_SYMBOL_GPL(kvm_task_switch); @@ -4741,15 +5220,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { int mmu_reset_needed = 0; int pending_vec, max_bits; - struct desc_ptr dt; + struct descriptor_table dt; vcpu_load(vcpu); - dt.size = sregs->idt.limit; - dt.address = sregs->idt.base; + dt.limit = sregs->idt.limit; + dt.base = sregs->idt.base; kvm_x86_ops->set_idt(vcpu, &dt); - dt.size = sregs->gdt.limit; - dt.address = sregs->gdt.base; + dt.limit = sregs->gdt.limit; + dt.base = sregs->gdt.base; kvm_x86_ops->set_gdt(vcpu, &dt); vcpu->arch.cr2 = sregs->cr2; @@ -4848,9 +5327,11 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + vcpu->arch.singlestep_cs = + get_segment_selector(vcpu, VCPU_SREG_CS); + vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); + } /* * Trigger an rflags update that will inject or remove the trace @@ -5341,22 +5822,13 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } -bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) -{ - unsigned long current_rip = kvm_rip_read(vcpu) + - get_segment_base(vcpu, VCPU_SREG_CS); - - return current_rip == linear_rip; -} -EXPORT_SYMBOL_GPL(kvm_is_linear_rip); - unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags; rflags = kvm_x86_ops->get_rflags(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - rflags &= ~X86_EFLAGS_TF; + rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); return rflags; } EXPORT_SYMBOL_GPL(kvm_get_rflags); @@ -5364,8 +5836,10 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && - kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) - rflags |= X86_EFLAGS_TF; + vcpu->arch.singlestep_cs == + get_segment_selector(vcpu, VCPU_SREG_CS) && + vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) + rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; kvm_x86_ops->set_rflags(vcpu, rflags); } EXPORT_SYMBOL_GPL(kvm_set_rflags); @@ -5381,4 +5855,3 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); -EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 23ea02253900..60df9c84ecae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -160,7 +160,6 @@ struct kvm_pit_config { #define KVM_EXIT_DCR 15 #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 -#define KVM_EXIT_OSI 18 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -260,10 +259,6 @@ struct kvm_run { __u32 ndata; __u64 data[16]; } internal; - /* KVM_EXIT_OSI */ - struct { - __u64 gprs[32]; - } osi; /* Fix the size of the union. */ char padding[256]; }; @@ -405,15 +400,6 @@ struct kvm_ioeventfd { __u8 pad[36]; }; -/* for KVM_ENABLE_CAP */ -struct kvm_enable_cap { - /* in */ - __u32 cap; - __u32 flags; - __u64 args[4]; - __u8 pad[64]; -}; - #define KVMIO 0xAE /* @@ -515,15 +501,7 @@ struct kvm_enable_cap { #define KVM_CAP_HYPERV_VAPIC 45 #define KVM_CAP_HYPERV_SPIN 46 #define KVM_CAP_PCI_SEGMENT 47 -#define KVM_CAP_PPC_PAIRED_SINGLES 48 -#define KVM_CAP_INTR_SHADOW 49 -#ifdef __KVM_HAVE_DEBUGREGS -#define KVM_CAP_DEBUGREGS 50 -#endif #define KVM_CAP_X86_ROBUST_SINGLESTEP 51 -#define KVM_CAP_PPC_OSI 52 -#define KVM_CAP_PPC_UNSET_IRQ 53 -#define KVM_CAP_ENABLE_CAP 54 #ifdef KVM_CAP_IRQ_ROUTING @@ -710,10 +688,6 @@ struct kvm_clock_data { /* Available with KVM_CAP_VCPU_EVENTS */ #define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) #define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) -/* Available with KVM_CAP_DEBUGREGS */ -#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) -#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) -#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8e91fa7049c6..a3fd0f91d943 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -54,7 +54,7 @@ extern struct kmem_cache *kvm_vcpu_cache; */ struct kvm_io_bus { int dev_count; -#define NR_IOBUS_DEVS 200 +#define NR_IOBUS_DEVS 6 struct kvm_io_device *devs[NR_IOBUS_DEVS]; }; diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 6dd3a51ab1cb..b17d49dfc3ef 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -5,6 +5,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_FILE kvm #if defined(__KVM_HAVE_IOAPIC) TRACE_EVENT(kvm_set_irq, diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 4d10b1e047f4..02ff2b19dbe2 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -316,16 +316,12 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, kvm_assigned_dev_intr, 0, "kvm_assigned_msix_device", (void *)dev); + /* FIXME: free requested_irq's on failure */ if (r) - goto err; + return r; } return 0; -err: - for (i -= 1; i >= 0; i--) - free_irq(dev->host_msix_entries[i].vector, (void *)dev); - pci_disable_msix(dev->dev); - return r; } #endif diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 53850177163f..36e258029649 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -120,10 +120,8 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) return ret; out_free_dev: - kvm->coalesced_mmio_dev = NULL; kfree(dev); out_free_page: - kvm->coalesced_mmio_ring = NULL; __free_page(page); out_err: return ret; @@ -141,7 +139,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; if (dev == NULL) - return -ENXIO; + return -EINVAL; mutex_lock(&kvm->slots_lock); if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { @@ -164,7 +162,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *z; if (dev == NULL) - return -ENXIO; + return -EINVAL; mutex_lock(&kvm->slots_lock); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be7c50a562c5..5a0cd194dce0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -418,6 +418,9 @@ static struct kvm *kvm_create_vm(void) spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + kvm_coalesced_mmio_init(kvm); +#endif out: return kvm; @@ -907,11 +910,6 @@ int memslot_id(struct kvm *kvm, gfn_t gfn) return memslot - slots->memslots; } -static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) -{ - return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE; -} - unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *slot; @@ -920,7 +918,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) slot = gfn_to_memslot_unaliased(kvm, gfn); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) return bad_hva(); - return gfn_to_hva_memslot(slot, gfn); + return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); } EXPORT_SYMBOL_GPL(gfn_to_hva); @@ -970,6 +968,11 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(gfn_to_pfn); +static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) +{ + return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE); +} + pfn_t gfn_to_pfn_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn) { @@ -1599,6 +1602,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&zone, argp, sizeof zone)) goto out; + r = -ENXIO; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); if (r) goto out; @@ -1610,6 +1614,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&zone, argp, sizeof zone)) goto out; + r = -ENXIO; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); if (r) goto out; @@ -1743,19 +1748,12 @@ static struct file_operations kvm_vm_fops = { static int kvm_dev_ioctl_create_vm(void) { - int fd, r; + int fd; struct kvm *kvm; kvm = kvm_create_vm(); if (IS_ERR(kvm)) return PTR_ERR(kvm); -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET - r = kvm_coalesced_mmio_init(kvm); - if (r < 0) { - kvm_put_kvm(kvm); - return r; - } -#endif fd = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); if (fd < 0) kvm_put_kvm(kvm); @@ -2274,6 +2272,7 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { + tracepoint_synchronize_unregister(); kvm_exit_debug(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); |