summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2008-12-18 22:01:20 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2008-12-18 22:01:20 +1100
commitb19a7ac69a762f8d427b8e2867338453e2063ce1 (patch)
treea22039046c596f974c7935ecc32b5772f5796cea
parent9360c7e5621cb1ab2e36be2cdccf4513dfd24331 (diff)
Revert "Merge commit 'kvm/master'"
This reverts commit b1b5bb8a5db9cf0b5a06fe3aab735a602285bf3b.
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/ia64/include/asm/kvm.h13
-rw-r--r--arch/ia64/include/asm/kvm_host.h196
-rw-r--r--arch/ia64/kvm/Makefile2
-rw-r--r--arch/ia64/kvm/asm-offsets.c11
-rw-r--r--arch/ia64/kvm/kvm-ia64.c111
-rw-r--r--arch/ia64/kvm/kvm_lib.c15
-rw-r--r--arch/ia64/kvm/kvm_minstate.h4
-rw-r--r--arch/ia64/kvm/misc.h3
-rw-r--r--arch/ia64/kvm/mmio.c38
-rw-r--r--arch/ia64/kvm/process.c29
-rw-r--r--arch/ia64/kvm/vcpu.c76
-rw-r--r--arch/ia64/kvm/vcpu.h5
-rw-r--r--arch/ia64/kvm/vmm.c29
-rw-r--r--arch/ia64/kvm/vmm_ivt.S1469
-rw-r--r--arch/ia64/kvm/vtlb.c4
-rw-r--r--arch/powerpc/include/asm/disassemble.h80
-rw-r--r--arch/powerpc/include/asm/kvm.h7
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h61
-rw-r--r--arch/powerpc/include/asm/kvm_host.h116
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h86
-rw-r--r--arch/powerpc/include/asm/mmu-44x.h1
-rw-r--r--arch/powerpc/kernel/asm-offsets.c21
-rw-r--r--arch/powerpc/kvm/44x.c228
-rw-r--r--arch/powerpc/kvm/44x_emulate.c371
-rw-r--r--arch/powerpc/kvm/44x_tlb.c463
-rw-r--r--arch/powerpc/kvm/44x_tlb.h26
-rw-r--r--arch/powerpc/kvm/Kconfig28
-rw-r--r--arch/powerpc/kvm/Makefile12
-rw-r--r--arch/powerpc/kvm/booke.h60
-rw-r--r--arch/powerpc/kvm/booke_guest.c (renamed from arch/powerpc/kvm/booke.c)418
-rw-r--r--arch/powerpc/kvm/booke_host.c83
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S72
-rw-r--r--arch/powerpc/kvm/emulate.c447
-rw-r--r--arch/powerpc/kvm/powerpc.c139
-rw-r--r--arch/powerpc/kvm/timing.c239
-rw-r--r--arch/powerpc/kvm/timing.h102
-rw-r--r--arch/s390/include/asm/kvm.h7
-rw-r--r--arch/s390/kvm/kvm-s390.c45
-rw-r--r--arch/x86/include/asm/kvm.h18
-rw-r--r--arch/x86/include/asm/kvm_host.h82
-rw-r--r--arch/x86/include/asm/kvm_x86_emulate.h11
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/mtrr.h25
-rw-r--r--arch/x86/include/asm/virtext.h132
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c12
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h18
-rw-r--r--arch/x86/kernel/crash.c18
-rw-r--r--arch/x86/kernel/kvmclock.c10
-rw-r--r--arch/x86/kernel/reboot.c62
-rw-r--r--arch/x86/kvm/i8254.c19
-rw-r--r--arch/x86/kvm/irq.h1
-rw-r--r--arch/x86/kvm/kvm_svm.h18
-rw-r--r--arch/x86/kvm/lapic.c58
-rw-r--r--arch/x86/kvm/mmu.c440
-rw-r--r--arch/x86/kvm/paging_tmpl.h35
-rw-r--r--arch/x86/kvm/svm.c980
-rw-r--r--arch/x86/kvm/svm.h (renamed from arch/x86/include/asm/svm.h)4
-rw-r--r--arch/x86/kvm/vmx.c601
-rw-r--r--arch/x86/kvm/vmx.h (renamed from arch/x86/include/asm/vmx.h)32
-rw-r--r--arch/x86/kvm/x86.c232
-rw-r--r--arch/x86/kvm/x86_emulate.c297
-rw-r--r--fs/anon_inodes.c7
-rw-r--r--include/linux/kvm.h69
-rw-r--r--include/linux/kvm_host.h18
-rw-r--r--virt/kvm/ioapic.c8
-rw-r--r--virt/kvm/ioapic.h2
-rw-r--r--virt/kvm/irq_comm.c19
-rw-r--r--virt/kvm/kvm_main.c426
-rw-r--r--virt/kvm/kvm_trace.c1
71 files changed, 2852 insertions, 5933 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 33e69bdc5184..5f8640b08eaa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2537,6 +2537,8 @@ W: http://kvm.qumranet.com
S: Supported
KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
+P: Anthony Xu
+M: anthony.xu@intel.com
P: Xiantao Zhang
M: xiantao.zhang@intel.com
L: kvm-ia64@vger.kernel.org
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 3cf1baed68ba..f38472ac2267 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -166,6 +166,8 @@ struct saved_vpd {
};
struct kvm_regs {
+ char *saved_guest;
+ char *saved_stack;
struct saved_vpd vpd;
/*Arch-regs*/
int mp_state;
@@ -198,10 +200,6 @@ struct kvm_regs {
unsigned long fp_psr; /*used for lazy float register */
unsigned long saved_gp;
/*for phycial emulation */
-
- union context saved_guest;
-
- unsigned long reserved[64]; /* for future use */
};
struct kvm_sregs {
@@ -210,11 +208,4 @@ struct kvm_sregs {
struct kvm_fpu {
};
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
#endif
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..c60d324da540 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -23,6 +23,17 @@
#ifndef __ASM_KVM_HOST_H
#define __ASM_KVM_HOST_H
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/kvm_types.h>
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+
+#define KVM_MAX_VCPUS 4
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
@@ -39,132 +50,70 @@
#define EXIT_REASON_EXTERNAL_INTERRUPT 6
#define EXIT_REASON_IPI 7
#define EXIT_REASON_PTC_G 8
-#define EXIT_REASON_DEBUG 20
/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
+#define KVM_VMM_SIZE (16UL<<20)
#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
+#define KVM_VMM_BASE 0xD000000000000000UL
+#define VMM_SIZE (8UL<<20)
/*
* Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
+ * Note: vmbuffer is in the VMM-BLOCK, the size must be < 8M
*/
#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- * +----------------------+ ------- KVM_VM_DATA_SIZE
- * | vcpu[n]'s data | | ___________________KVM_STK_OFFSET
- * | | | / |
- * | .......... | | /vcpu's struct&stack |
- * | .......... | | /---------------------|---- 0
- * | vcpu[5]'s data | | / vpd |
- * | vcpu[4]'s data | |/-----------------------|
- * | vcpu[3]'s data | / vtlb |
- * | vcpu[2]'s data | /|------------------------|
- * | vcpu[1]'s data |/ | vhpt |
- * | vcpu[0]'s data |____________________________|
- * +----------------------+ |
- * | memory dirty log | |
- * +----------------------+ |
- * | vm's data struct | |
- * +----------------------+ |
- * | | |
- * | | |
- * | | |
- * | | |
- * | | |
- * | | |
- * | | |
- * | vm's p2m table | |
- * | | |
- * | | |
- * | | | |
- * vm's data->| | | |
- * +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT 26
-#define KVM_VM_DATA_SIZE (__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE (__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT 16
-#define VHPT_SIZE (__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES (__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT 16
-#define VTLB_SIZE (__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES (1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT 16
-#define VPD_SIZE (__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT 16
-#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
-
-#define KVM_VM_STRUCT_SHIFT 19
-#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT 19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS (KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
- KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
- char vcpu_vhpt[VHPT_SIZE];
- char vcpu_vtlb[VTLB_SIZE];
- char vcpu_vpd[VPD_SIZE];
- char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
- char kvm_p2m[KVM_P2M_SIZE];
- char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
- char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
- struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n) KVM_VM_DATA_BASE + \
- offsetof(struct kvm_vm_data, vcpu_data[n])
-#define VM_BASE KVM_VM_DATA_BASE + \
- offsetof(struct kvm_vm_data, kvm_vm_struct)
-#define KVM_MEM_DIRTY_LOG_BASE KVM_VM_DATA_BASE + \
- offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n) (VCPU_BASE(n) + \
- offsetof(struct kvm_vcpu_data, vcpu_struct))
+#define KVM_VM_BUFFER_SIZE (8UL<<20)
+
+/*Define Virtual machine data layout.*/
+#define KVM_VM_DATA_SHIFT 24
+#define KVM_VM_DATA_SIZE (1UL << KVM_VM_DATA_SHIFT)
+#define KVM_VM_DATA_BASE (KVM_VMM_BASE + KVM_VMM_SIZE)
+
+
+#define KVM_P2M_BASE KVM_VM_DATA_BASE
+#define KVM_P2M_OFS 0
+#define KVM_P2M_SIZE (8UL << 20)
+
+#define KVM_VHPT_BASE (KVM_P2M_BASE + KVM_P2M_SIZE)
+#define KVM_VHPT_OFS KVM_P2M_SIZE
+#define KVM_VHPT_BLOCK_SIZE (2UL << 20)
+#define VHPT_SHIFT 18
+#define VHPT_SIZE (1UL << VHPT_SHIFT)
+#define VHPT_NUM_ENTRIES (1<<(VHPT_SHIFT-5))
+
+#define KVM_VTLB_BASE (KVM_VHPT_BASE+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_OFS (KVM_VHPT_OFS+KVM_VHPT_BLOCK_SIZE)
+#define KVM_VTLB_BLOCK_SIZE (1UL<<20)
+#define VTLB_SHIFT 17
+#define VTLB_SIZE (1UL<<VTLB_SHIFT)
+#define VTLB_NUM_ENTRIES (1<<(VTLB_SHIFT-5))
+
+#define KVM_VPD_BASE (KVM_VTLB_BASE+KVM_VTLB_BLOCK_SIZE)
+#define KVM_VPD_OFS (KVM_VTLB_OFS+KVM_VTLB_BLOCK_SIZE)
+#define KVM_VPD_BLOCK_SIZE (2UL<<20)
+#define VPD_SHIFT 16
+#define VPD_SIZE (1UL<<VPD_SHIFT)
+
+#define KVM_VCPU_BASE (KVM_VPD_BASE+KVM_VPD_BLOCK_SIZE)
+#define KVM_VCPU_OFS (KVM_VPD_OFS+KVM_VPD_BLOCK_SIZE)
+#define KVM_VCPU_BLOCK_SIZE (2UL<<20)
+#define VCPU_SHIFT 18
+#define VCPU_SIZE (1UL<<VCPU_SHIFT)
+#define MAX_VCPU_NUM KVM_VCPU_BLOCK_SIZE/VCPU_SIZE
+
+#define KVM_VM_BASE (KVM_VCPU_BASE+KVM_VCPU_BLOCK_SIZE)
+#define KVM_VM_OFS (KVM_VCPU_OFS+KVM_VCPU_BLOCK_SIZE)
+#define KVM_VM_BLOCK_SIZE (1UL<<19)
+
+#define KVM_MEM_DIRTY_LOG_BASE (KVM_VM_BASE+KVM_VM_BLOCK_SIZE)
+#define KVM_MEM_DIRTY_LOG_OFS (KVM_VM_OFS+KVM_VM_BLOCK_SIZE)
+#define KVM_MEM_DIRTY_LOG_SIZE (1UL<<19)
+
+/* Get vpd, vhpt, tlb, vcpu, base*/
+#define VPD_ADDR(n) (KVM_VPD_BASE+n*VPD_SIZE)
+#define VHPT_ADDR(n) (KVM_VHPT_BASE+n*VHPT_SIZE)
+#define VTLB_ADDR(n) (KVM_VTLB_BASE+n*VTLB_SIZE)
+#define VCPU_ADDR(n) (KVM_VCPU_BASE+n*VCPU_SIZE)
/*IO section definitions*/
#define IOREQ_READ 1
@@ -440,7 +389,6 @@ struct kvm_vcpu_arch {
unsigned long opcode;
unsigned long cause;
- char log_buf[VMM_LOG_LEN];
union context host;
union context guest;
};
@@ -455,13 +403,14 @@ struct kvm_sal_data {
};
struct kvm_arch {
- spinlock_t dirty_log_lock;
-
unsigned long vm_base;
unsigned long metaphysical_rr0;
unsigned long metaphysical_rr4;
unsigned long vmm_init_rr;
-
+ unsigned long vhpt_base;
+ unsigned long vtlb_base;
+ unsigned long vpd_base;
+ spinlock_t dirty_log_lock;
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
@@ -563,7 +512,7 @@ struct kvm_pt_regs {
static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
{
- return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
+ return (struct kvm_pt_regs *) ((unsigned long) v + IA64_STK_OFFSET) - 1;
}
typedef int kvm_vmm_entry(void);
@@ -582,6 +531,5 @@ int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
void kvm_sal_emul(struct kvm_vcpu *vcpu);
static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
-#endif /* __ASSEMBLY__*/
#endif
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc312e6..92cef66ca268 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -60,7 +60,7 @@ obj-$(CONFIG_KVM) += kvm.o
CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
- vtlb.o process.o kvm_lib.o
+ vtlb.o process.o
#Add link memcpy and memset to avoid possible structure assignment error
kvm-intel-objs += memcpy.o memset.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
index 0c3564a7a033..4e3dc13a619c 100644
--- a/arch/ia64/kvm/asm-offsets.c
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -24,10 +24,19 @@
#include <linux/autoconf.h>
#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
#include "vcpu.h"
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+ DEFINE(_sym, offsetof(_str, _mem));
+
void foo(void)
{
DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 3138d762b2df..af1464f7a6ad 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -180,6 +180,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
+ case KVM_CAP_USER_MEMORY:
case KVM_CAP_MP_STATE:
r = 1;
@@ -438,6 +439,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
expires = div64_u64(itc_diff, cyc_per_usec);
kt = ktime_set(0, 1000 * expires);
+ down_read(&vcpu->kvm->slots_lock);
vcpu->arch.ht_active = 1;
hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
@@ -450,6 +452,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
+ up_read(&vcpu->kvm->slots_lock);
if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
return -EINTR;
@@ -473,13 +476,6 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
return 1;
}
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
- struct kvm_run *kvm_run)
-{
- printk("VMM: %s", vcpu->arch.log_buf);
- return 1;
-}
-
static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run) = {
[EXIT_REASON_VM_PANIC] = handle_vm_error,
@@ -491,7 +487,6 @@ static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_IPI] = handle_ipi,
[EXIT_REASON_PTC_G] = handle_global_purge,
- [EXIT_REASON_DEBUG] = handle_vcpu_debug,
};
@@ -703,24 +698,27 @@ out:
return r;
}
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
static struct kvm *kvm_alloc_kvm(void)
{
struct kvm *kvm;
uint64_t vm_base;
- BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
if (!vm_base)
return ERR_PTR(-ENOMEM);
+ printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+ /* Zero all pages before use! */
memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
- kvm = (struct kvm *)(vm_base +
- offsetof(struct kvm_vm_data, kvm_vm_struct));
+
+ kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
kvm->arch.vm_base = vm_base;
- printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
return kvm;
}
@@ -762,12 +760,21 @@ static void kvm_build_io_pmt(struct kvm *kvm)
static void kvm_init_vm(struct kvm *kvm)
{
+ long vm_base;
+
BUG_ON(!kvm);
kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
kvm->arch.vmm_init_rr = VMM_INIT_RR;
+ vm_base = kvm->arch.vm_base;
+ if (vm_base) {
+ kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+ kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+ kvm->arch.vpd_base = vm_base + KVM_VPD_OFS;
+ }
+
/*
*Fill P2M entries for MMIO/IO ranges
*/
@@ -831,8 +838,9 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
int i;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+ int r;
vcpu_load(vcpu);
@@ -849,7 +857,18 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vpd->vpr = regs->vpd.vpr;
- memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
+ r = -EFAULT;
+ r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+ sizeof(union context));
+ if (r)
+ goto out;
+ r = copy_from_user(vcpu + 1, regs->saved_stack +
+ sizeof(struct kvm_vcpu),
+ IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+ if (r)
+ goto out;
+ vcpu->arch.exit_data =
+ ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
RESTORE_REGS(mp_state);
RESTORE_REGS(vmm_rr);
@@ -883,8 +902,9 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
set_bit(KVM_REQ_RESUME, &vcpu->requests);
vcpu_put(vcpu);
-
- return 0;
+ r = 0;
+out:
+ return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
@@ -1146,11 +1166,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/*Set entry address for first run.*/
regs->cr_iip = PALE_RESET_ENTRY;
- /*Initialize itc offset for vcpus*/
+ /*Initilize itc offset for vcpus*/
itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
- v = (struct kvm_vcpu *)((char *)vcpu +
- sizeof(struct kvm_vcpu_data) * i);
+ for (i = 0; i < MAX_VCPU_NUM; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
v->arch.itc_offset = itc_offset;
v->arch.last_itc = 0;
}
@@ -1164,7 +1183,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.apic->vcpu = vcpu;
p_ctx->gr[1] = 0;
- p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
+ p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
p_ctx->gr[13] = (unsigned long)vmm_vcpu;
p_ctx->psr = 0x1008522000UL;
p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
@@ -1199,12 +1218,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.hlt_timer.function = hlt_timer_fn;
vcpu->arch.last_run_cpu = -1;
- vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
+ vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
vcpu->arch.vsa_base = kvm_vsa_base;
vcpu->arch.__gp = kvm_vmm_gp;
vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
- vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
- vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
+ vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+ vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
init_ptce_info(vcpu);
r = 0;
@@ -1254,22 +1273,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
int r;
int cpu;
- BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
- r = -EINVAL;
- if (id >= KVM_MAX_VCPUS) {
- printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
- KVM_MAX_VCPUS);
- goto fail;
- }
-
r = -ENOMEM;
if (!vm_base) {
printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
goto fail;
}
- vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
- vcpu_data[id].vcpu_struct));
+ vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
vcpu->kvm = kvm;
cpu = get_cpu();
@@ -1302,8 +1311,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -EINVAL;
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
{
return -EINVAL;
}
@@ -1365,9 +1374,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
int i;
-
+ int r;
+ struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
vcpu_load(vcpu);
for (i = 0; i < 16; i++) {
@@ -1382,8 +1391,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->vpd.vpsr = vpd->vpsr;
regs->vpd.vpr = vpd->vpr;
- memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
+ r = -EFAULT;
+ r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+ sizeof(union context));
+ if (r)
+ goto out;
+ r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+ if (r)
+ goto out;
SAVE_REGS(mp_state);
SAVE_REGS(vmm_rr);
memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
@@ -1411,9 +1426,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
SAVE_REGS(metaphysical_saved_rr4);
SAVE_REGS(fp_psr);
SAVE_REGS(saved_gp);
-
vcpu_put(vcpu);
- return 0;
+ r = 0;
+out:
+ return r;
}
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
@@ -1441,9 +1457,6 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
unsigned long base_gfn = memslot->base_gfn;
- if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
- return -ENOMEM;
-
for (i = 0; i < npages; i++) {
pfn = gfn_to_pfn(kvm, base_gfn + i);
if (!kvm_is_mmio_pfn(pfn)) {
@@ -1618,8 +1631,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int r, i;
long n, base;
- unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
- offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
+ unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+ + KVM_MEM_DIRTY_LOG_OFS);
r = -EINVAL;
if (log->slot >= KVM_MEMORY_SLOTS)
diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index a85cb611ecd7..000000000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- * Just include kernel's library, and disable symbols export.
- * Copyright (C) 2008, Intel Corporation.
- * Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
index b2bcaa2787aa..2cc41d17cf99 100644
--- a/arch/ia64/kvm/kvm_minstate.h
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -24,8 +24,6 @@
#include <asm/asmmacro.h>
#include <asm/types.h>
#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
#include "asm-offsets.h"
#define KVM_MINSTATE_START_SAVE_MIN \
@@ -35,7 +33,7 @@
addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \
;; \
lfetch.fault.excl.nt1 [r22]; \
- addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1; \
+ addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
mov r23 = ar.bspstore; /* save ar.bspstore */ \
;; \
mov ar.bspstore = r22; /* switch to kernel RBS */\
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
index dd979e00b574..e585c4607344 100644
--- a/arch/ia64/kvm/misc.h
+++ b/arch/ia64/kvm/misc.h
@@ -27,8 +27,7 @@
*/
static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
{
- return (uint64_t *)(kvm->arch.vm_base +
- offsetof(struct kvm_vm_data, kvm_p2m));
+ return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
}
static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 21f63fffc379..7f1a858bc69f 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -66,25 +66,31 @@ void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
switch (addr) {
case PIB_OFST_INTA:
- panic_vm(v, "Undefined write on PIB INTA\n");
+ /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+ panic_vm(v);
break;
case PIB_OFST_XTP:
if (length == 1) {
vlsapic_write_xtp(v, val);
} else {
- panic_vm(v, "Undefined write on PIB XTP\n");
+ /*panic_domain(NULL,
+ "Undefined write on PIB XTP\n");*/
+ panic_vm(v);
}
break;
default:
if (PIB_LOW_HALF(addr)) {
- /*Lower half */
+ /*lower half */
if (length != 8)
- panic_vm(v, "Can't LHF write with size %ld!\n",
- length);
+ /*panic_domain(NULL,
+ "Can't LHF write with size %ld!\n",
+ length);*/
+ panic_vm(v);
else
vlsapic_write_ipi(v, addr, val);
- } else { /*Upper half */
- panic_vm(v, "IPI-UHF write %lx\n", addr);
+ } else { /* upper half
+ printk("IPI-UHF write %lx\n",addr);*/
+ panic_vm(v);
}
break;
}
@@ -102,18 +108,22 @@ unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
if (length == 1) /* 1 byte load */
; /* There is no i8259, there is no INTA access*/
else
- panic_vm(v, "Undefined read on PIB INTA\n");
+ /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+ panic_vm(v);
break;
case PIB_OFST_XTP:
if (length == 1) {
result = VLSAPIC_XTP(v);
+ /* printk("read xtp %lx\n", result); */
} else {
- panic_vm(v, "Undefined read on PIB XTP\n");
+ /*panic_domain(NULL,
+ "Undefined read on PIB XTP\n");*/
+ panic_vm(v);
}
break;
default:
- panic_vm(v, "Undefined addr access for lsapic!\n");
+ panic_vm(v);
break;
}
return result;
@@ -152,7 +162,7 @@ static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
/* it's necessary to ensure zero extending */
*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
} else
- panic_vm(vcpu, "Unhandled mmio access returned!\n");
+ panic_vm(vcpu);
out:
local_irq_restore(psr);
return ;
@@ -314,9 +324,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
return;
} else {
inst_type = -1;
- panic_vm(vcpu, "Unsupported MMIO access instruction! \
- Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
- bundle.i64[0], bundle.i64[1]);
+ panic_vm(vcpu);
}
size = 1 << size;
@@ -327,7 +335,7 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
if (inst_type == SL_INTEGER)
vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
else
- panic_vm(vcpu, "Unsupported instruction type!\n");
+ panic_vm(vcpu);
}
vcpu_increment_iip(vcpu);
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 552d07724207..800817307b7b 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -527,8 +527,7 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
vector = vec2off[vec];
if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
- panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
- "with psr.ic = 0\n", vector);
+ panic_vm(vcpu);
return;
}
@@ -587,7 +586,7 @@ static void set_pal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
} else
- panic_vm(vcpu, "Mis-set for exit reason!\n");
+ panic_vm(vcpu);
}
static void set_sal_call_data(struct kvm_vcpu *vcpu)
@@ -615,7 +614,7 @@ static void set_sal_call_result(struct kvm_vcpu *vcpu)
vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
} else
- panic_vm(vcpu, "Mis-set for exit reason!\n");
+ panic_vm(vcpu);
}
void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
@@ -681,7 +680,7 @@ static void generate_exirq(struct kvm_vcpu *vcpu)
vpsr = VCPU(vcpu, vpsr);
isr = vpsr & IA64_PSR_RI;
if (!(vpsr & IA64_PSR_IC))
- panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
+ panic_vm(vcpu);
reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
}
@@ -942,20 +941,8 @@ static void vcpu_do_resume(struct kvm_vcpu *vcpu)
ia64_set_pta(vcpu->arch.vhpt.pta.val);
}
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
- struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
- if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
- panic_vm(vcpu, "Failed to do vmm sanity check,"
- "it maybe caused by crashed vmm!!\n\n");
- }
-}
-
static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
{
- vmm_sanity_check(vcpu); /*Guarantee vcpu runing on healthy vmm!*/
-
if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
vcpu_do_resume(vcpu);
return;
@@ -981,11 +968,3 @@ void vmm_transition(struct kvm_vcpu *vcpu)
1, 0, 0, 0, 0, 0);
kvm_do_resume_op(vcpu);
}
-
-void vmm_panic_handler(u64 vec)
-{
- struct kvm_vcpu *vcpu = current_vcpu;
- vmm_sanity = 0;
- panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
- vec2off[vec]);
-}
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b55323..e44027ce5667 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -816,9 +816,8 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
unsigned long vitv = VCPU(vcpu, itv);
if (vcpu->vcpu_id == 0) {
- for (i = 0; i < KVM_MAX_VCPUS; i++) {
- v = (struct kvm_vcpu *)((char *)vcpu +
- sizeof(struct kvm_vcpu_data) * i);
+ for (i = 0; i < MAX_VCPU_NUM; i++) {
+ v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
VMX(v, itc_offset) = itc_offset;
VMX(v, last_itc) = 0;
}
@@ -1651,8 +1650,7 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
* Otherwise panic
*/
if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
- panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
- & vpsr.is=0\n");
+ panic_vm(vcpu);
/*
* For those IA64_PSR bits: id/da/dd/ss/ed/ia
@@ -2105,7 +2103,7 @@ void kvm_init_all_rr(struct kvm_vcpu *vcpu)
if (is_physical_mode(vcpu)) {
if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
- panic_vm(vcpu, "Machine Status conflicts!\n");
+ panic_vm(vcpu);
ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
ia64_dv_serialize_data();
@@ -2154,70 +2152,10 @@ int vmm_entry(void)
return 0;
}
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
- unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
- struct kvm_vcpu *vcpu = current_vcpu;
- if (vcpu != NULL)
- printk("vcpu 0x%p vcpu %d\n",
- vcpu, vcpu->vcpu_id);
-
- printk("psr : %016lx ifs : %016lx ip : [<%016lx>]\n",
- regs->cr_ipsr, regs->cr_ifs, ip);
-
- printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
- regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
- printk("rnat: %016lx bspstore: %016lx pr : %016lx\n",
- regs->ar_rnat, regs->ar_bspstore, regs->pr);
- printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
- regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
- printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
- printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0,
- regs->b6, regs->b7);
- printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
- regs->f6.u.bits[1], regs->f6.u.bits[0],
- regs->f7.u.bits[1], regs->f7.u.bits[0]);
- printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
- regs->f8.u.bits[1], regs->f8.u.bits[0],
- regs->f9.u.bits[1], regs->f9.u.bits[0]);
- printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
- regs->f10.u.bits[1], regs->f10.u.bits[0],
- regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
- printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1,
- regs->r2, regs->r3);
- printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8,
- regs->r9, regs->r10);
- printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
- regs->r12, regs->r13);
- printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
- regs->r15, regs->r16);
- printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
- regs->r18, regs->r19);
- printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
- regs->r21, regs->r22);
- printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
- regs->r24, regs->r25);
- printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
- regs->r27, regs->r28);
- printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
- regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
- va_list args;
- char buf[256];
-
- struct kvm_pt_regs *regs = vcpu_regs(v);
+void panic_vm(struct kvm_vcpu *v)
+{
struct exit_ctl_data *p = &v->arch.exit_data;
- va_start(args, fmt);
- vsnprintf(buf, sizeof(buf), fmt, args);
- va_end(args);
- printk(buf);
- kvm_show_registers(regs);
+
p->exit_reason = EXIT_REASON_VM_PANIC;
vmm_transition(v);
/*Never to return*/
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b2f12a562bdf..e9b2a4e121c0 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -737,12 +737,9 @@ void kvm_init_vtlb(struct kvm_vcpu *v);
void kvm_init_vhpt(struct kvm_vcpu *v);
void thash_init(struct thash_cb *hcb, u64 sz);
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
+void panic_vm(struct kvm_vcpu *v);
extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
#endif
#endif /* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
index 9eee5c04bacc..2275bf4e681a 100644
--- a/arch/ia64/kvm/vmm.c
+++ b/arch/ia64/kvm/vmm.c
@@ -20,7 +20,6 @@
*/
-#include<linux/kernel.h>
#include<linux/module.h>
#include<asm/fpswa.h>
@@ -32,8 +31,6 @@ MODULE_LICENSE("GPL");
extern char kvm_ia64_ivt;
extern fpswa_interface_t *vmm_fpswa_interface;
-long vmm_sanity = 1;
-
struct kvm_vmm_info vmm_info = {
.module = THIS_MODULE,
.vmm_entry = vmm_entry,
@@ -65,31 +62,5 @@ void vmm_spin_unlock(spinlock_t *lock)
{
_vmm_raw_spin_unlock(lock);
}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
- struct exit_ctl_data *p = &vcpu->arch.exit_data;
- long psr;
-
- local_irq_save(psr);
- p->exit_reason = EXIT_REASON_DEBUG;
- vmm_transition(vcpu);
- local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
- struct kvm_vcpu *vcpu = current_vcpu;
- va_list args;
- int r;
-
- memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
- va_start(args, fmt);
- r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
- va_end(args);
- vcpu_debug_exit(vcpu);
- return r;
-}
-
module_init(kvm_vmm_init)
module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 3ef1a017a318..c1d7251a1480 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -1,5 +1,5 @@
/*
- * arch/ia64/kvm/vmm_ivt.S
+ * /ia64/kvm_ivt.S
*
* Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
* Stephane Eranian <eranian@hpl.hp.com>
@@ -70,39 +70,32 @@
# define PSR_DEFAULT_BITS 0
#endif
+
#define KVM_FAULT(n) \
- kvm_fault_##n:; \
- mov r19=n;; \
- br.sptk.many kvm_vmm_panic; \
- ;; \
+ kvm_fault_##n:; \
+ mov r19=n;; \
+ br.sptk.many kvm_fault_##n; \
+ ;; \
+
#define KVM_REFLECT(n) \
- mov r31=pr; \
- mov r19=n; /* prepare to save predicates */ \
- mov r29=cr.ipsr; \
- ;; \
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
-(p7) br.sptk.many kvm_dispatch_reflection; \
- br.sptk.many kvm_vmm_panic; \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,1,0
- mov out0=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
+ mov r31=pr; \
+ mov r19=n; /* prepare to save predicates */ \
+ mov r29=cr.ipsr; \
+ ;; \
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \
+(p7)br.sptk.many kvm_dispatch_reflection; \
+ br.sptk.many kvm_panic; \
+
+
+GLOBAL_ENTRY(kvm_panic)
+ br.sptk.many kvm_panic
+ ;;
+END(kvm_panic)
+
+
+
+
.section .text.ivt,"ax"
@@ -112,307 +105,308 @@ kvm_ia64_ivt:
///////////////////////////////////////////////////////////////
// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
ENTRY(kvm_vhpt_miss)
- KVM_FAULT(0)
+ KVM_FAULT(0)
END(kvm_vhpt_miss)
+
.org kvm_ia64_ivt+0x400
////////////////////////////////////////////////////////////////
// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
ENTRY(kvm_itlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6) br.sptk kvm_alt_itlb_miss
- mov r19 = 1
- br.sptk kvm_itlb_miss_dispatch
- KVM_FAULT(1);
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+ (p6) br.sptk kvm_alt_itlb_miss
+ mov r19 = 1
+ br.sptk kvm_itlb_miss_dispatch
+ KVM_FAULT(1);
END(kvm_itlb_miss)
.org kvm_ia64_ivt+0x0800
//////////////////////////////////////////////////////////////////
// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
ENTRY(kvm_dtlb_miss)
- mov r31 = pr
- mov r29=cr.ipsr;
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6) br.sptk kvm_alt_dtlb_miss
- br.sptk kvm_dtlb_miss_dispatch
+ mov r31 = pr
+ mov r29=cr.ipsr;
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk kvm_alt_dtlb_miss
+ br.sptk kvm_dtlb_miss_dispatch
END(kvm_dtlb_miss)
.org kvm_ia64_ivt+0x0c00
////////////////////////////////////////////////////////////////////
// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
ENTRY(kvm_alt_itlb_miss)
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
- movl r17=PAGE_KERNEL
- mov r24=cr.ipsr
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- ;;
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- ;;
- or r19=r17,r19 // insert PTE control bits into r19
- ;;
- movl r20=IA64_GRANULE_SHIFT<<2
- ;;
- mov cr.itir=r20
- ;;
- itc.i r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ mov r24=cr.ipsr
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r17,r19 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.i r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
END(kvm_alt_itlb_miss)
.org kvm_ia64_ivt+0x1000
/////////////////////////////////////////////////////////////////////
// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
ENTRY(kvm_alt_dtlb_miss)
- mov r16=cr.ifa // get address that caused the TLB miss
- ;;
- movl r17=PAGE_KERNEL
- movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
- mov r24=cr.ipsr
- ;;
- and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- ;;
- or r19=r19,r17 // insert PTE control bits into r19
- ;;
- movl r20=IA64_GRANULE_SHIFT<<2
- ;;
- mov cr.itir=r20
- ;;
- itc.d r19 // insert the TLB entry
- mov pr=r31,-1
- rfi
+ mov r16=cr.ifa // get address that caused the TLB miss
+ ;;
+ movl r17=PAGE_KERNEL
+ movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+ mov r24=cr.ipsr
+ ;;
+ and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
+ ;;
+ or r19=r19,r17 // insert PTE control bits into r19
+ ;;
+ movl r20=IA64_GRANULE_SHIFT<<2
+ ;;
+ mov cr.itir=r20
+ ;;
+ itc.d r19 // insert the TLB entry
+ mov pr=r31,-1
+ rfi
END(kvm_alt_dtlb_miss)
.org kvm_ia64_ivt+0x1400
//////////////////////////////////////////////////////////////////////
// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
ENTRY(kvm_nested_dtlb_miss)
- KVM_FAULT(5)
+ KVM_FAULT(5)
END(kvm_nested_dtlb_miss)
.org kvm_ia64_ivt+0x1800
/////////////////////////////////////////////////////////////////////
// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
ENTRY(kvm_ikey_miss)
- KVM_REFLECT(6)
+ KVM_REFLECT(6)
END(kvm_ikey_miss)
.org kvm_ia64_ivt+0x1c00
/////////////////////////////////////////////////////////////////////
// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
ENTRY(kvm_dkey_miss)
- KVM_REFLECT(7)
+ KVM_REFLECT(7)
END(kvm_dkey_miss)
.org kvm_ia64_ivt+0x2000
////////////////////////////////////////////////////////////////////
// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
ENTRY(kvm_dirty_bit)
- KVM_REFLECT(8)
+ KVM_REFLECT(8)
END(kvm_dirty_bit)
.org kvm_ia64_ivt+0x2400
////////////////////////////////////////////////////////////////////
// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
ENTRY(kvm_iaccess_bit)
- KVM_REFLECT(9)
+ KVM_REFLECT(9)
END(kvm_iaccess_bit)
.org kvm_ia64_ivt+0x2800
///////////////////////////////////////////////////////////////////
// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
ENTRY(kvm_daccess_bit)
- KVM_REFLECT(10)
+ KVM_REFLECT(10)
END(kvm_daccess_bit)
.org kvm_ia64_ivt+0x2c00
/////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(kvm_break_fault)
- mov r31=pr
- mov r19=11
- mov r29=cr.ipsr
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- ;;
- alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
- mov out0=cr.ifa
- mov out2=cr.isr // FIXME: pity to make this slow access twice
- mov out3=cr.iim // FIXME: pity to make this slow access twice
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15)ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out1=16,sp
- br.call.sptk.many b6=kvm_ia64_handle_break
- ;;
+ mov r31=pr
+ mov r19=11
+ mov r29=cr.ipsr
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+ mov out0=cr.ifa
+ mov out2=cr.isr // FIXME: pity to make this slow access twice
+ mov out3=cr.iim // FIXME: pity to make this slow access twice
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15)ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out1=16,sp
+ br.call.sptk.many b6=kvm_ia64_handle_break
+ ;;
END(kvm_break_fault)
.org kvm_ia64_ivt+0x3000
/////////////////////////////////////////////////////////////////
// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
ENTRY(kvm_interrupt)
- mov r31=pr // prepare to save predicates
- mov r19=12
- mov r29=cr.ipsr
- ;;
- tbit.z p6,p7=r29,IA64_PSR_VM_BIT
- tbit.z p0,p15=r29,IA64_PSR_I_BIT
- ;;
-(p7) br.sptk kvm_dispatch_interrupt
- ;;
- mov r27=ar.rsc /* M */
- mov r20=r1 /* A */
- mov r25=ar.unat /* M */
- mov r26=ar.pfs /* I */
- mov r28=cr.iip /* M */
- cover /* B (or nothing) */
- ;;
- mov r1=sp
- ;;
- invala /* M */
- mov r30=cr.ifs
- ;;
- addl r1=-VMM_PT_REGS_SIZE,r1
- ;;
- adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
- adds r16=PT(CR_IPSR),r1
- ;;
- lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
- st8 [r16]=r29 /* save cr.ipsr */
- ;;
- lfetch.fault.excl.nt1 [r17]
- mov r29=b0
- ;;
- adds r16=PT(R8),r1 /* initialize first base pointer */
- adds r17=PT(R9),r1 /* initialize second base pointer */
- mov r18=r0 /* make sure r18 isn't NaT */
- ;;
+ mov r31=pr // prepare to save predicates
+ mov r19=12
+ mov r29=cr.ipsr
+ ;;
+ tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+ tbit.z p0,p15=r29,IA64_PSR_I_BIT
+ ;;
+(p7) br.sptk kvm_dispatch_interrupt
+ ;;
+ mov r27=ar.rsc /* M */
+ mov r20=r1 /* A */
+ mov r25=ar.unat /* M */
+ mov r26=ar.pfs /* I */
+ mov r28=cr.iip /* M */
+ cover /* B (or nothing) */
+ ;;
+ mov r1=sp
+ ;;
+ invala /* M */
+ mov r30=cr.ifs
+ ;;
+ addl r1=-VMM_PT_REGS_SIZE,r1
+ ;;
+ adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */
+ adds r16=PT(CR_IPSR),r1
+ ;;
+ lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+ st8 [r16]=r29 /* save cr.ipsr */
+ ;;
+ lfetch.fault.excl.nt1 [r17]
+ mov r29=b0
+ ;;
+ adds r16=PT(R8),r1 /* initialize first base pointer */
+ adds r17=PT(R9),r1 /* initialize second base pointer */
+ mov r18=r0 /* make sure r18 isn't NaT */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r8,16
.mem.offset 8,0; st8.spill [r17]=r9,16
;;
.mem.offset 0,0; st8.spill [r16]=r10,24
.mem.offset 8,0; st8.spill [r17]=r11,24
;;
- st8 [r16]=r28,16 /* save cr.iip */
- st8 [r17]=r30,16 /* save cr.ifs */
- mov r8=ar.fpsr /* M */
- mov r9=ar.csd
- mov r10=ar.ssd
- movl r11=FPSR_DEFAULT /* L-unit */
- ;;
- st8 [r16]=r25,16 /* save ar.unat */
- st8 [r17]=r26,16 /* save ar.pfs */
- shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
- ;;
- st8 [r16]=r27,16 /* save ar.rsc */
- adds r17=16,r17 /* skip over ar_rnat field */
- ;;
- st8 [r17]=r31,16 /* save predicates */
- adds r16=16,r16 /* skip over ar_bspstore field */
- ;;
- st8 [r16]=r29,16 /* save b0 */
- st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
- ;;
+ st8 [r16]=r28,16 /* save cr.iip */
+ st8 [r17]=r30,16 /* save cr.ifs */
+ mov r8=ar.fpsr /* M */
+ mov r9=ar.csd
+ mov r10=ar.ssd
+ movl r11=FPSR_DEFAULT /* L-unit */
+ ;;
+ st8 [r16]=r25,16 /* save ar.unat */
+ st8 [r17]=r26,16 /* save ar.pfs */
+ shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */
+ ;;
+ st8 [r16]=r27,16 /* save ar.rsc */
+ adds r17=16,r17 /* skip over ar_rnat field */
+ ;;
+ st8 [r17]=r31,16 /* save predicates */
+ adds r16=16,r16 /* skip over ar_bspstore field */
+ ;;
+ st8 [r16]=r29,16 /* save b0 */
+ st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */
.mem.offset 8,0; st8.spill [r17]=r12,16
- adds r12=-16,r1
- /* switch to kernel memory stack (with 16 bytes of scratch) */
- ;;
+ adds r12=-16,r1
+ /* switch to kernel memory stack (with 16 bytes of scratch) */
+ ;;
.mem.offset 0,0; st8.spill [r16]=r13,16
.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r16]=r15,16
.mem.offset 8,0; st8.spill [r17]=r14,16
- dep r14=-1,r0,60,4
- ;;
+ dep r14=-1,r0,60,4
+ ;;
.mem.offset 0,0; st8.spill [r16]=r2,16
.mem.offset 8,0; st8.spill [r17]=r3,16
- adds r2=VMM_PT_REGS_R16_OFFSET,r1
- adds r14 = VMM_VCPU_GP_OFFSET,r13
- ;;
- mov r8=ar.ccv
- ld8 r14 = [r14]
- ;;
- mov r1=r14 /* establish kernel global pointer */
- ;; \
- bsw.1
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
- mov out0=r13
- ;;
- ssm psr.ic
- ;;
- srlz.i
- ;;
- //(p15) ssm psr.i
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- srlz.i // ensure everybody knows psr.ic is back on
- ;;
+ adds r2=VMM_PT_REGS_R16_OFFSET,r1
+ adds r14 = VMM_VCPU_GP_OFFSET,r13
+ ;;
+ mov r8=ar.ccv
+ ld8 r14 = [r14]
+ ;;
+ mov r1=r14 /* establish kernel global pointer */
+ ;; \
+ bsw.1
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ mov out0=r13
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ srlz.i // ensure everybody knows psr.ic is back on
+ ;;
.mem.offset 0,0; st8.spill [r2]=r16,16
.mem.offset 8,0; st8.spill [r3]=r17,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r18,16
.mem.offset 8,0; st8.spill [r3]=r19,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r20,16
.mem.offset 8,0; st8.spill [r3]=r21,16
- mov r18=b6
- ;;
+ mov r18=b6
+ ;;
.mem.offset 0,0; st8.spill [r2]=r22,16
.mem.offset 8,0; st8.spill [r3]=r23,16
- mov r19=b7
- ;;
+ mov r19=b7
+ ;;
.mem.offset 0,0; st8.spill [r2]=r24,16
.mem.offset 8,0; st8.spill [r3]=r25,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r26,16
.mem.offset 8,0; st8.spill [r3]=r27,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r28,16
.mem.offset 8,0; st8.spill [r3]=r29,16
- ;;
+ ;;
.mem.offset 0,0; st8.spill [r2]=r30,16
.mem.offset 8,0; st8.spill [r3]=r31,32
- ;;
- mov ar.fpsr=r11 /* M-unit */
- st8 [r2]=r8,8 /* ar.ccv */
- adds r24=PT(B6)-PT(F7),r3
- ;;
- stf.spill [r2]=f6,32
- stf.spill [r3]=f7,32
- ;;
- stf.spill [r2]=f8,32
- stf.spill [r3]=f9,32
- ;;
- stf.spill [r2]=f10
- stf.spill [r3]=f11
- adds r25=PT(B7)-PT(F11),r3
- ;;
- st8 [r24]=r18,16 /* b6 */
- st8 [r25]=r19,16 /* b7 */
- ;;
- st8 [r24]=r9 /* ar.csd */
- st8 [r25]=r10 /* ar.ssd */
- ;;
- srlz.d // make sure we see the effect of cr.ivr
- addl r14=@gprel(ia64_leave_nested),gp
- ;;
- mov rp=r14
- br.call.sptk.many b6=kvm_ia64_handle_irq
- ;;
+ ;;
+ mov ar.fpsr=r11 /* M-unit */
+ st8 [r2]=r8,8 /* ar.ccv */
+ adds r24=PT(B6)-PT(F7),r3
+ ;;
+ stf.spill [r2]=f6,32
+ stf.spill [r3]=f7,32
+ ;;
+ stf.spill [r2]=f8,32
+ stf.spill [r3]=f9,32
+ ;;
+ stf.spill [r2]=f10
+ stf.spill [r3]=f11
+ adds r25=PT(B7)-PT(F11),r3
+ ;;
+ st8 [r24]=r18,16 /* b6 */
+ st8 [r25]=r19,16 /* b7 */
+ ;;
+ st8 [r24]=r9 /* ar.csd */
+ st8 [r25]=r10 /* ar.ssd */
+ ;;
+ srlz.d // make sure we see the effect of cr.ivr
+ addl r14=@gprel(ia64_leave_nested),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_ia64_handle_irq
+ ;;
END(kvm_interrupt)
.global kvm_dispatch_vexirq
@@ -420,385 +414,387 @@ END(kvm_interrupt)
//////////////////////////////////////////////////////////////////////
// 0x3400 Entry 13 (size 64 bundles) Reserved
ENTRY(kvm_virtual_exirq)
- mov r31=pr
- mov r19=13
- mov r30 =r0
- ;;
+ mov r31=pr
+ mov r19=13
+ mov r30 =r0
+ ;;
kvm_dispatch_vexirq:
- cmp.eq p6,p0 = 1,r30
- ;;
-(p6) add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
-(p6) ld8 r1 = [r29]
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,1,0
- mov out0=r13
-
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- adds r3=8,r2 // set up second base pointer
- ;;
- KVM_SAVE_REST
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- mov rp=r14
- br.call.sptk.many b6=kvm_vexirq
+ cmp.eq p6,p0 = 1,r30
+ ;;
+(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+(p6)ld8 r1 = [r29]
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,1,0
+ mov out0=r13
+
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ KVM_SAVE_REST
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ mov rp=r14
+ br.call.sptk.many b6=kvm_vexirq
END(kvm_virtual_exirq)
.org kvm_ia64_ivt+0x3800
/////////////////////////////////////////////////////////////////////
// 0x3800 Entry 14 (size 64 bundles) Reserved
- KVM_FAULT(14)
- // this code segment is from 2.6.16.13
+ KVM_FAULT(14)
+ // this code segment is from 2.6.16.13
+
.org kvm_ia64_ivt+0x3c00
///////////////////////////////////////////////////////////////////////
// 0x3c00 Entry 15 (size 64 bundles) Reserved
- KVM_FAULT(15)
+ KVM_FAULT(15)
+
.org kvm_ia64_ivt+0x4000
///////////////////////////////////////////////////////////////////////
// 0x4000 Entry 16 (size 64 bundles) Reserved
- KVM_FAULT(16)
+ KVM_FAULT(16)
.org kvm_ia64_ivt+0x4400
//////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
- KVM_FAULT(17)
+ KVM_FAULT(17)
.org kvm_ia64_ivt+0x4800
//////////////////////////////////////////////////////////////////////
// 0x4800 Entry 18 (size 64 bundles) Reserved
- KVM_FAULT(18)
+ KVM_FAULT(18)
.org kvm_ia64_ivt+0x4c00
//////////////////////////////////////////////////////////////////////
// 0x4c00 Entry 19 (size 64 bundles) Reserved
- KVM_FAULT(19)
+ KVM_FAULT(19)
.org kvm_ia64_ivt+0x5000
//////////////////////////////////////////////////////////////////////
// 0x5000 Entry 20 (size 16 bundles) Page Not Present
ENTRY(kvm_page_not_present)
- KVM_REFLECT(20)
+ KVM_REFLECT(20)
END(kvm_page_not_present)
.org kvm_ia64_ivt+0x5100
///////////////////////////////////////////////////////////////////////
// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
ENTRY(kvm_key_permission)
- KVM_REFLECT(21)
+ KVM_REFLECT(21)
END(kvm_key_permission)
.org kvm_ia64_ivt+0x5200
//////////////////////////////////////////////////////////////////////
// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
ENTRY(kvm_iaccess_rights)
- KVM_REFLECT(22)
+ KVM_REFLECT(22)
END(kvm_iaccess_rights)
.org kvm_ia64_ivt+0x5300
//////////////////////////////////////////////////////////////////////
// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
ENTRY(kvm_daccess_rights)
- KVM_REFLECT(23)
+ KVM_REFLECT(23)
END(kvm_daccess_rights)
.org kvm_ia64_ivt+0x5400
/////////////////////////////////////////////////////////////////////
// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
ENTRY(kvm_general_exception)
- KVM_REFLECT(24)
- KVM_FAULT(24)
+ KVM_REFLECT(24)
+ KVM_FAULT(24)
END(kvm_general_exception)
.org kvm_ia64_ivt+0x5500
//////////////////////////////////////////////////////////////////////
// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
ENTRY(kvm_disabled_fp_reg)
- KVM_REFLECT(25)
+ KVM_REFLECT(25)
END(kvm_disabled_fp_reg)
.org kvm_ia64_ivt+0x5600
////////////////////////////////////////////////////////////////////
// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
ENTRY(kvm_nat_consumption)
- KVM_REFLECT(26)
+ KVM_REFLECT(26)
END(kvm_nat_consumption)
.org kvm_ia64_ivt+0x5700
/////////////////////////////////////////////////////////////////////
// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
ENTRY(kvm_speculation_vector)
- KVM_REFLECT(27)
+ KVM_REFLECT(27)
END(kvm_speculation_vector)
.org kvm_ia64_ivt+0x5800
/////////////////////////////////////////////////////////////////////
// 0x5800 Entry 28 (size 16 bundles) Reserved
- KVM_FAULT(28)
+ KVM_FAULT(28)
.org kvm_ia64_ivt+0x5900
///////////////////////////////////////////////////////////////////
// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
ENTRY(kvm_debug_vector)
- KVM_FAULT(29)
+ KVM_FAULT(29)
END(kvm_debug_vector)
.org kvm_ia64_ivt+0x5a00
///////////////////////////////////////////////////////////////
// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
ENTRY(kvm_unaligned_access)
- KVM_REFLECT(30)
+ KVM_REFLECT(30)
END(kvm_unaligned_access)
.org kvm_ia64_ivt+0x5b00
//////////////////////////////////////////////////////////////////////
// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
ENTRY(kvm_unsupported_data_reference)
- KVM_REFLECT(31)
+ KVM_REFLECT(31)
END(kvm_unsupported_data_reference)
.org kvm_ia64_ivt+0x5c00
////////////////////////////////////////////////////////////////////
// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
ENTRY(kvm_floating_point_fault)
- KVM_REFLECT(32)
+ KVM_REFLECT(32)
END(kvm_floating_point_fault)
.org kvm_ia64_ivt+0x5d00
/////////////////////////////////////////////////////////////////////
// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
ENTRY(kvm_floating_point_trap)
- KVM_REFLECT(33)
+ KVM_REFLECT(33)
END(kvm_floating_point_trap)
.org kvm_ia64_ivt+0x5e00
//////////////////////////////////////////////////////////////////////
// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
ENTRY(kvm_lower_privilege_trap)
- KVM_REFLECT(34)
+ KVM_REFLECT(34)
END(kvm_lower_privilege_trap)
.org kvm_ia64_ivt+0x5f00
//////////////////////////////////////////////////////////////////////
// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
ENTRY(kvm_taken_branch_trap)
- KVM_REFLECT(35)
+ KVM_REFLECT(35)
END(kvm_taken_branch_trap)
.org kvm_ia64_ivt+0x6000
////////////////////////////////////////////////////////////////////
// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
ENTRY(kvm_single_step_trap)
- KVM_REFLECT(36)
+ KVM_REFLECT(36)
END(kvm_single_step_trap)
.global kvm_virtualization_fault_back
.org kvm_ia64_ivt+0x6100
/////////////////////////////////////////////////////////////////////
// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
ENTRY(kvm_virtualization_fault)
- mov r31=pr
- adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
- st8 [r16] = r1
- adds r17 = VMM_VCPU_GP_OFFSET, r21
- ;;
- ld8 r1 = [r17]
- cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
- cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
- cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
- cmp.eq p9,p0=EVENT_RSM,r24
- cmp.eq p10,p0=EVENT_SSM,r24
- cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
- cmp.eq p12,p0=EVENT_THASH,r24
-(p6) br.dptk.many kvm_asm_mov_from_ar
-(p7) br.dptk.many kvm_asm_mov_from_rr
-(p8) br.dptk.many kvm_asm_mov_to_rr
-(p9) br.dptk.many kvm_asm_rsm
-(p10) br.dptk.many kvm_asm_ssm
-(p11) br.dptk.many kvm_asm_mov_to_psr
-(p12) br.dptk.many kvm_asm_thash
- ;;
+ mov r31=pr
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ st8 [r16] = r1
+ adds r17 = VMM_VCPU_GP_OFFSET, r21
+ ;;
+ ld8 r1 = [r17]
+ cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+ cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+ cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+ cmp.eq p9,p0=EVENT_RSM,r24
+ cmp.eq p10,p0=EVENT_SSM,r24
+ cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+ cmp.eq p12,p0=EVENT_THASH,r24
+ (p6) br.dptk.many kvm_asm_mov_from_ar
+ (p7) br.dptk.many kvm_asm_mov_from_rr
+ (p8) br.dptk.many kvm_asm_mov_to_rr
+ (p9) br.dptk.many kvm_asm_rsm
+ (p10) br.dptk.many kvm_asm_ssm
+ (p11) br.dptk.many kvm_asm_mov_to_psr
+ (p12) br.dptk.many kvm_asm_thash
+ ;;
kvm_virtualization_fault_back:
- adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
- ;;
- ld8 r1 = [r16]
- ;;
- mov r19=37
- adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
- adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
- ;;
- st8 [r16] = r24
- st8 [r17] = r25
- ;;
- cmp.ne p6,p0=EVENT_RFI, r24
-(p6) br.sptk kvm_dispatch_virtualization_fault
- ;;
- adds r18=VMM_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r18]
- ;;
- adds r18=VMM_VPD_VIFS_OFFSET,r18
- ;;
- ld8 r18=[r18]
- ;;
- tbit.z p6,p0=r18,63
-(p6) br.sptk kvm_dispatch_virtualization_fault
- ;;
-//if vifs.v=1 desert current register frame
- alloc r18=ar.pfs,0,0,0,0
- br.sptk kvm_dispatch_virtualization_fault
+ adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+ ;;
+ ld8 r1 = [r16]
+ ;;
+ mov r19=37
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ cmp.ne p6,p0=EVENT_RFI, r24
+ (p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18]
+ ;;
+ adds r18=VMM_VPD_VIFS_OFFSET,r18
+ ;;
+ ld8 r18=[r18]
+ ;;
+ tbit.z p6,p0=r18,63
+ (p6) br.sptk kvm_dispatch_virtualization_fault
+ ;;
+ //if vifs.v=1 desert current register frame
+ alloc r18=ar.pfs,0,0,0,0
+ br.sptk kvm_dispatch_virtualization_fault
END(kvm_virtualization_fault)
.org kvm_ia64_ivt+0x6200
//////////////////////////////////////////////////////////////
// 0x6200 Entry 38 (size 16 bundles) Reserved
- KVM_FAULT(38)
+ KVM_FAULT(38)
.org kvm_ia64_ivt+0x6300
/////////////////////////////////////////////////////////////////
// 0x6300 Entry 39 (size 16 bundles) Reserved
- KVM_FAULT(39)
+ KVM_FAULT(39)
.org kvm_ia64_ivt+0x6400
/////////////////////////////////////////////////////////////////
// 0x6400 Entry 40 (size 16 bundles) Reserved
- KVM_FAULT(40)
+ KVM_FAULT(40)
.org kvm_ia64_ivt+0x6500
//////////////////////////////////////////////////////////////////
// 0x6500 Entry 41 (size 16 bundles) Reserved
- KVM_FAULT(41)
+ KVM_FAULT(41)
.org kvm_ia64_ivt+0x6600
//////////////////////////////////////////////////////////////////
// 0x6600 Entry 42 (size 16 bundles) Reserved
- KVM_FAULT(42)
+ KVM_FAULT(42)
.org kvm_ia64_ivt+0x6700
//////////////////////////////////////////////////////////////////
// 0x6700 Entry 43 (size 16 bundles) Reserved
- KVM_FAULT(43)
+ KVM_FAULT(43)
.org kvm_ia64_ivt+0x6800
//////////////////////////////////////////////////////////////////
// 0x6800 Entry 44 (size 16 bundles) Reserved
- KVM_FAULT(44)
+ KVM_FAULT(44)
.org kvm_ia64_ivt+0x6900
///////////////////////////////////////////////////////////////////
// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
ENTRY(kvm_ia32_exception)
- KVM_FAULT(45)
+ KVM_FAULT(45)
END(kvm_ia32_exception)
.org kvm_ia64_ivt+0x6a00
////////////////////////////////////////////////////////////////////
// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
ENTRY(kvm_ia32_intercept)
- KVM_FAULT(47)
+ KVM_FAULT(47)
END(kvm_ia32_intercept)
.org kvm_ia64_ivt+0x6c00
/////////////////////////////////////////////////////////////////////
// 0x6c00 Entry 48 (size 16 bundles) Reserved
- KVM_FAULT(48)
+ KVM_FAULT(48)
.org kvm_ia64_ivt+0x6d00
//////////////////////////////////////////////////////////////////////
// 0x6d00 Entry 49 (size 16 bundles) Reserved
- KVM_FAULT(49)
+ KVM_FAULT(49)
.org kvm_ia64_ivt+0x6e00
//////////////////////////////////////////////////////////////////////
// 0x6e00 Entry 50 (size 16 bundles) Reserved
- KVM_FAULT(50)
+ KVM_FAULT(50)
.org kvm_ia64_ivt+0x6f00
/////////////////////////////////////////////////////////////////////
// 0x6f00 Entry 51 (size 16 bundles) Reserved
- KVM_FAULT(52)
+ KVM_FAULT(52)
.org kvm_ia64_ivt+0x7100
////////////////////////////////////////////////////////////////////
// 0x7100 Entry 53 (size 16 bundles) Reserved
- KVM_FAULT(53)
+ KVM_FAULT(53)
.org kvm_ia64_ivt+0x7200
/////////////////////////////////////////////////////////////////////
// 0x7200 Entry 54 (size 16 bundles) Reserved
- KVM_FAULT(54)
+ KVM_FAULT(54)
.org kvm_ia64_ivt+0x7300
////////////////////////////////////////////////////////////////////
// 0x7300 Entry 55 (size 16 bundles) Reserved
- KVM_FAULT(55)
+ KVM_FAULT(55)
.org kvm_ia64_ivt+0x7400
////////////////////////////////////////////////////////////////////
// 0x7400 Entry 56 (size 16 bundles) Reserved
- KVM_FAULT(56)
+ KVM_FAULT(56)
.org kvm_ia64_ivt+0x7500
/////////////////////////////////////////////////////////////////////
// 0x7500 Entry 57 (size 16 bundles) Reserved
- KVM_FAULT(57)
+ KVM_FAULT(57)
.org kvm_ia64_ivt+0x7600
/////////////////////////////////////////////////////////////////////
// 0x7600 Entry 58 (size 16 bundles) Reserved
- KVM_FAULT(58)
+ KVM_FAULT(58)
.org kvm_ia64_ivt+0x7700
////////////////////////////////////////////////////////////////////
// 0x7700 Entry 59 (size 16 bundles) Reserved
- KVM_FAULT(59)
+ KVM_FAULT(59)
.org kvm_ia64_ivt+0x7800
////////////////////////////////////////////////////////////////////
// 0x7800 Entry 60 (size 16 bundles) Reserved
- KVM_FAULT(60)
+ KVM_FAULT(60)
.org kvm_ia64_ivt+0x7900
/////////////////////////////////////////////////////////////////////
// 0x7900 Entry 61 (size 16 bundles) Reserved
- KVM_FAULT(61)
+ KVM_FAULT(61)
.org kvm_ia64_ivt+0x7a00
/////////////////////////////////////////////////////////////////////
// 0x7a00 Entry 62 (size 16 bundles) Reserved
- KVM_FAULT(62)
+ KVM_FAULT(62)
.org kvm_ia64_ivt+0x7b00
/////////////////////////////////////////////////////////////////////
// 0x7b00 Entry 63 (size 16 bundles) Reserved
- KVM_FAULT(63)
+ KVM_FAULT(63)
.org kvm_ia64_ivt+0x7c00
////////////////////////////////////////////////////////////////////
// 0x7c00 Entry 64 (size 16 bundles) Reserved
- KVM_FAULT(64)
+ KVM_FAULT(64)
.org kvm_ia64_ivt+0x7d00
/////////////////////////////////////////////////////////////////////
// 0x7d00 Entry 65 (size 16 bundles) Reserved
- KVM_FAULT(65)
+ KVM_FAULT(65)
.org kvm_ia64_ivt+0x7e00
/////////////////////////////////////////////////////////////////////
// 0x7e00 Entry 66 (size 16 bundles) Reserved
- KVM_FAULT(66)
+ KVM_FAULT(66)
.org kvm_ia64_ivt+0x7f00
////////////////////////////////////////////////////////////////////
// 0x7f00 Entry 67 (size 16 bundles) Reserved
- KVM_FAULT(67)
+ KVM_FAULT(67)
.org kvm_ia64_ivt+0x8000
// There is no particular reason for this code to be here, other than that
@@ -808,128 +804,132 @@ END(kvm_ia32_intercept)
ENTRY(kvm_dtlb_miss_dispatch)
- mov r19 = 2
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
- ;;
- KVM_SAVE_REST
- KVM_SAVE_EXTRA
- mov rp=r14
- ;;
- adds out2=16,r12
- br.call.sptk.many b6=kvm_page_fault
+ mov r19 = 2
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
END(kvm_dtlb_miss_dispatch)
ENTRY(kvm_itlb_miss_dispatch)
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,3,0
- mov out0=cr.ifa
- mov out1=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out2=16,r12
- br.call.sptk.many b6=kvm_page_fault
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,3,0
+ mov out0=cr.ifa
+ mov out1=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out2=16,r12
+ br.call.sptk.many b6=kvm_page_fault
END(kvm_itlb_miss_dispatch)
ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- * psr.ic: off
- * r19: intr type (offset into ivt, see ia64_int.h)
- * r31: contains saved predicates (pr)
- */
- KVM_SAVE_MIN_WITH_COVER_R19
- alloc r14=ar.pfs,0,0,5,0
- mov out0=cr.ifa
- mov out1=cr.isr
- mov out2=cr.iim
- mov out3=r15
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- adds out4=16,r12
- br.call.sptk.many b6=reflect_interruption
+ /*
+ * Input:
+ * psr.ic: off
+ * r19: intr type (offset into ivt, see ia64_int.h)
+ * r31: contains saved predicates (pr)
+ */
+ KVM_SAVE_MIN_WITH_COVER_R19
+ alloc r14=ar.pfs,0,0,5,0
+ mov out0=cr.ifa
+ mov out1=cr.isr
+ mov out2=cr.iim
+ mov out3=r15
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ adds out4=16,r12
+ br.call.sptk.many b6=reflect_interruption
END(kvm_dispatch_reflection)
ENTRY(kvm_dispatch_virtualization_fault)
- adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
- adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
- ;;
- st8 [r16] = r24
- st8 [r17] = r25
- ;;
- KVM_SAVE_MIN_WITH_COVER_R19
- ;;
- alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
- mov out0=r13 //vcpu
- adds r3=8,r2 // set up second base pointer
- ;;
- ssm psr.ic
- ;;
- srlz.i // guarantee that interruption collection is on
- ;;
- //(p15) ssm psr.i // restore psr.i
- addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
- ;;
- KVM_SAVE_REST
- KVM_SAVE_EXTRA
- mov rp=r14
- ;;
- adds out1=16,sp //regs
- br.call.sptk.many b6=kvm_emulate
+ adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+ adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+ ;;
+ st8 [r16] = r24
+ st8 [r17] = r25
+ ;;
+ KVM_SAVE_MIN_WITH_COVER_R19
+ ;;
+ alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+ mov out0=r13 //vcpu
+ adds r3=8,r2 // set up second base pointer
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i // guarantee that interruption collection is on
+ ;;
+ //(p15) ssm psr.i // restore psr.i
+ addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+ ;;
+ KVM_SAVE_REST
+ KVM_SAVE_EXTRA
+ mov rp=r14
+ ;;
+ adds out1=16,sp //regs
+ br.call.sptk.many b6=kvm_emulate
END(kvm_dispatch_virtualization_fault)
ENTRY(kvm_dispatch_interrupt)
- KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
- ;;
- alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
- adds r3=8,r2 // set up second base pointer for SAVE_REST
- ;;
- ssm psr.ic
- ;;
- srlz.i
- ;;
- //(p15) ssm psr.i
- addl r14=@gprel(ia64_leave_hypervisor),gp
- ;;
- KVM_SAVE_REST
- mov rp=r14
- ;;
- mov out0=r13 // pass pointer to pt_regs as second arg
- br.call.sptk.many b6=kvm_ia64_handle_irq
+ KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3
+ ;;
+ alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+ //mov out0=cr.ivr // pass cr.ivr as first arg
+ adds r3=8,r2 // set up second base pointer for SAVE_REST
+ ;;
+ ssm psr.ic
+ ;;
+ srlz.i
+ ;;
+ //(p15) ssm psr.i
+ addl r14=@gprel(ia64_leave_hypervisor),gp
+ ;;
+ KVM_SAVE_REST
+ mov rp=r14
+ ;;
+ mov out0=r13 // pass pointer to pt_regs as second arg
+ br.call.sptk.many b6=kvm_ia64_handle_irq
END(kvm_dispatch_interrupt)
+
+
+
GLOBAL_ENTRY(ia64_leave_nested)
rsm psr.i
;;
@@ -1008,7 +1008,7 @@ GLOBAL_ENTRY(ia64_leave_nested)
;;
ldf.fill f11=[r2]
// mov r18=r13
-// mov r21=r13
+// mov r21=r13
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
;;
@@ -1058,135 +1058,138 @@ GLOBAL_ENTRY(ia64_leave_nested)
rfi
END(ia64_leave_nested)
+
+
GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on:
- */
- adds r2 = PT(R4)+16,r12
- adds r3 = PT(R5)+16,r12
- adds r8 = PT(EML_UNAT)+16,r12
- ;;
- ld8 r8 = [r8]
- ;;
- mov ar.unat=r8
- ;;
- ld8.fill r4=[r2],16 //load r4
- ld8.fill r5=[r3],16 //load r5
- ;;
- ld8.fill r6=[r2] //load r6
- ld8.fill r7=[r3] //load r7
- ;;
+ /*
+ * work.need_resched etc. mustn't get changed
+ *by this CPU before it returns to
+ ;;
+ * user- or fsys-mode, hence we disable interrupts early on:
+ */
+ adds r2 = PT(R4)+16,r12
+ adds r3 = PT(R5)+16,r12
+ adds r8 = PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8 = [r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ ld8.fill r4=[r2],16 //load r4
+ ld8.fill r5=[r3],16 //load r5
+ ;;
+ ld8.fill r6=[r2] //load r6
+ ld8.fill r7=[r3] //load r7
+ ;;
END(ia64_leave_hypervisor_prepare)
//fall through
GLOBAL_ENTRY(ia64_leave_hypervisor)
- rsm psr.i
- ;;
- br.call.sptk.many b0=leave_hypervisor_tail
- ;;
- adds r20=PT(PR)+16,r12
- adds r8=PT(EML_UNAT)+16,r12
- ;;
- ld8 r8=[r8]
- ;;
- mov ar.unat=r8
- ;;
- lfetch [r20],PT(CR_IPSR)-PT(PR)
- adds r2 = PT(B6)+16,r12
- adds r3 = PT(B7)+16,r12
- ;;
- lfetch [r20]
- ;;
- ld8 r24=[r2],16 /* B6 */
- ld8 r25=[r3],16 /* B7 */
- ;;
- ld8 r26=[r2],16 /* ar_csd */
- ld8 r27=[r3],16 /* ar_ssd */
- mov b6 = r24
- ;;
- ld8.fill r8=[r2],16
- ld8.fill r9=[r3],16
- mov b7 = r25
- ;;
- mov ar.csd = r26
- mov ar.ssd = r27
- ;;
- ld8.fill r10=[r2],PT(R15)-PT(R10)
- ld8.fill r11=[r3],PT(R14)-PT(R11)
- ;;
- ld8.fill r15=[r2],PT(R16)-PT(R15)
- ld8.fill r14=[r3],PT(R17)-PT(R14)
- ;;
- ld8.fill r16=[r2],16
- ld8.fill r17=[r3],16
- ;;
- ld8.fill r18=[r2],16
- ld8.fill r19=[r3],16
- ;;
- ld8.fill r20=[r2],16
- ld8.fill r21=[r3],16
- ;;
- ld8.fill r22=[r2],16
- ld8.fill r23=[r3],16
- ;;
- ld8.fill r24=[r2],16
- ld8.fill r25=[r3],16
- ;;
- ld8.fill r26=[r2],16
- ld8.fill r27=[r3],16
- ;;
- ld8.fill r28=[r2],16
- ld8.fill r29=[r3],16
- ;;
- ld8.fill r30=[r2],PT(F6)-PT(R30)
- ld8.fill r31=[r3],PT(F7)-PT(R31)
- ;;
- rsm psr.i | psr.ic
- // initiate turning off of interrupt and interruption collection
- invala // invalidate ALAT
- ;;
- srlz.i // ensure interruption collection is off
- ;;
- bsw.0
- ;;
- adds r16 = PT(CR_IPSR)+16,r12
- adds r17 = PT(CR_IIP)+16,r12
- mov r21=r13 // get current
- ;;
- ld8 r31=[r16],16 // load cr.ipsr
- ld8 r30=[r17],16 // load cr.iip
- ;;
- ld8 r29=[r16],16 // load cr.ifs
- ld8 r28=[r17],16 // load ar.unat
- ;;
- ld8 r27=[r16],16 // load ar.pfs
- ld8 r26=[r17],16 // load ar.rsc
- ;;
- ld8 r25=[r16],16 // load ar.rnat
- ld8 r24=[r17],16 // load ar.bspstore
- ;;
- ld8 r23=[r16],16 // load predicates
- ld8 r22=[r17],16 // load b0
- ;;
- ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
- ld8.fill r1=[r17],16 //load r1
- ;;
- ld8.fill r12=[r16],16 //load r12
- ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
- ;;
- ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
- ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
- ;;
- ld8.fill r3=[r16] //load r3
- ld8 r18=[r17] //load ar_ccv
- ;;
- mov ar.fpsr=r19
- mov ar.ccv=r18
- shr.u r18=r20,16
- ;;
+ rsm psr.i
+ ;;
+ br.call.sptk.many b0=leave_hypervisor_tail
+ ;;
+ adds r20=PT(PR)+16,r12
+ adds r8=PT(EML_UNAT)+16,r12
+ ;;
+ ld8 r8=[r8]
+ ;;
+ mov ar.unat=r8
+ ;;
+ lfetch [r20],PT(CR_IPSR)-PT(PR)
+ adds r2 = PT(B6)+16,r12
+ adds r3 = PT(B7)+16,r12
+ ;;
+ lfetch [r20]
+ ;;
+ ld8 r24=[r2],16 /* B6 */
+ ld8 r25=[r3],16 /* B7 */
+ ;;
+ ld8 r26=[r2],16 /* ar_csd */
+ ld8 r27=[r3],16 /* ar_ssd */
+ mov b6 = r24
+ ;;
+ ld8.fill r8=[r2],16
+ ld8.fill r9=[r3],16
+ mov b7 = r25
+ ;;
+ mov ar.csd = r26
+ mov ar.ssd = r27
+ ;;
+ ld8.fill r10=[r2],PT(R15)-PT(R10)
+ ld8.fill r11=[r3],PT(R14)-PT(R11)
+ ;;
+ ld8.fill r15=[r2],PT(R16)-PT(R15)
+ ld8.fill r14=[r3],PT(R17)-PT(R14)
+ ;;
+ ld8.fill r16=[r2],16
+ ld8.fill r17=[r3],16
+ ;;
+ ld8.fill r18=[r2],16
+ ld8.fill r19=[r3],16
+ ;;
+ ld8.fill r20=[r2],16
+ ld8.fill r21=[r3],16
+ ;;
+ ld8.fill r22=[r2],16
+ ld8.fill r23=[r3],16
+ ;;
+ ld8.fill r24=[r2],16
+ ld8.fill r25=[r3],16
+ ;;
+ ld8.fill r26=[r2],16
+ ld8.fill r27=[r3],16
+ ;;
+ ld8.fill r28=[r2],16
+ ld8.fill r29=[r3],16
+ ;;
+ ld8.fill r30=[r2],PT(F6)-PT(R30)
+ ld8.fill r31=[r3],PT(F7)-PT(R31)
+ ;;
+ rsm psr.i | psr.ic
+ // initiate turning off of interrupt and interruption collection
+ invala // invalidate ALAT
+ ;;
+ srlz.i // ensure interruption collection is off
+ ;;
+ bsw.0
+ ;;
+ adds r16 = PT(CR_IPSR)+16,r12
+ adds r17 = PT(CR_IIP)+16,r12
+ mov r21=r13 // get current
+ ;;
+ ld8 r31=[r16],16 // load cr.ipsr
+ ld8 r30=[r17],16 // load cr.iip
+ ;;
+ ld8 r29=[r16],16 // load cr.ifs
+ ld8 r28=[r17],16 // load ar.unat
+ ;;
+ ld8 r27=[r16],16 // load ar.pfs
+ ld8 r26=[r17],16 // load ar.rsc
+ ;;
+ ld8 r25=[r16],16 // load ar.rnat
+ ld8 r24=[r17],16 // load ar.bspstore
+ ;;
+ ld8 r23=[r16],16 // load predicates
+ ld8 r22=[r17],16 // load b0
+ ;;
+ ld8 r20=[r16],16 // load ar.rsc value for "loadrs"
+ ld8.fill r1=[r17],16 //load r1
+ ;;
+ ld8.fill r12=[r16],16 //load r12
+ ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13
+ ;;
+ ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr
+ ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2
+ ;;
+ ld8.fill r3=[r16] //load r3
+ ld8 r18=[r17] //load ar_ccv
+ ;;
+ mov ar.fpsr=r19
+ mov ar.ccv=r18
+ shr.u r18=r20,16
+ ;;
kvm_rbs_switch:
- mov r19=96
+ mov r19=96
kvm_dont_preserve_current_frame:
/*
@@ -1198,76 +1201,76 @@ kvm_dont_preserve_current_frame:
# define pReturn p7
# define Nregs 14
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
- sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
- ;;
- mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
- shladd in0=loc1,3,r19
- mov in1=0
- ;;
- TEXT_ALIGN(32)
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
+ sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize
+ ;;
+ mov ar.rsc=r20 // load ar.rsc to be used for "loadrs"
+ shladd in0=loc1,3,r19
+ mov in1=0
+ ;;
+ TEXT_ALIGN(32)
kvm_rse_clear_invalid:
- alloc loc0=ar.pfs,2,Nregs-2,2,0
- cmp.lt pRecurse,p0=Nregs*8,in0
- // if more than Nregs regs left to clear, (re)curse
- add out0=-Nregs*8,in0
- add out1=1,in1 // increment recursion count
- mov loc1=0
- mov loc2=0
- ;;
- mov loc3=0
- mov loc4=0
- mov loc5=0
- mov loc6=0
- mov loc7=0
+ alloc loc0=ar.pfs,2,Nregs-2,2,0
+ cmp.lt pRecurse,p0=Nregs*8,in0
+ // if more than Nregs regs left to clear, (re)curse
+ add out0=-Nregs*8,in0
+ add out1=1,in1 // increment recursion count
+ mov loc1=0
+ mov loc2=0
+ ;;
+ mov loc3=0
+ mov loc4=0
+ mov loc5=0
+ mov loc6=0
+ mov loc7=0
(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
- ;;
- mov loc8=0
- mov loc9=0
- cmp.ne pReturn,p0=r0,in1
- // if recursion count != 0, we need to do a br.ret
- mov loc10=0
- mov loc11=0
+ ;;
+ mov loc8=0
+ mov loc9=0
+ cmp.ne pReturn,p0=r0,in1
+ // if recursion count != 0, we need to do a br.ret
+ mov loc10=0
+ mov loc11=0
(pReturn) br.ret.dptk.many b0
# undef pRecurse
# undef pReturn
// loadrs has already been shifted
- alloc r16=ar.pfs,0,0,0,0 // drop current register frame
- ;;
- loadrs
- ;;
- mov ar.bspstore=r24
- ;;
- mov ar.unat=r28
- mov ar.rnat=r25
- mov ar.rsc=r26
- ;;
- mov cr.ipsr=r31
- mov cr.iip=r30
- mov cr.ifs=r29
- mov ar.pfs=r27
- adds r18=VMM_VPD_BASE_OFFSET,r21
- ;;
- ld8 r18=[r18] //vpd
- adds r17=VMM_VCPU_ISR_OFFSET,r21
- ;;
- ld8 r17=[r17]
- adds r19=VMM_VPD_VPSR_OFFSET,r18
- ;;
- ld8 r19=[r19] //vpsr
- mov r25=r18
- adds r16= VMM_VCPU_GP_OFFSET,r21
- ;;
- ld8 r16= [r16] // Put gp in r24
- movl r24=@gprel(ia64_vmm_entry) // calculate return address
- ;;
- add r24=r24,r16
- ;;
- br.sptk.many kvm_vps_sync_write // call the service
- ;;
+ alloc r16=ar.pfs,0,0,0,0 // drop current register frame
+ ;;
+ loadrs
+ ;;
+ mov ar.bspstore=r24
+ ;;
+ mov ar.unat=r28
+ mov ar.rnat=r25
+ mov ar.rsc=r26
+ ;;
+ mov cr.ipsr=r31
+ mov cr.iip=r30
+ mov cr.ifs=r29
+ mov ar.pfs=r27
+ adds r18=VMM_VPD_BASE_OFFSET,r21
+ ;;
+ ld8 r18=[r18] //vpd
+ adds r17=VMM_VCPU_ISR_OFFSET,r21
+ ;;
+ ld8 r17=[r17]
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19] //vpsr
+ mov r25=r18
+ adds r16= VMM_VCPU_GP_OFFSET,r21
+ ;;
+ ld8 r16= [r16] // Put gp in r24
+ movl r24=@gprel(ia64_vmm_entry) // calculate return address
+ ;;
+ add r24=r24,r16
+ ;;
+ br.sptk.many kvm_vps_sync_write // call the service
+ ;;
END(ia64_leave_hypervisor)
// fall through
GLOBAL_ENTRY(ia64_vmm_entry)
@@ -1280,14 +1283,16 @@ GLOBAL_ENTRY(ia64_vmm_entry)
* r22:b0
* r23:predicate
*/
- mov r24=r22
- mov r25=r18
- tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
-(p1) br.cond.sptk.few kvm_vps_resume_normal
-(p2) br.cond.sptk.many kvm_vps_resume_handler
- ;;
+ mov r24=r22
+ mov r25=r18
+ tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic
+ (p1) br.cond.sptk.few kvm_vps_resume_normal
+ (p2) br.cond.sptk.many kvm_vps_resume_handler
+ ;;
END(ia64_vmm_entry)
+
+
/*
* extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
* u64 arg3, u64 arg4, u64 arg5,
@@ -1305,88 +1310,88 @@ psrsave = loc2
entry = loc3
hostret = r24
- alloc pfssave=ar.pfs,4,4,0,0
- mov rpsave=rp
- adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
- ;;
- ld8 entry=[entry]
-1: mov hostret=ip
- mov r25=in1 // copy arguments
- mov r26=in2
- mov r27=in3
- mov psrsave=psr
- ;;
- tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
- tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
- ;;
- add hostret=2f-1b,hostret // calculate return address
- add entry=entry,in0
- ;;
- rsm psr.i | psr.ic
- ;;
- srlz.i
- mov b6=entry
- br.cond.sptk b6 // call the service
+ alloc pfssave=ar.pfs,4,4,0,0
+ mov rpsave=rp
+ adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+ ;;
+ ld8 entry=[entry]
+1: mov hostret=ip
+ mov r25=in1 // copy arguments
+ mov r26=in2
+ mov r27=in3
+ mov psrsave=psr
+ ;;
+ tbit.nz p6,p0=psrsave,14 // IA64_PSR_I
+ tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC
+ ;;
+ add hostret=2f-1b,hostret // calculate return address
+ add entry=entry,in0
+ ;;
+ rsm psr.i | psr.ic
+ ;;
+ srlz.i
+ mov b6=entry
+ br.cond.sptk b6 // call the service
2:
-// Architectural sequence for enabling interrupts if necessary
+ // Architectural sequence for enabling interrupts if necessary
(p7) ssm psr.ic
- ;;
+ ;;
(p7) srlz.i
- ;;
+ ;;
//(p6) ssm psr.i
- ;;
- mov rp=rpsave
- mov ar.pfs=pfssave
- mov r8=r31
- ;;
- srlz.d
- br.ret.sptk rp
+ ;;
+ mov rp=rpsave
+ mov ar.pfs=pfssave
+ mov r8=r31
+ ;;
+ srlz.d
+ br.ret.sptk rp
END(ia64_call_vsa)
#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100)
GLOBAL_ENTRY(vmm_reset_entry)
- //set up ipsr, iip, vpd.vpsr, dcr
- // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
- // For DCR: all bits 0
- bsw.0
- ;;
- mov r21 =r13
- adds r14=-VMM_PT_REGS_SIZE, r12
- ;;
- movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
- movl r10=0x8000000000000000
- adds r16=PT(CR_IIP), r14
- adds r20=PT(R1), r14
- ;;
- rsm psr.ic | psr.i
- ;;
- srlz.i
- ;;
- mov ar.rsc = 0
- ;;
- flushrs
- ;;
- mov ar.bspstore = 0
- // clear BSPSTORE
- ;;
- mov cr.ipsr=r6
- mov cr.ifs=r10
- ld8 r4 = [r16] // Set init iip for first run.
- ld8 r1 = [r20]
- ;;
- mov cr.iip=r4
- adds r16=VMM_VPD_BASE_OFFSET,r13
- ;;
- ld8 r18=[r16]
- ;;
- adds r19=VMM_VPD_VPSR_OFFSET,r18
- ;;
- ld8 r19=[r19]
- mov r17=r0
- mov r22=r0
- mov r23=r0
- br.cond.sptk ia64_vmm_entry
- br.ret.sptk b0
+ //set up ipsr, iip, vpd.vpsr, dcr
+ // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+ // For DCR: all bits 0
+ bsw.0
+ ;;
+ mov r21 =r13
+ adds r14=-VMM_PT_REGS_SIZE, r12
+ ;;
+ movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+ movl r10=0x8000000000000000
+ adds r16=PT(CR_IIP), r14
+ adds r20=PT(R1), r14
+ ;;
+ rsm psr.ic | psr.i
+ ;;
+ srlz.i
+ ;;
+ mov ar.rsc = 0
+ ;;
+ flushrs
+ ;;
+ mov ar.bspstore = 0
+ // clear BSPSTORE
+ ;;
+ mov cr.ipsr=r6
+ mov cr.ifs=r10
+ ld8 r4 = [r16] // Set init iip for first run.
+ ld8 r1 = [r20]
+ ;;
+ mov cr.iip=r4
+ adds r16=VMM_VPD_BASE_OFFSET,r13
+ ;;
+ ld8 r18=[r16]
+ ;;
+ adds r19=VMM_VPD_VPSR_OFFSET,r18
+ ;;
+ ld8 r19=[r19]
+ mov r17=r0
+ mov r22=r0
+ mov r23=r0
+ br.cond.sptk ia64_vmm_entry
+ br.ret.sptk b0
END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd55..e22b93361e08 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -183,8 +183,8 @@ void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
u64 i, dirty_pages = 1;
u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
- void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
+ void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
+ + KVM_MEM_DIRTY_LOG_OFS;
dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
vmm_spin_lock(lock);
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
deleted file mode 100644
index 9b198d1b3b2b..000000000000
--- a/arch/powerpc/include/asm/disassemble.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#ifndef __ASM_PPC_DISASSEMBLE_H__
-#define __ASM_PPC_DISASSEMBLE_H__
-
-#include <linux/types.h>
-
-static inline unsigned int get_op(u32 inst)
-{
- return inst >> 26;
-}
-
-static inline unsigned int get_xop(u32 inst)
-{
- return (inst >> 1) & 0x3ff;
-}
-
-static inline unsigned int get_sprn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_dcrn(u32 inst)
-{
- return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
-}
-
-static inline unsigned int get_rt(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_rs(u32 inst)
-{
- return (inst >> 21) & 0x1f;
-}
-
-static inline unsigned int get_ra(u32 inst)
-{
- return (inst >> 16) & 0x1f;
-}
-
-static inline unsigned int get_rb(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_rc(u32 inst)
-{
- return inst & 0x1;
-}
-
-static inline unsigned int get_ws(u32 inst)
-{
- return (inst >> 11) & 0x1f;
-}
-
-static inline unsigned int get_d(u32 inst)
-{
- return inst & 0xffff;
-}
-
-#endif /* __ASM_PPC_DISASSEMBLE_H__ */
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 755f1b1948c5..f993e4198d5c 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -52,11 +52,4 @@ struct kvm_fpu {
__u64 fpr[32];
};
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
deleted file mode 100644
index f49031b632ca..000000000000
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#ifndef __ASM_44X_H__
-#define __ASM_44X_H__
-
-#include <linux/kvm_host.h>
-
-#define PPC44x_TLB_SIZE 64
-
-/* If the guest is expecting it, this can be as large as we like; we'd just
- * need to find some way of advertising it. */
-#define KVM44x_GUEST_TLB_SIZE 64
-
-struct kvmppc_44x_shadow_ref {
- struct page *page;
- u16 gtlb_index;
- u8 writeable;
- u8 tid;
-};
-
-struct kvmppc_vcpu_44x {
- /* Unmodified copy of the guest's TLB. */
- struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
-
- /* References to guest pages in the hardware TLB. */
- struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
-
- /* State of the shadow TLB at guest context switch time. */
- struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
- u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
-
- struct kvm_vcpu vcpu;
-};
-
-static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu)
-{
- return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu);
-}
-
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid);
-void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu);
-void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu);
-
-#endif /* __ASM_44X_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe7738..34b52b7180cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,58 +64,27 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvmppc_44x_tlbe {
+struct tlbe {
u32 tid; /* Only the low 8 bits are used. */
u32 word0;
u32 word1;
u32 word2;
};
-enum kvm_exit_types {
- MMIO_EXITS,
- DCR_EXITS,
- SIGNAL_EXITS,
- ITLB_REAL_MISS_EXITS,
- ITLB_VIRT_MISS_EXITS,
- DTLB_REAL_MISS_EXITS,
- DTLB_VIRT_MISS_EXITS,
- SYSCALL_EXITS,
- ISI_EXITS,
- DSI_EXITS,
- EMULATED_INST_EXITS,
- EMULATED_MTMSRWE_EXITS,
- EMULATED_WRTEE_EXITS,
- EMULATED_MTSPR_EXITS,
- EMULATED_MFSPR_EXITS,
- EMULATED_MTMSR_EXITS,
- EMULATED_MFMSR_EXITS,
- EMULATED_TLBSX_EXITS,
- EMULATED_TLBWE_EXITS,
- EMULATED_RFI_EXITS,
- DEC_EXITS,
- EXT_INTR_EXITS,
- HALT_WAKEUP,
- USR_PR_INST,
- FP_UNAVAIL,
- DEBUG_EXITS,
- TIMEINGUEST,
- __NUMBER_OF_KVM_EXIT_TYPES
-};
-
-/* allow access to big endian 32bit upper/lower parts and 64bit var */
-struct kvmppc_exit_timing {
- union {
- u64 tv64;
- struct {
- u32 tbu, tbl;
- } tv32;
- };
-};
-
struct kvm_arch {
};
struct kvm_vcpu_arch {
+ /* Unmodified copy of the guest's TLB. */
+ struct tlbe guest_tlb[PPC44x_TLB_SIZE];
+ /* TLB that's actually used when the guest is running. */
+ struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
+ /* Pages which are referenced in the shadow TLB. */
+ struct page *shadow_pages[PPC44x_TLB_SIZE];
+
+ /* Track which TLB entries we've modified in the current exit. */
+ u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+
u32 host_stack;
u32 host_pid;
u32 host_dbcr0;
@@ -125,32 +94,32 @@ struct kvm_vcpu_arch {
u32 host_msr;
u64 fpr[32];
- ulong gpr[32];
+ u32 gpr[32];
- ulong pc;
+ u32 pc;
u32 cr;
- ulong ctr;
- ulong lr;
- ulong xer;
+ u32 ctr;
+ u32 lr;
+ u32 xer;
- ulong msr;
+ u32 msr;
u32 mmucr;
- ulong sprg0;
- ulong sprg1;
- ulong sprg2;
- ulong sprg3;
- ulong sprg4;
- ulong sprg5;
- ulong sprg6;
- ulong sprg7;
- ulong srr0;
- ulong srr1;
- ulong csrr0;
- ulong csrr1;
- ulong dsrr0;
- ulong dsrr1;
- ulong dear;
- ulong esr;
+ u32 sprg0;
+ u32 sprg1;
+ u32 sprg2;
+ u32 sprg3;
+ u32 sprg4;
+ u32 sprg5;
+ u32 sprg6;
+ u32 sprg7;
+ u32 srr0;
+ u32 srr1;
+ u32 csrr0;
+ u32 csrr1;
+ u32 dsrr0;
+ u32 dsrr1;
+ u32 dear;
+ u32 esr;
u32 dec;
u32 decar;
u32 tbl;
@@ -158,7 +127,7 @@ struct kvm_vcpu_arch {
u32 tcr;
u32 tsr;
u32 ivor[16];
- ulong ivpr;
+ u32 ivpr;
u32 pir;
u32 shadow_pid;
@@ -171,22 +140,9 @@ struct kvm_vcpu_arch {
u32 dbcr0;
u32 dbcr1;
-#ifdef CONFIG_KVM_EXIT_TIMING
- struct kvmppc_exit_timing timing_exit;
- struct kvmppc_exit_timing timing_last_enter;
- u32 last_exit_type;
- u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES];
- u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES];
- u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES];
- u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
- u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
- u64 timing_last_exit;
- struct dentry *debugfs_exit_timing;
-#endif
-
u32 last_inst;
- ulong fault_dear;
- ulong fault_esr;
+ u32 fault_dear;
+ u32 fault_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index dfc644263826..bb62ad876de3 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -29,6 +29,11 @@
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+struct kvm_tlb {
+ struct tlbe guest_tlb[PPC44x_TLB_SIZE];
+ struct tlbe shadow_tlb[PPC44x_TLB_SIZE];
+};
+
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */
@@ -36,6 +41,9 @@ enum emulation_result {
EMULATE_FAIL, /* can't emulate this instruction */
};
+extern const unsigned char exception_priority[];
+extern const unsigned char priority_exception[];
+
extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
extern char kvmppc_handlers_start[];
extern unsigned long kvmppc_handler_len;
@@ -50,47 +58,51 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_emulate_instruction(struct kvm_run *run,
struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
- u64 asid, u32 flags, u32 max_bytes,
- unsigned int gtlb_idx);
+extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn,
+ u64 asid, u32 flags);
+extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ gva_t eend, u32 asid);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-/* Core-specific hooks */
-
-extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
- unsigned int id);
-extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_check_processor_compat(void);
-extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr);
-
-extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-
-extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
-
-extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq);
-
-extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int op, int *advance);
-extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
-extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
-
-
-extern int kvmppc_booke_init(void);
-extern void kvmppc_booke_exit(void);
-
-extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
+/* XXX Book E specific */
+extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
+
+extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);
+
+static inline void kvmppc_queue_exception(struct kvm_vcpu *vcpu, int exception)
+{
+ unsigned int priority = exception_priority[exception];
+ set_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
+{
+ unsigned int priority = exception_priority[exception];
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+/* Helper function for "full" MSR writes. No need to call this if only EE is
+ * changing. */
+static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+{
+ if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+ kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+
+ vcpu->arch.msr = new_msr;
+
+ if (vcpu->arch.msr & MSR_WE)
+ kvm_vcpu_block(vcpu);
+}
+
+static inline void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
+{
+ if (vcpu->arch.pid != new_pid) {
+ vcpu->arch.pid = new_pid;
+ vcpu->arch.swap_pid = 1;
+ }
+}
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index dffa71a6ef69..a825524c981a 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -54,7 +54,6 @@
#ifndef __ASSEMBLY__
extern unsigned int tlb_44x_hwater;
-extern unsigned int tlb_44x_index;
typedef struct {
unsigned long id;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 833b67fd7b00..050abfd5c17c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,6 +23,9 @@
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
+#ifdef CONFIG_KVM
+#include <linux/kvm_host.h>
+#endif
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
@@ -48,9 +51,6 @@
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
-#ifdef CONFIG_KVM
-#include <asm/kvm_44x.h>
-#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
@@ -356,10 +356,12 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
- DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
+ DEFINE(TLBE_BYTES, sizeof(struct tlbe));
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+ DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
+ DEFINE(VCPU_SHADOW_MOD, offsetof(struct kvm_vcpu, arch.shadow_tlb_mod));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
@@ -378,16 +380,5 @@ int main(void)
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
#endif
-#ifdef CONFIG_KVM_EXIT_TIMING
- DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
- arch.timing_exit.tv32.tbu));
- DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
- arch.timing_exit.tv32.tbl));
- DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
- arch.timing_last_enter.tv32.tbu));
- DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
- arch.timing_last_enter.tv32.tbl));
-#endif
-
return 0;
}
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
deleted file mode 100644
index a66bec57265a..000000000000
--- a/arch/powerpc/kvm/44x.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/err.h>
-
-#include <asm/reg.h>
-#include <asm/cputable.h>
-#include <asm/tlbflush.h>
-#include <asm/kvm_44x.h>
-#include <asm/kvm_ppc.h>
-
-#include "44x_tlb.h"
-
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvm44x_disable_debug_interrupts(void)
-{
- mtmsr(mfmsr() & ~MSR_DE);
-}
-
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
- kvm44x_disable_debug_interrupts();
-
- mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
- mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
- mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
- mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
- mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
- mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
- mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
- mtmsr(vcpu->arch.host_msr);
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
- u32 dbcr0 = 0;
-
- vcpu->arch.host_msr = mfmsr();
- kvm44x_disable_debug_interrupts();
-
- /* Save host debug register state. */
- vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
- vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
- vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
- vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
- vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
- vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
- vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
- /* set registers up for guest */
-
- if (dbg->bp[0]) {
- mtspr(SPRN_IAC1, dbg->bp[0]);
- dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
- }
- if (dbg->bp[1]) {
- mtspr(SPRN_IAC2, dbg->bp[1]);
- dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
- }
- if (dbg->bp[2]) {
- mtspr(SPRN_IAC3, dbg->bp[2]);
- dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
- }
- if (dbg->bp[3]) {
- mtspr(SPRN_IAC4, dbg->bp[3]);
- dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
- }
-
- mtspr(SPRN_DBCR0, dbcr0);
- mtspr(SPRN_DBCR1, 0);
- mtspr(SPRN_DBCR2, 0);
-}
-
-void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
- kvmppc_44x_tlb_load(vcpu);
-}
-
-void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
-{
- kvmppc_44x_tlb_put(vcpu);
-}
-
-int kvmppc_core_check_processor_compat(void)
-{
- int r;
-
- if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
- r = 0;
- else
- r = -ENOTSUPP;
-
- return r;
-}
-
-int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
- int i;
-
- tlbe->tid = 0;
- tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
- tlbe->word1 = 0;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
-
- tlbe++;
- tlbe->tid = 0;
- tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
- tlbe->word1 = 0xef600000;
- tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
- | PPC44x_TLB_I | PPC44x_TLB_G;
-
- /* Since the guest can directly access the timebase, it must know the
- * real timebase frequency. Accordingly, it must see the state of
- * CCR1[TCS]. */
- vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
-
- for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
- vcpu_44x->shadow_refs[i].gtlb_index = -1;
-
- return 0;
-}
-
-/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
-int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
- struct kvm_translation *tr)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
- int index;
- gva_t eaddr;
- u8 pid;
- u8 as;
-
- eaddr = tr->linear_address;
- pid = (tr->linear_address >> 32) & 0xff;
- as = (tr->linear_address >> 40) & 0x1;
-
- index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
- if (index == -1) {
- tr->valid = 0;
- return 0;
- }
-
- gtlbe = &vcpu_44x->guest_tlb[index];
-
- tr->physical_address = tlb_xlate(gtlbe, eaddr);
- /* XXX what does "writeable" and "usermode" even mean? */
- tr->valid = 1;
-
- return 0;
-}
-
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
-{
- struct kvmppc_vcpu_44x *vcpu_44x;
- struct kvm_vcpu *vcpu;
- int err;
-
- vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_44x) {
- err = -ENOMEM;
- goto out;
- }
-
- vcpu = &vcpu_44x->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
- return vcpu;
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
-out:
- return ERR_PTR(err);
-}
-
-void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
-
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_44x);
-}
-
-static int kvmppc_44x_init(void)
-{
- int r;
-
- r = kvmppc_booke_init();
- if (r)
- return r;
-
- return kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), THIS_MODULE);
-}
-
-static void kvmppc_44x_exit(void)
-{
- kvmppc_booke_exit();
-}
-
-module_init(kvmppc_44x_init);
-module_exit(kvmppc_44x_exit);
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
deleted file mode 100644
index 82489a743a6f..000000000000
--- a/arch/powerpc/kvm/44x_emulate.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#include <asm/kvm_ppc.h>
-#include <asm/dcr.h>
-#include <asm/dcr-regs.h>
-#include <asm/disassemble.h>
-#include <asm/kvm_44x.h>
-#include "timing.h"
-
-#include "booke.h"
-#include "44x_tlb.h"
-
-#define OP_RFI 19
-
-#define XOP_RFI 50
-#define XOP_MFMSR 83
-#define XOP_WRTEE 131
-#define XOP_MTMSR 146
-#define XOP_WRTEEI 163
-#define XOP_MFDCR 323
-#define XOP_MTDCR 451
-#define XOP_TLBSX 914
-#define XOP_ICCCI 966
-#define XOP_TLBWE 978
-
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
-int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance)
-{
- int emulated = EMULATE_DONE;
- int dcrn;
- int ra;
- int rb;
- int rc;
- int rs;
- int rt;
- int ws;
-
- switch (get_op(inst)) {
- case OP_RFI:
- switch (get_xop(inst)) {
- case XOP_RFI:
- kvmppc_emul_rfi(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
- *advance = 0;
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
- break;
-
- case 31:
- switch (get_xop(inst)) {
-
- case XOP_MFMSR:
- rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
- kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
- break;
-
- case XOP_MTMSR:
- rs = get_rs(inst);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
- break;
-
- case XOP_WRTEE:
- rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
- case XOP_WRTEEI:
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (inst & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
- case XOP_MFDCR:
- dcrn = get_dcrn(inst);
- rt = get_rt(inst);
-
- /* The guest may access CPR0 registers to determine the timebase
- * frequency, and it must know the real host frequency because it
- * can directly access the timebase registers.
- *
- * It would be possible to emulate those accesses in userspace,
- * but userspace can really only figure out the end frequency.
- * We could decompose that into the factors that compute it, but
- * that's tricky math, and it's easier to just report the real
- * CPR0 values.
- */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
- break;
- case DCRN_CPR0_CONFIG_DATA:
- local_irq_disable();
- mtdcr(DCRN_CPR0_CONFIG_ADDR,
- vcpu->arch.cpr0_cfgaddr);
- vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
- local_irq_enable();
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = 0;
- run->dcr.is_write = 0;
- vcpu->arch.io_gpr = rt;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
- case XOP_MTDCR:
- dcrn = get_dcrn(inst);
- rs = get_rs(inst);
-
- /* emulate some access in kernel */
- switch (dcrn) {
- case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
- break;
- default:
- run->dcr.dcrn = dcrn;
- run->dcr.data = vcpu->arch.gpr[rs];
- run->dcr.is_write = 1;
- vcpu->arch.dcr_needed = 1;
- kvmppc_account_exit(vcpu, DCR_EXITS);
- emulated = EMULATE_DO_DCR;
- }
-
- break;
-
- case XOP_TLBWE:
- ra = get_ra(inst);
- rs = get_rs(inst);
- ws = get_ws(inst);
- emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws);
- break;
-
- case XOP_TLBSX:
- rt = get_rt(inst);
- ra = get_ra(inst);
- rb = get_rb(inst);
- rc = get_rc(inst);
- emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc);
- break;
-
- case XOP_ICCCI:
- break;
-
- default:
- emulated = EMULATE_FAIL;
- }
-
- break;
-
- default:
- emulated = EMULATE_FAIL;
- }
-
- return emulated;
-}
-
-int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
-{
- switch (sprn) {
- case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
- case SPRN_PID:
- kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
- case SPRN_CCR0:
- vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_CCR1:
- vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
- case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
- case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
- kvmppc_emulate_dec(vcpu);
- break;
-
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
- case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
- case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR0:
- vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR1:
- vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR2:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR3:
- vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR4:
- vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR5:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR6:
- vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR7:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR8:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR9:
- vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR10:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR11:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR12:
- vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR13:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR14:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR15:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
- break;
-
- default:
- return EMULATE_FAIL;
- }
-
- kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
- return EMULATE_DONE;
-}
-
-int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
-{
- switch (sprn) {
- /* 440 */
- case SPRN_MMUCR:
- vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
- case SPRN_CCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
- case SPRN_CCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
-
- /* Book E */
- case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
- case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
- case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
- case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
- case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
- case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
-
- case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
- break;
- case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
- break;
- case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
- break;
- case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
- break;
- case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
- break;
- case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
- break;
- case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
- break;
- case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
- break;
- case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
- break;
- case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
- break;
- case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
- break;
- case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
- break;
- case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
- break;
- case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
- break;
- case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
- break;
- case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
- break;
-
- default:
- return EMULATE_FAIL;
- }
-
- kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
- return EMULATE_DONE;
-}
-
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9a34b8edb9e2..ad72c6f9811f 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -22,103 +22,20 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
-
-#include <asm/tlbflush.h>
#include <asm/mmu-44x.h>
#include <asm/kvm_ppc.h>
-#include <asm/kvm_44x.h>
-#include "timing.h"
#include "44x_tlb.h"
-#ifndef PPC44x_TLBE_SIZE
-#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
-#endif
-
-#define PAGE_SIZE_4K (1<<12)
-#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1))
-
-#define PPC44x_TLB_UATTR_MASK \
- (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3)
#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
-#ifdef DEBUG
-void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_44x_tlbe *tlbe;
- int i;
-
- printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
- printk("| %2s | %3s | %8s | %8s | %8s |\n",
- "nr", "tid", "word0", "word1", "word2");
-
- for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
- tlbe = &vcpu_44x->guest_tlb[i];
- if (tlbe->word0 & PPC44x_TLB_VALID)
- printk(" G%2d | %02X | %08X | %08X | %08X |\n",
- i, tlbe->tid, tlbe->word0, tlbe->word1,
- tlbe->word2);
- }
-}
-#endif
-
-static inline void kvmppc_44x_tlbie(unsigned int index)
-{
- /* 0 <= index < 64, so the V bit is clear and we can use the index as
- * word0. */
- asm volatile(
- "tlbwe %[index], %[index], 0\n"
- :
- : [index] "r"(index)
- );
-}
-
-static inline void kvmppc_44x_tlbre(unsigned int index,
- struct kvmppc_44x_tlbe *tlbe)
-{
- asm volatile(
- "tlbre %[word0], %[index], 0\n"
- "mfspr %[tid], %[sprn_mmucr]\n"
- "andi. %[tid], %[tid], 0xff\n"
- "tlbre %[word1], %[index], 1\n"
- "tlbre %[word2], %[index], 2\n"
- : [word0] "=r"(tlbe->word0),
- [word1] "=r"(tlbe->word1),
- [word2] "=r"(tlbe->word2),
- [tid] "=r"(tlbe->tid)
- : [index] "r"(index),
- [sprn_mmucr] "i"(SPRN_MMUCR)
- : "cc"
- );
-}
-
-static inline void kvmppc_44x_tlbwe(unsigned int index,
- struct kvmppc_44x_tlbe *stlbe)
-{
- unsigned long tmp;
-
- asm volatile(
- "mfspr %[tmp], %[sprn_mmucr]\n"
- "rlwimi %[tmp], %[tid], 0, 0xff\n"
- "mtspr %[sprn_mmucr], %[tmp]\n"
- "tlbwe %[word0], %[index], 0\n"
- "tlbwe %[word1], %[index], 1\n"
- "tlbwe %[word2], %[index], 2\n"
- : [tmp] "=&r"(tmp)
- : [word0] "r"(stlbe->word0),
- [word1] "r"(stlbe->word1),
- [word2] "r"(stlbe->word2),
- [tid] "r"(stlbe->tid),
- [index] "r"(index),
- [sprn_mmucr] "i"(SPRN_MMUCR)
- );
-}
+static unsigned int kvmppc_tlb_44x_pos;
static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
{
- /* We only care about the guest's permission and user bits. */
- attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK;
+ /* Mask off reserved bits. */
+ attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
if (!usermode) {
/* Guest is in supervisor mode, so we need to translate guest
@@ -130,60 +47,18 @@ static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
/* Make sure host can always access this memory. */
attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
- /* WIMGE = 0b00100 */
- attrib |= PPC44x_TLB_M;
-
return attrib;
}
-/* Load shadow TLB back into hardware. */
-void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- int i;
-
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-
- if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
- kvmppc_44x_tlbwe(i, stlbe);
- }
-}
-
-static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x,
- unsigned int i)
-{
- vcpu_44x->shadow_tlb_mod[i] = 1;
-}
-
-/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */
-void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- int i;
-
- for (i = 0; i <= tlb_44x_hwater; i++) {
- struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i];
-
- if (vcpu_44x->shadow_tlb_mod[i])
- kvmppc_44x_tlbre(i, stlbe);
-
- if (get_tlb_v(stlbe) && get_tlb_ts(stlbe))
- kvmppc_44x_tlbie(i);
- }
-}
-
-
/* Search the guest TLB for a matching entry. */
int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
unsigned int as)
{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
/* XXX Replace loop with fancy data structures. */
- for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) {
- struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i];
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
unsigned int tid;
if (eaddr < get_tlb_eaddr(tlbe))
@@ -208,89 +83,78 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
-int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+ unsigned int index;
- return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ if (index == -1)
+ return NULL;
+ return &vcpu->arch.guest_tlb[index];
}
-int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+ unsigned int index;
- return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+ if (index == -1)
+ return NULL;
+ return &vcpu->arch.guest_tlb[index];
}
-static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
- unsigned int stlb_index)
+static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
{
- struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index];
-
- if (!ref->page)
- return;
-
- /* Discard from the TLB. */
- /* Note: we could actually invalidate a host mapping, if the host overwrote
- * this TLB entry since we inserted a guest mapping. */
- kvmppc_44x_tlbie(stlb_index);
-
- /* Now release the page. */
- if (ref->writeable)
- kvm_release_page_dirty(ref->page);
- else
- kvm_release_page_clean(ref->page);
-
- ref->page = NULL;
+ return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
+}
- /* XXX set tlb_44x_index to stlb_index? */
+static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
+ unsigned int index)
+{
+ struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
+ struct page *page = vcpu->arch.shadow_pages[index];
- KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
+ if (get_tlb_v(stlbe)) {
+ if (kvmppc_44x_tlbe_is_writable(stlbe))
+ kvm_release_page_dirty(page);
+ else
+ kvm_release_page_clean(page);
+ }
}
void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
for (i = 0; i <= tlb_44x_hwater; i++)
- kvmppc_44x_shadow_release(vcpu_44x, i);
+ kvmppc_44x_shadow_release(vcpu, i);
}
-/**
- * kvmppc_mmu_map -- create a host mapping for guest memory
- *
- * If the guest wanted a larger page than the host supports, only the first
- * host page is mapped here and the rest are demand faulted.
- *
- * If the guest wanted a smaller page than the host page size, we map only the
- * guest-size page (i.e. not a full host page mapping).
- *
- * Caller must ensure that the specified guest TLB entry is safe to insert into
- * the shadow TLB.
- */
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
- u32 flags, u32 max_bytes, unsigned int gtlb_index)
+void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i)
+{
+ vcpu->arch.shadow_tlb_mod[i] = 1;
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
+ u32 flags)
{
- struct kvmppc_44x_tlbe stlbe;
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
+ struct tlbe *stlbe;
hpa_t hpaddr;
- gfn_t gfn;
unsigned int victim;
- /* Select TLB entry to clobber. Indirectly guard against races with the TLB
- * miss handler by disabling interrupts. */
- local_irq_disable();
- victim = ++tlb_44x_index;
- if (victim > tlb_44x_hwater)
- victim = 0;
- tlb_44x_index = victim;
- local_irq_enable();
+ /* Future optimization: don't overwrite the TLB entry containing the
+ * current PC (or stack?). */
+ victim = kvmppc_tlb_44x_pos++;
+ if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
+ kvmppc_tlb_44x_pos = 0;
+ stlbe = &vcpu->arch.shadow_tlb[victim];
/* Get reference to new page. */
- gfn = gpaddr >> PAGE_SHIFT;
new_page = gfn_to_page(vcpu->kvm, gfn);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
@@ -299,8 +163,10 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
}
hpaddr = page_to_phys(new_page);
- /* Invalidate any previous shadow mappings. */
- kvmppc_44x_shadow_release(vcpu_44x, victim);
+ /* Drop reference to old page. */
+ kvmppc_44x_shadow_release(vcpu, victim);
+
+ vcpu->arch.shadow_pages[victim] = new_page;
/* XXX Make sure (va, size) doesn't overlap any other
* entries. 440x6 user manual says the result would be
@@ -308,193 +174,78 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
/* XXX what about AS? */
- /* Force TS=1 for all guest mappings. */
- stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS;
-
- if (max_bytes >= PAGE_SIZE) {
- /* Guest mapping is larger than or equal to host page size. We can use
- * a "native" host mapping. */
- stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE;
- } else {
- /* Guest mapping is smaller than host page size. We must restrict the
- * size of the mapping to be at most the smaller of the two, but for
- * simplicity we fall back to a 4K mapping (this is probably what the
- * guest is using anyways). */
- stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K;
-
- /* 'hpaddr' is a host page, which is larger than the mapping we're
- * inserting here. To compensate, we must add the in-page offset to the
- * sub-page. */
- hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K);
- }
+ stlbe->tid = !(asid & 0xff);
- stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
- stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+ /* Force TS=1 for all guest mappings. */
+ /* For now we hardcode 4KB mappings, but it will be important to
+ * use host large pages in the future. */
+ stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
+ | PPC44x_TLB_4K;
+ stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+ stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
vcpu->arch.msr & MSR_PR);
- stlbe.tid = !(asid & 0xff);
-
- /* Keep track of the reference so we can properly release it later. */
- ref = &vcpu_44x->shadow_refs[victim];
- ref->page = new_page;
- ref->gtlb_index = gtlb_index;
- ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW);
- ref->tid = stlbe.tid;
-
- /* Insert shadow mapping into hardware TLB. */
- kvmppc_44x_tlbe_set_modified(vcpu_44x, victim);
- kvmppc_44x_tlbwe(victim, &stlbe);
- KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1,
- stlbe.word2, handler);
-}
+ kvmppc_tlbe_set_modified(vcpu, victim);
-/* For a particular guest TLB entry, invalidate the corresponding host TLB
- * mappings and release the host pages. */
-static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu,
- unsigned int gtlb_index)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
- struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
- if (ref->gtlb_index == gtlb_index)
- kvmppc_44x_shadow_release(vcpu_44x, i);
- }
+ KVMTRACE_5D(STLB_WRITE, vcpu, victim,
+ stlbe->tid, stlbe->word0, stlbe->word1, stlbe->word2,
+ handler);
}
-void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ gva_t eend, u32 asid)
{
- vcpu->arch.shadow_pid = !usermode;
-}
-
-void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ unsigned int pid = !(asid & 0xff);
int i;
- if (unlikely(vcpu->arch.pid == new_pid))
- return;
-
- vcpu->arch.pid = new_pid;
-
- /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it
- * can't access guest kernel mappings (TID=1). When we switch to a new
- * guest PID, which will also use host PID=0, we must discard the old guest
- * userspace mappings. */
- for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
- struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];
-
- if (ref->tid == 0)
- kvmppc_44x_shadow_release(vcpu_44x, i);
- }
-}
-
-static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
- const struct kvmppc_44x_tlbe *tlbe)
-{
- gpa_t gpa;
-
- if (!get_tlb_v(tlbe))
- return 0;
-
- /* Does it match current guest AS? */
- /* XXX what about IS != DS? */
- if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
- return 0;
-
- gpa = get_tlb_raddr(tlbe);
- if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
- /* Mapping is not for RAM. */
- return 0;
-
- return 1;
-}
-
-int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
-{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *tlbe;
- unsigned int gtlb_index;
-
- gtlb_index = vcpu->arch.gpr[ra];
- if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
- printk("%s: index %d\n", __func__, gtlb_index);
- kvmppc_dump_vcpu(vcpu);
- return EMULATE_FAIL;
- }
-
- tlbe = &vcpu_44x->guest_tlb[gtlb_index];
-
- /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */
- if (tlbe->word0 & PPC44x_TLB_VALID)
- kvmppc_44x_invalidate(vcpu, gtlb_index);
-
- switch (ws) {
- case PPC44x_TLB_PAGEID:
- tlbe->tid = get_mmucr_stid(vcpu);
- tlbe->word0 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_XLAT:
- tlbe->word1 = vcpu->arch.gpr[rs];
- break;
-
- case PPC44x_TLB_ATTRIB:
- tlbe->word2 = vcpu->arch.gpr[rs];
- break;
-
- default:
- return EMULATE_FAIL;
- }
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+ unsigned int tid;
- if (tlbe_is_host_safe(vcpu, tlbe)) {
- u64 asid;
- gva_t eaddr;
- gpa_t gpaddr;
- u32 flags;
- u32 bytes;
+ if (!get_tlb_v(stlbe))
+ continue;
- eaddr = get_tlb_eaddr(tlbe);
- gpaddr = get_tlb_raddr(tlbe);
+ if (eend < get_tlb_eaddr(stlbe))
+ continue;
- /* Use the advertised page size to mask effective and real addrs. */
- bytes = get_tlb_bytes(tlbe);
- eaddr &= ~(bytes - 1);
- gpaddr &= ~(bytes - 1);
+ if (eaddr > get_tlb_end(stlbe))
+ continue;
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- flags = tlbe->word2 & 0xffff;
+ tid = get_tlb_tid(stlbe);
+ if (tid && (tid != pid))
+ continue;
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+ kvmppc_44x_shadow_release(vcpu, i);
+ stlbe->word0 = 0;
+ kvmppc_tlbe_set_modified(vcpu, i);
+ KVMTRACE_5D(STLB_INVAL, vcpu, i,
+ stlbe->tid, stlbe->word0, stlbe->word1,
+ stlbe->word2, handler);
}
-
- KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
- tlbe->word1, tlbe->word2, handler);
-
- kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS);
- return EMULATE_DONE;
}
-int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
+/* Invalidate all mappings on the privilege switch after PID has been changed.
+ * The guest always runs with PID=1, so we must clear the entire TLB when
+ * switching address spaces. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
{
- u32 ea;
- int gtlb_index;
- unsigned int as = get_mmucr_sts(vcpu);
- unsigned int pid = get_mmucr_stid(vcpu);
-
- ea = vcpu->arch.gpr[rb];
- if (ra)
- ea += vcpu->arch.gpr[ra];
-
- gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
- if (rc) {
- if (gtlb_index < 0)
- vcpu->arch.cr &= ~0x20000000;
- else
- vcpu->arch.cr |= 0x20000000;
+ int i;
+
+ if (vcpu->arch.swap_pid) {
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i <= tlb_44x_hwater; i++) {
+ struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+
+ /* Future optimization: clear only userspace mappings. */
+ kvmppc_44x_shadow_release(vcpu, i);
+ stlbe->word0 = 0;
+ kvmppc_tlbe_set_modified(vcpu, i);
+ KVMTRACE_5D(STLB_INVAL, vcpu, i,
+ stlbe->tid, stlbe->word0, stlbe->word1,
+ stlbe->word2, handler);
+ }
+ vcpu->arch.swap_pid = 0;
}
- vcpu->arch.gpr[rt] = gtlb_index;
- kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
- return EMULATE_DONE;
+ vcpu->arch.shadow_pid = !usermode;
}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 772191f29e62..2ccd46b6f6b7 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,52 +25,48 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
-extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-
-extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
- u8 rc);
-extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws);
+extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
/* TLB helper functions */
-static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe)
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
{
return (tlbe->word0 >> 4) & 0xf;
}
-static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe)
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
{
return tlbe->word0 & 0xfffffc00;
}
-static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe)
+static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
{
unsigned int pgsize = get_tlb_size(tlbe);
return 1 << 10 << (pgsize << 1);
}
-static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe)
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
{
return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
}
-static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe)
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
{
u64 word1 = tlbe->word1;
return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
}
-static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe)
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
{
return tlbe->tid & 0xff;
}
-static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe)
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
{
return (tlbe->word0 >> 8) & 0x1;
}
-static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe)
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
{
return (tlbe->word0 >> 9) & 0x1;
}
@@ -85,7 +81,7 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
-static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
+static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
{
unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6dbdc4817d80..53aaa66b25e5 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -15,33 +15,27 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
- bool
+ bool "Kernel-based Virtual Machine (KVM) support"
+ depends on 44x && EXPERIMENTAL
select PREEMPT_NOTIFIERS
select ANON_INODES
-
-config KVM_440
- bool "KVM support for PowerPC 440 processors"
- depends on EXPERIMENTAL && 44x
- select KVM
+ # We can only run on Book E hosts so far
+ select KVM_BOOKE_HOST
---help---
- Support running unmodified 440 guest kernels in virtual machines on
- 440 host processors.
+ Support hosting virtualized guest machines. You will also
+ need to select one or more of the processor modules below.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
If unsure, say N.
-config KVM_EXIT_TIMING
- bool "Detailed exit timing"
- depends on KVM
+config KVM_BOOKE_HOST
+ bool "KVM host support for Book E PowerPC processors"
+ depends on KVM && 44x
---help---
- Calculate elapsed time for every exit/enter cycle. A per-vcpu
- report is available in debugfs kvm/vm#_vcpu#_timing.
- The overhead is relatively small, however it is not recommended for
- production environments.
-
- If unsure, say N.
+ Provides host support for KVM on Book E PowerPC processors. Currently
+ this works on 440 processors only.
config KVM_TRACE
bool "KVM trace support"
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index df7ba59e6d53..2a5d4397ac4b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -8,16 +8,10 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
common-objs-$(CONFIG_KVM_TRACE) += $(addprefix ../../../virt/kvm/, kvm_trace.o)
-kvm-objs := $(common-objs-y) powerpc.o emulate.o
-obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
+kvm-objs := $(common-objs-y) powerpc.o emulate.o booke_guest.o
obj-$(CONFIG_KVM) += kvm.o
AFLAGS_booke_interrupts.o := -I$(obj)
-kvm-440-objs := \
- booke.o \
- booke_interrupts.o \
- 44x.o \
- 44x_tlb.o \
- 44x_emulate.o
-obj-$(CONFIG_KVM_440) += kvm-440.o
+kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
+obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
deleted file mode 100644
index cf7c94ca24bf..000000000000
--- a/arch/powerpc/kvm/booke.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- */
-
-#ifndef __KVM_BOOKE_H__
-#define __KVM_BOOKE_H__
-
-#include <linux/types.h>
-#include <linux/kvm_host.h>
-#include "timing.h"
-
-/* interrupt priortity ordering */
-#define BOOKE_IRQPRIO_DATA_STORAGE 0
-#define BOOKE_IRQPRIO_INST_STORAGE 1
-#define BOOKE_IRQPRIO_ALIGNMENT 2
-#define BOOKE_IRQPRIO_PROGRAM 3
-#define BOOKE_IRQPRIO_FP_UNAVAIL 4
-#define BOOKE_IRQPRIO_SYSCALL 5
-#define BOOKE_IRQPRIO_AP_UNAVAIL 6
-#define BOOKE_IRQPRIO_DTLB_MISS 7
-#define BOOKE_IRQPRIO_ITLB_MISS 8
-#define BOOKE_IRQPRIO_MACHINE_CHECK 9
-#define BOOKE_IRQPRIO_DEBUG 10
-#define BOOKE_IRQPRIO_CRITICAL 11
-#define BOOKE_IRQPRIO_WATCHDOG 12
-#define BOOKE_IRQPRIO_EXTERNAL 13
-#define BOOKE_IRQPRIO_FIT 14
-#define BOOKE_IRQPRIO_DECREMENTER 15
-
-/* Helper function for "full" MSR writes. No need to call this if only EE is
- * changing. */
-static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
-{
- if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
- kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
-
- vcpu->arch.msr = new_msr;
-
- if (vcpu->arch.msr & MSR_WE) {
- kvm_vcpu_block(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
- };
-}
-
-#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke_guest.c
index 35485dd6927e..7b2591e26bae 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -24,26 +24,21 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
-
#include <asm/cputable.h>
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
-#include "timing.h"
-#include <asm/cacheflush.h>
-#include <asm/kvm_44x.h>
-#include "booke.h"
#include "44x_tlb.h"
-unsigned long kvmppc_booke_handlers;
-
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ { "exits", VCPU_STAT(sum_exits) },
{ "mmio", VCPU_STAT(mmio_exits) },
{ "dcr", VCPU_STAT(dcr_exits) },
{ "sig", VCPU_STAT(signal_exits) },
+ { "light", VCPU_STAT(light_exits) },
{ "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
{ "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
{ "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
@@ -58,19 +53,103 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+static const u32 interrupt_msr_mask[16] = {
+ [BOOKE_INTERRUPT_CRITICAL] = MSR_ME,
+ [BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
+ [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME,
+ [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE,
+ [BOOKE_INTERRUPT_DEBUG] = MSR_ME,
+};
+
+const unsigned char exception_priority[] = {
+ [BOOKE_INTERRUPT_DATA_STORAGE] = 0,
+ [BOOKE_INTERRUPT_INST_STORAGE] = 1,
+ [BOOKE_INTERRUPT_ALIGNMENT] = 2,
+ [BOOKE_INTERRUPT_PROGRAM] = 3,
+ [BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
+ [BOOKE_INTERRUPT_SYSCALL] = 5,
+ [BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
+ [BOOKE_INTERRUPT_DTLB_MISS] = 7,
+ [BOOKE_INTERRUPT_ITLB_MISS] = 8,
+ [BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
+ [BOOKE_INTERRUPT_DEBUG] = 10,
+ [BOOKE_INTERRUPT_CRITICAL] = 11,
+ [BOOKE_INTERRUPT_WATCHDOG] = 12,
+ [BOOKE_INTERRUPT_EXTERNAL] = 13,
+ [BOOKE_INTERRUPT_FIT] = 14,
+ [BOOKE_INTERRUPT_DECREMENTER] = 15,
+};
+
+const unsigned char priority_exception[] = {
+ BOOKE_INTERRUPT_DATA_STORAGE,
+ BOOKE_INTERRUPT_INST_STORAGE,
+ BOOKE_INTERRUPT_ALIGNMENT,
+ BOOKE_INTERRUPT_PROGRAM,
+ BOOKE_INTERRUPT_FP_UNAVAIL,
+ BOOKE_INTERRUPT_SYSCALL,
+ BOOKE_INTERRUPT_AP_UNAVAIL,
+ BOOKE_INTERRUPT_DTLB_MISS,
+ BOOKE_INTERRUPT_ITLB_MISS,
+ BOOKE_INTERRUPT_MACHINE_CHECK,
+ BOOKE_INTERRUPT_DEBUG,
+ BOOKE_INTERRUPT_CRITICAL,
+ BOOKE_INTERRUPT_WATCHDOG,
+ BOOKE_INTERRUPT_EXTERNAL,
+ BOOKE_INTERRUPT_FIT,
+ BOOKE_INTERRUPT_DECREMENTER,
+};
+
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct tlbe *tlbe;
+ int i;
+
+ printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+ printk("| %2s | %3s | %8s | %8s | %8s |\n",
+ "nr", "tid", "word0", "word1", "word2");
+
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ tlbe = &vcpu->arch.guest_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" G%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+
+ for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+ tlbe = &vcpu->arch.shadow_tlb[i];
+ if (tlbe->word0 & PPC44x_TLB_VALID)
+ printk(" S%2d | %02X | %08X | %08X | %08X |\n",
+ i, tlbe->tid, tlbe->word0, tlbe->word1,
+ tlbe->word2);
+ }
+}
+
/* TODO: use vcpu_printf() */
void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
int i;
- printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr);
- printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
- printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1);
+ printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr);
+ printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
+ printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
- printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
+ printk("gpr%02d: %08x %08x %08x %08x\n", i,
vcpu->arch.gpr[i],
vcpu->arch.gpr[i+1],
vcpu->arch.gpr[i+2],
@@ -78,96 +157,69 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
}
}
-static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
- unsigned int priority)
+/* Check if we are ready to deliver the interrupt */
+static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
{
- set_bit(priority, &vcpu->arch.pending_exceptions);
-}
+ int r;
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
-{
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
-}
-
-void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
-{
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
-}
-
-int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
-{
- return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
-}
-
-void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
-{
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL);
-}
-
-/* Deliver the interrupt of the corresponding priority, if possible. */
-static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
- unsigned int priority)
-{
- int allowed = 0;
- ulong msr_mask;
-
- switch (priority) {
- case BOOKE_IRQPRIO_PROGRAM:
- case BOOKE_IRQPRIO_DTLB_MISS:
- case BOOKE_IRQPRIO_ITLB_MISS:
- case BOOKE_IRQPRIO_SYSCALL:
- case BOOKE_IRQPRIO_DATA_STORAGE:
- case BOOKE_IRQPRIO_INST_STORAGE:
- case BOOKE_IRQPRIO_FP_UNAVAIL:
- case BOOKE_IRQPRIO_AP_UNAVAIL:
- case BOOKE_IRQPRIO_ALIGNMENT:
- allowed = 1;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ switch (interrupt) {
+ case BOOKE_INTERRUPT_CRITICAL:
+ r = vcpu->arch.msr & MSR_CE;
break;
- case BOOKE_IRQPRIO_CRITICAL:
- case BOOKE_IRQPRIO_WATCHDOG:
- allowed = vcpu->arch.msr & MSR_CE;
- msr_mask = MSR_ME;
+ case BOOKE_INTERRUPT_MACHINE_CHECK:
+ r = vcpu->arch.msr & MSR_ME;
break;
- case BOOKE_IRQPRIO_MACHINE_CHECK:
- allowed = vcpu->arch.msr & MSR_ME;
- msr_mask = 0;
+ case BOOKE_INTERRUPT_EXTERNAL:
+ r = vcpu->arch.msr & MSR_EE;
break;
- case BOOKE_IRQPRIO_EXTERNAL:
- case BOOKE_IRQPRIO_DECREMENTER:
- case BOOKE_IRQPRIO_FIT:
- allowed = vcpu->arch.msr & MSR_EE;
- msr_mask = MSR_CE|MSR_ME|MSR_DE;
+ case BOOKE_INTERRUPT_DECREMENTER:
+ r = vcpu->arch.msr & MSR_EE;
break;
- case BOOKE_IRQPRIO_DEBUG:
- allowed = vcpu->arch.msr & MSR_DE;
- msr_mask = MSR_ME;
+ case BOOKE_INTERRUPT_FIT:
+ r = vcpu->arch.msr & MSR_EE;
break;
+ case BOOKE_INTERRUPT_WATCHDOG:
+ r = vcpu->arch.msr & MSR_CE;
+ break;
+ case BOOKE_INTERRUPT_DEBUG:
+ r = vcpu->arch.msr & MSR_DE;
+ break;
+ default:
+ r = 1;
}
- if (allowed) {
- vcpu->arch.srr0 = vcpu->arch.pc;
- vcpu->arch.srr1 = vcpu->arch.msr;
- vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
- kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
+ return r;
+}
- clear_bit(priority, &vcpu->arch.pending_exceptions);
+static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+ switch (interrupt) {
+ case BOOKE_INTERRUPT_DECREMENTER:
+ vcpu->arch.tsr |= TSR_DIS;
+ break;
}
- return allowed;
+ vcpu->arch.srr0 = vcpu->arch.pc;
+ vcpu->arch.srr1 = vcpu->arch.msr;
+ vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
+ kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
}
/* Check pending exceptions and deliver one, if possible. */
-void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
{
unsigned long *pending = &vcpu->arch.pending_exceptions;
+ unsigned int exception;
unsigned int priority;
- priority = __ffs(*pending);
+ priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
while (priority <= BOOKE_MAX_INTERRUPT) {
- if (kvmppc_booke_irqprio_deliver(vcpu, priority))
+ exception = priority_exception[priority];
+ if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
+ kvmppc_clear_exception(vcpu, exception);
+ kvmppc_deliver_interrupt(vcpu, exception);
break;
+ }
priority = find_next_bit(pending,
BITS_PER_BYTE * sizeof(*pending),
@@ -186,9 +238,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
enum emulation_result er;
int r = RESUME_HOST;
- /* update before a new last_exit_type is rewritten */
- kvmppc_update_timing_stats(vcpu);
-
local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -202,19 +251,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_EXTERNAL:
- kvmppc_account_exit(vcpu, EXT_INTR_EXITS);
- if (need_resched())
- cond_resched();
- r = RESUME_GUEST;
- break;
-
case BOOKE_INTERRUPT_DECREMENTER:
/* Since we switched IVPR back to the host's value, the host
* handled this interrupt the moment we enabled interrupts.
* Now we just offer it a chance to reschedule the guest. */
- kvmppc_account_exit(vcpu, DEC_EXITS);
+
+ /* XXX At this point the TLB still holds our shadow TLB, so if
+ * we do reschedule the host will fault over it. Perhaps we
+ * should politely restore the host's entries to minimize
+ * misses before ceding control. */
if (need_resched())
cond_resched();
+ if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
+ vcpu->stat.dec_exits++;
+ else
+ vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -223,19 +274,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
r = RESUME_GUEST;
- kvmppc_account_exit(vcpu, USR_PR_INST);
break;
}
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
- /* don't overwrite subtypes, just account kvm_stats */
- kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS);
/* Future optimization: only reload non-volatiles if
* they were actually modified by emulation. */
+ vcpu->stat.emulated_inst_exits++;
r = RESUME_GUEST_NV;
break;
case EMULATE_DO_DCR:
@@ -244,7 +293,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case EMULATE_FAIL:
/* XXX Deliver Program interrupt to guest. */
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+ printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
@@ -258,53 +307,48 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
- kvmppc_account_exit(vcpu, FP_UNAVAIL);
+ kvmppc_queue_exception(vcpu, exit_nr);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
- kvmppc_account_exit(vcpu, DSI_EXITS);
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.dsi_exits++;
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
- kvmppc_account_exit(vcpu, ISI_EXITS);
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.isi_exits++;
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_SYSCALL:
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL);
- kvmppc_account_exit(vcpu, SYSCALL_EXITS);
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.syscall_exits++;
r = RESUME_GUEST;
break;
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
+ struct tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
- int gtlb_index;
gfn_t gfn;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
- if (gtlb_index < 0) {
+ gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
+ if (!gtlbe) {
/* The guest didn't have a mapping for it. */
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+ kvmppc_queue_exception(vcpu, exit_nr);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
+ vcpu->stat.dtlb_real_miss_exits++;
r = RESUME_GUEST;
break;
}
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
@@ -315,45 +359,38 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
- kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
+ kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+ gtlbe->word2);
+ vcpu->stat.dtlb_virt_miss_exits++;
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
r = kvmppc_emulate_mmio(run, vcpu);
- kvmppc_account_exit(vcpu, MMIO_EXITS);
}
break;
}
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
+ struct tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
- gpa_t gpaddr;
gfn_t gfn;
- int gtlb_index;
r = RESUME_GUEST;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
- if (gtlb_index < 0) {
+ gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
+ if (!gtlbe) {
/* The guest didn't have a mapping for it. */
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
- kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
+ kvmppc_queue_exception(vcpu, exit_nr);
+ vcpu->stat.itlb_real_miss_exits++;
break;
}
- kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
+ vcpu->stat.itlb_virt_miss_exits++;
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
- gpaddr = tlb_xlate(gtlbe, eaddr);
- gfn = gpaddr >> PAGE_SHIFT;
+ gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@@ -362,11 +399,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+ gtlbe->word2);
} else {
/* Guest mapped and leaped at non-RAM! */
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
+ kvmppc_queue_exception(vcpu,
+ BOOKE_INTERRUPT_MACHINE_CHECK);
}
break;
@@ -383,7 +421,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
mtspr(SPRN_DBSR, dbsr);
run->exit_reason = KVM_EXIT_DEBUG;
- kvmppc_account_exit(vcpu, DEBUG_EXITS);
r = RESUME_HOST;
break;
}
@@ -395,8 +432,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
local_irq_disable();
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_check_and_deliver_interrupts(vcpu);
+ /* Do some exit accounting. */
+ vcpu->stat.sum_exits++;
if (!(r & RESUME_HOST)) {
/* To avoid clobbering exit_reason, only check for signals if
* we aren't already exiting to userspace for some other
@@ -404,7 +443,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
- kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+
+ vcpu->stat.signal_exits++;
+ } else {
+ vcpu->stat.light_exits++;
+ }
+ } else {
+ switch (run->exit_reason) {
+ case KVM_EXIT_MMIO:
+ vcpu->stat.mmio_exits++;
+ break;
+ case KVM_EXIT_DCR:
+ vcpu->stat.dcr_exits++;
+ break;
+ case KVM_EXIT_INTR:
+ vcpu->stat.signal_exits++;
+ break;
}
}
@@ -414,6 +468,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+
+ tlbe->tid = 0;
+ tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+ tlbe->word1 = 0;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+ tlbe++;
+ tlbe->tid = 0;
+ tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+ tlbe->word1 = 0xef600000;
+ tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+ | PPC44x_TLB_I | PPC44x_TLB_G;
+
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
@@ -424,9 +492,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
* before it's programmed its own IVPR. */
vcpu->arch.ivpr = 0x55550000;
- kvmppc_init_timing_stats(vcpu);
+ /* Since the guest can directly access the timebase, it must know the
+ * real timebase frequency. Accordingly, it must see the state of
+ * CCR1[TCS]. */
+ vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
- return kvmppc_core_vcpu_setup(vcpu);
+ return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -465,7 +536,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
- kvmppc_set_msr(vcpu, regs->msr);
+ vcpu->arch.msr = regs->msr;
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg0 = regs->sprg0;
@@ -504,62 +575,31 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -ENOTSUPP;
}
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- return kvmppc_core_vcpu_translate(vcpu, tr);
-}
+ struct tlbe *gtlbe;
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+ if (index == -1) {
+ tr->valid = 0;
+ return 0;
+ }
-int kvmppc_booke_init(void)
-{
- unsigned long ivor[16];
- unsigned long max_ivor = 0;
- int i;
+ gtlbe = &vcpu->arch.guest_tlb[index];
- /* We install our own exception handlers by hijacking IVPR. IVPR must
- * be 16-bit aligned, so we need a 64KB allocation. */
- kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
- VCPU_SIZE_ORDER);
- if (!kvmppc_booke_handlers)
- return -ENOMEM;
-
- /* XXX make sure our handlers are smaller than Linux's */
-
- /* Copy our interrupt handlers to match host IVORs. That way we don't
- * have to swap the IVORs on every guest/host transition. */
- ivor[0] = mfspr(SPRN_IVOR0);
- ivor[1] = mfspr(SPRN_IVOR1);
- ivor[2] = mfspr(SPRN_IVOR2);
- ivor[3] = mfspr(SPRN_IVOR3);
- ivor[4] = mfspr(SPRN_IVOR4);
- ivor[5] = mfspr(SPRN_IVOR5);
- ivor[6] = mfspr(SPRN_IVOR6);
- ivor[7] = mfspr(SPRN_IVOR7);
- ivor[8] = mfspr(SPRN_IVOR8);
- ivor[9] = mfspr(SPRN_IVOR9);
- ivor[10] = mfspr(SPRN_IVOR10);
- ivor[11] = mfspr(SPRN_IVOR11);
- ivor[12] = mfspr(SPRN_IVOR12);
- ivor[13] = mfspr(SPRN_IVOR13);
- ivor[14] = mfspr(SPRN_IVOR14);
- ivor[15] = mfspr(SPRN_IVOR15);
-
- for (i = 0; i < 16; i++) {
- if (ivor[i] > max_ivor)
- max_ivor = ivor[i];
-
- memcpy((void *)kvmppc_booke_handlers + ivor[i],
- kvmppc_handlers_start + i * kvmppc_handler_len,
- kvmppc_handler_len);
- }
- flush_icache_range(kvmppc_booke_handlers,
- kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+ tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
return 0;
}
-
-void __exit kvmppc_booke_exit(void)
-{
- free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
- kvm_exit();
-}
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
new file mode 100644
index 000000000000..b480341bc31e
--- /dev/null
+++ b/arch/powerpc/kvm/booke_host.c
@@ -0,0 +1,83 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/kvm_ppc.h>
+
+unsigned long kvmppc_booke_handlers;
+
+static int kvmppc_booke_init(void)
+{
+ unsigned long ivor[16];
+ unsigned long max_ivor = 0;
+ int i;
+
+ /* We install our own exception handlers by hijacking IVPR. IVPR must
+ * be 16-bit aligned, so we need a 64KB allocation. */
+ kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ VCPU_SIZE_ORDER);
+ if (!kvmppc_booke_handlers)
+ return -ENOMEM;
+
+ /* XXX make sure our handlers are smaller than Linux's */
+
+ /* Copy our interrupt handlers to match host IVORs. That way we don't
+ * have to swap the IVORs on every guest/host transition. */
+ ivor[0] = mfspr(SPRN_IVOR0);
+ ivor[1] = mfspr(SPRN_IVOR1);
+ ivor[2] = mfspr(SPRN_IVOR2);
+ ivor[3] = mfspr(SPRN_IVOR3);
+ ivor[4] = mfspr(SPRN_IVOR4);
+ ivor[5] = mfspr(SPRN_IVOR5);
+ ivor[6] = mfspr(SPRN_IVOR6);
+ ivor[7] = mfspr(SPRN_IVOR7);
+ ivor[8] = mfspr(SPRN_IVOR8);
+ ivor[9] = mfspr(SPRN_IVOR9);
+ ivor[10] = mfspr(SPRN_IVOR10);
+ ivor[11] = mfspr(SPRN_IVOR11);
+ ivor[12] = mfspr(SPRN_IVOR12);
+ ivor[13] = mfspr(SPRN_IVOR13);
+ ivor[14] = mfspr(SPRN_IVOR14);
+ ivor[15] = mfspr(SPRN_IVOR15);
+
+ for (i = 0; i < 16; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + i * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+ return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvmppc_booke_exit(void)
+{
+ free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+ kvm_exit();
+}
+
+module_init(kvmppc_booke_init)
+module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 084ebcd7dd83..95e165baf85f 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -107,18 +107,6 @@ _GLOBAL(kvmppc_resume_host)
li r6, 1
slw r6, r6, r5
-#ifdef CONFIG_KVM_EXIT_TIMING
- /* save exit time */
-1:
- mfspr r7, SPRN_TBRU
- mfspr r8, SPRN_TBRL
- mfspr r9, SPRN_TBRU
- cmpw r9, r7
- bne 1b
- stw r8, VCPU_TIMING_EXIT_TBL(r4)
- stw r9, VCPU_TIMING_EXIT_TBU(r4)
-#endif
-
/* Save the faulting instruction and all GPRs for emulation. */
andi. r7, r6, NEED_INST_MASK
beq ..skip_inst_copy
@@ -347,6 +335,54 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
+ /* Prevent all asynchronous TLB updates. */
+ mfmsr r5
+ lis r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
+ ori r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
+ andc r6, r5, r6
+ mtmsr r6
+
+ /* Load the guest mappings, leaving the host's "pinned" kernel mappings
+ * in place. */
+ mfspr r10, SPRN_MMUCR /* Save host MMUCR. */
+ li r5, PPC44x_TLB_SIZE
+ lis r5, tlb_44x_hwater@ha
+ lwz r5, tlb_44x_hwater@l(r5)
+ mtctr r5
+ addi r9, r4, VCPU_SHADOW_TLB
+ addi r5, r4, VCPU_SHADOW_MOD
+ li r3, 0
+1:
+ lbzx r7, r3, r5
+ cmpwi r7, 0
+ beq 3f
+
+ /* Load guest entry. */
+ mulli r11, r3, TLBE_BYTES
+ add r11, r11, r9
+ lwz r7, 0(r11)
+ mtspr SPRN_MMUCR, r7
+ lwz r7, 4(r11)
+ tlbwe r7, r3, PPC44x_TLB_PAGEID
+ lwz r7, 8(r11)
+ tlbwe r7, r3, PPC44x_TLB_XLAT
+ lwz r7, 12(r11)
+ tlbwe r7, r3, PPC44x_TLB_ATTRIB
+3:
+ addi r3, r3, 1 /* Increment index. */
+ bdnz 1b
+
+ mtspr SPRN_MMUCR, r10 /* Restore host MMUCR. */
+
+ /* Clear bitmap of modified TLB entries */
+ li r5, PPC44x_TLB_SIZE>>2
+ mtctr r5
+ addi r5, r4, VCPU_SHADOW_MOD - 4
+ li r6, 0
+1:
+ stwu r6, 4(r5)
+ bdnz 1b
+
iccci 0, 0 /* XXX hack */
/* Load some guest volatiles. */
@@ -387,18 +423,6 @@ lightweight_exit:
lwz r3, VCPU_SPRG7(r4)
mtspr SPRN_SPRG7, r3
-#ifdef CONFIG_KVM_EXIT_TIMING
- /* save enter time */
-1:
- mfspr r6, SPRN_TBRU
- mfspr r7, SPRN_TBRL
- mfspr r8, SPRN_TBRU
- cmpw r8, r6
- bne 1b
- stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4)
- stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4)
-#endif
-
/* Finish loading guest volatiles and jump to guest. */
lwz r3, VCPU_CTR(r4)
mtctr r3
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93fb..0fce4fbdc20d 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -23,14 +23,161 @@
#include <linux/string.h>
#include <linux/kvm_host.h>
-#include <asm/reg.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
#include <asm/time.h>
#include <asm/byteorder.h>
#include <asm/kvm_ppc.h>
-#include <asm/disassemble.h>
-#include "timing.h"
-void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+#include "44x_tlb.h"
+
+/* Instruction decoding */
+static inline unsigned int get_op(u32 inst)
+{
+ return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+ return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+ return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+ return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+ return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+ return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+ return inst & 0xffff;
+}
+
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct tlbe *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
+{
+ u64 eaddr;
+ u64 raddr;
+ u64 asid;
+ u32 flags;
+ struct tlbe *tlbe;
+ unsigned int ra;
+ unsigned int rs;
+ unsigned int ws;
+ unsigned int index;
+
+ ra = get_ra(inst);
+ rs = get_rs(inst);
+ ws = get_ws(inst);
+
+ index = vcpu->arch.gpr[ra];
+ if (index > PPC44x_TLB_SIZE) {
+ printk("%s: index %d\n", __func__, index);
+ kvmppc_dump_vcpu(vcpu);
+ return EMULATE_FAIL;
+ }
+
+ tlbe = &vcpu->arch.guest_tlb[index];
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+ if (tlbe->word0 & PPC44x_TLB_VALID) {
+ eaddr = get_tlb_eaddr(tlbe);
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid);
+ }
+
+ switch (ws) {
+ case PPC44x_TLB_PAGEID:
+ tlbe->tid = vcpu->arch.mmucr & 0xff;
+ tlbe->word0 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_XLAT:
+ tlbe->word1 = vcpu->arch.gpr[rs];
+ break;
+
+ case PPC44x_TLB_ATTRIB:
+ tlbe->word2 = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (tlbe_is_host_safe(vcpu, tlbe)) {
+ eaddr = get_tlb_eaddr(tlbe);
+ raddr = get_tlb_raddr(tlbe);
+ asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+ flags = tlbe->word2 & 0xffff;
+
+ /* Create a 4KB mapping on the host. If the guest wanted a
+ * large page, only the first 4KB is mapped here and the rest
+ * are mapped on the fly. */
+ kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+ }
+
+ KVMTRACE_5D(GTLB_WRITE, vcpu, index,
+ tlbe->tid, tlbe->word0, tlbe->word1, tlbe->word2,
+ handler);
+
+ return EMULATE_DONE;
+}
+
+static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
/* The decrementer ticks at the same rate as the timebase, so
@@ -46,6 +193,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
}
}
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
/* XXX to do:
* lhax
* lhaux
@@ -60,30 +213,40 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
*
* XXX is_bigendian should depend on MMU mapping or MSR[LE]
*/
-/* XXX Should probably auto-generate instruction decoding for a particular core
- * from opcode tables in the future. */
int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
u32 inst = vcpu->arch.last_inst;
u32 ea;
int ra;
int rb;
+ int rc;
int rs;
int rt;
int sprn;
+ int dcrn;
enum emulation_result emulated = EMULATE_DONE;
int advance = 1;
- /* this default type might be overwritten by subcategories */
- kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
-
switch (get_op(inst)) {
- case 3: /* trap */
- vcpu->arch.esr |= ESR_PTR;
- kvmppc_core_queue_program(vcpu);
+ case 3: /* trap */
+ printk("trap!\n");
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
advance = 0;
break;
+ case 19:
+ switch (get_xop(inst)) {
+ case 50: /* rfi */
+ kvmppc_emul_rfi(vcpu);
+ advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
case 31:
switch (get_xop(inst)) {
@@ -92,11 +255,27 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
+ case 83: /* mfmsr */
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ break;
+
case 87: /* lbzx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
+ case 131: /* wrtee */
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ break;
+
+ case 146: /* mtmsr */
+ rs = get_rs(inst);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
case 151: /* stwx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -104,6 +283,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 1);
break;
+ case 163: /* wrteei */
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ break;
+
case 215: /* stbx */
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
@@ -144,6 +328,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
+ case 323: /* mfdcr */
+ dcrn = get_dcrn(inst);
+ rt = get_rt(inst);
+
+ /* The guest may access CPR0 registers to determine the timebase
+ * frequency, and it must know the real host frequency because it
+ * can directly access the timebase registers.
+ *
+ * It would be possible to emulate those accesses in userspace,
+ * but userspace can really only figure out the end frequency.
+ * We could decompose that into the factors that compute it, but
+ * that's tricky math, and it's easier to just report the real
+ * CPR0 values.
+ */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+ break;
+ case DCRN_CPR0_CONFIG_DATA:
+ local_irq_disable();
+ mtdcr(DCRN_CPR0_CONFIG_ADDR,
+ vcpu->arch.cpr0_cfgaddr);
+ vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+ local_irq_enable();
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = 0;
+ run->dcr.is_write = 0;
+ vcpu->arch.io_gpr = rt;
+ vcpu->arch.dcr_needed = 1;
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
case 339: /* mfspr */
sprn = get_sprn(inst);
rt = get_rt(inst);
@@ -153,8 +373,26 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+ case SPRN_MMUCR:
+ vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_CCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+ case SPRN_CCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
@@ -175,12 +413,42 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
+
default:
- emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
- if (emulated == EMULATE_FAIL) {
- printk("mfspr: unknown spr %x\n", sprn);
- vcpu->arch.gpr[rt] = 0;
- }
+ printk("mfspr: unknown spr %x\n", sprn);
+ vcpu->arch.gpr[rt] = 0;
break;
}
break;
@@ -210,6 +478,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
+ case 451: /* mtdcr */
+ dcrn = get_dcrn(inst);
+ rs = get_rs(inst);
+
+ /* emulate some access in kernel */
+ switch (dcrn) {
+ case DCRN_CPR0_CONFIG_ADDR:
+ vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+ break;
+ default:
+ run->dcr.dcrn = dcrn;
+ run->dcr.data = vcpu->arch.gpr[rs];
+ run->dcr.is_write = 1;
+ vcpu->arch.dcr_needed = 1;
+ emulated = EMULATE_DO_DCR;
+ }
+
+ break;
+
case 467: /* mtspr */
sprn = get_sprn(inst);
rs = get_rs(inst);
@@ -218,6 +505,22 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
case SPRN_SRR1:
vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID:
+ kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ case SPRN_CCR0:
+ vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_CCR1:
+ vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@@ -229,6 +532,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_emulate_dec(vcpu);
break;
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
case SPRN_SPRG0:
vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
case SPRN_SPRG1:
@@ -238,10 +549,56 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_SPRG3:
vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
+
default:
- emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
- if (emulated == EMULATE_FAIL)
- printk("mtspr: unknown spr %x\n", sprn);
+ printk("mtspr: unknown spr %x\n", sprn);
+ emulated = EMULATE_FAIL;
break;
}
break;
@@ -272,6 +629,36 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
+ case 978: /* tlbwe */
+ emulated = kvmppc_emul_tlbwe(vcpu, inst);
+ break;
+
+ case 914: { /* tlbsx */
+ int index;
+ unsigned int as = get_mmucr_sts(vcpu);
+ unsigned int pid = get_mmucr_stid(vcpu);
+
+ rt = get_rt(inst);
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ rc = get_rc(inst);
+
+ ea = vcpu->arch.gpr[rb];
+ if (ra)
+ ea += vcpu->arch.gpr[ra];
+
+ index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+ if (rc) {
+ if (index < 0)
+ vcpu->arch.cr &= ~0x20000000;
+ else
+ vcpu->arch.cr |= 0x20000000;
+ }
+ vcpu->arch.gpr[rt] = index;
+
+ }
+ break;
+
case 790: /* lhbrx */
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
@@ -287,9 +674,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 0);
break;
+ case 966: /* iccci */
+ break;
+
default:
- /* Attempt core-specific emulation below. */
+ printk("unknown: op %d xop %d\n", get_op(inst),
+ get_xop(inst));
emulated = EMULATE_FAIL;
+ break;
}
break;
@@ -372,19 +764,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
default:
+ printk("unknown op %d\n", get_op(inst));
emulated = EMULATE_FAIL;
+ break;
}
- if (emulated == EMULATE_FAIL) {
- emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance);
- if (emulated == EMULATE_FAIL) {
- advance = 0;
- printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
- "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
- }
- }
-
- KVMTRACE_3D(PPC_INSTR, vcpu, inst, (int)vcpu->arch.pc, emulated, entryexit);
+ KVMTRACE_3D(PPC_INSTR, vcpu, inst, vcpu->arch.pc, emulated, entryexit);
if (advance)
vcpu->arch.pc += 4; /* Advance past emulated instruction. */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 74b576a4815c..fda9baada132 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -28,7 +28,7 @@
#include <asm/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/tlbflush.h>
-#include "timing.h"
+
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
@@ -98,7 +98,14 @@ void kvm_arch_hardware_unsetup(void)
void kvm_arch_check_processor_compat(void *rtn)
{
- *(int *)rtn = kvmppc_core_check_processor_compat();
+ int r;
+
+ if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ *(int *)rtn = r;
}
struct kvm *kvm_arch_create_vm(void)
@@ -136,6 +143,9 @@ int kvm_dev_ioctl_check_extension(long ext)
int r;
switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ r = 1;
+ break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
@@ -168,15 +178,30 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
struct kvm_vcpu *vcpu;
- vcpu = kvmppc_core_vcpu_create(kvm, id);
- kvmppc_create_vcpu_debugfs(vcpu, id);
+ int err;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
return vcpu;
+
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
}
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
- kvmppc_remove_vcpu_debugfs(vcpu);
- kvmppc_core_vcpu_free(vcpu);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -186,14 +211,16 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvmppc_core_pending_dec(vcpu);
+ unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
+
+ return test_bit(priority, &vcpu->arch.pending_exceptions);
}
static void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
- kvmppc_core_queue_dec(vcpu);
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
@@ -214,24 +241,100 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvmppc_core_destroy_mmu(vcpu);
}
+/* Note: clearing MSR[DE] just means that the debug interrupt will not be
+ * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
+ * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
+ * will be delivered as an "imprecise debug event" (which is indicated by
+ * DBSR[IDE].
+ */
+static void kvmppc_disable_debug_interrupts(void)
+{
+ mtmsr(mfmsr() & ~MSR_DE);
+}
+
+static void kvmppc_restore_host_debug_state(struct kvm_vcpu *vcpu)
+{
+ kvmppc_disable_debug_interrupts();
+
+ mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
+ mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
+ mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
+ mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
+ mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
+ mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
+ mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
+ mtmsr(vcpu->arch.host_msr);
+}
+
+static void kvmppc_load_guest_debug_registers(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+ u32 dbcr0 = 0;
+
+ vcpu->arch.host_msr = mfmsr();
+ kvmppc_disable_debug_interrupts();
+
+ /* Save host debug register state. */
+ vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
+ vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
+ vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
+ vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
+ vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
+ vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
+ vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
+
+ /* set registers up for guest */
+
+ if (dbg->bp[0]) {
+ mtspr(SPRN_IAC1, dbg->bp[0]);
+ dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
+ }
+ if (dbg->bp[1]) {
+ mtspr(SPRN_IAC2, dbg->bp[1]);
+ dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
+ }
+ if (dbg->bp[2]) {
+ mtspr(SPRN_IAC3, dbg->bp[2]);
+ dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
+ }
+ if (dbg->bp[3]) {
+ mtspr(SPRN_IAC4, dbg->bp[3]);
+ dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
+ }
+
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBCR1, 0);
+ mtspr(SPRN_DBCR2, 0);
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ int i;
+
if (vcpu->guest_debug.enabled)
- kvmppc_core_load_guest_debugstate(vcpu);
+ kvmppc_load_guest_debug_registers(vcpu);
- kvmppc_core_vcpu_load(vcpu, cpu);
+ /* Mark every guest entry in the shadow TLB entry modified, so that they
+ * will all be reloaded on the next vcpu run (instead of being
+ * demand-faulted). */
+ for (i = 0; i <= tlb_44x_hwater; i++)
+ kvmppc_tlbe_set_modified(vcpu, i);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (vcpu->guest_debug.enabled)
- kvmppc_core_load_host_debugstate(vcpu);
+ kvmppc_restore_host_debug_state(vcpu);
- kvmppc_core_vcpu_put(vcpu);
+ /* Don't leave guest TLB entries resident when being de-scheduled. */
+ /* XXX It would be nice to differentiate between heavyweight exit and
+ * sched_out here, since we could avoid the TLB flush for heavyweight
+ * exits. */
+ _tlbia();
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
{
int i;
@@ -251,14 +354,14 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
*gpr = run->dcr.data;
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
if (run->mmio.len > sizeof(*gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
@@ -356,7 +459,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->arch.dcr_needed = 0;
}
- kvmppc_core_deliver_interrupts(vcpu);
+ kvmppc_check_and_deliver_interrupts(vcpu);
local_irq_disable();
kvm_guest_enter();
@@ -374,7 +477,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
- kvmppc_core_queue_external(vcpu, irq);
+ kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
if (waitqueue_active(&vcpu->wq)) {
wake_up_interruptible(&vcpu->wq);
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
deleted file mode 100644
index 47ee603f558e..000000000000
--- a/arch/powerpc/kvm/timing.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Hollis Blanchard <hollisb@us.ibm.com>
- * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/fs.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-
-#include <asm/time.h>
-#include <asm-generic/div64.h>
-
-#include "timing.h"
-
-void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu)
-{
- int i;
-
- /* pause guest execution to avoid concurrent updates */
- local_irq_disable();
- mutex_lock(&vcpu->mutex);
-
- vcpu->arch.last_exit_type = 0xDEAD;
- for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
- vcpu->arch.timing_count_type[i] = 0;
- vcpu->arch.timing_max_duration[i] = 0;
- vcpu->arch.timing_min_duration[i] = 0xFFFFFFFF;
- vcpu->arch.timing_sum_duration[i] = 0;
- vcpu->arch.timing_sum_quad_duration[i] = 0;
- }
- vcpu->arch.timing_last_exit = 0;
- vcpu->arch.timing_exit.tv64 = 0;
- vcpu->arch.timing_last_enter.tv64 = 0;
-
- mutex_unlock(&vcpu->mutex);
- local_irq_enable();
-}
-
-static void add_exit_timing(struct kvm_vcpu *vcpu, u64 duration, int type)
-{
- u64 old;
-
- do_div(duration, tb_ticks_per_usec);
- if (unlikely(duration > 0xFFFFFFFF)) {
- printk(KERN_ERR"%s - duration too big -> overflow"
- " duration %lld type %d exit #%d\n",
- __func__, duration, type,
- vcpu->arch.timing_count_type[type]);
- return;
- }
-
- vcpu->arch.timing_count_type[type]++;
-
- /* sum */
- old = vcpu->arch.timing_sum_duration[type];
- vcpu->arch.timing_sum_duration[type] += duration;
- if (unlikely(old > vcpu->arch.timing_sum_duration[type])) {
- printk(KERN_ERR"%s - wrap adding sum of durations"
- " old %lld new %lld type %d exit # of type %d\n",
- __func__, old, vcpu->arch.timing_sum_duration[type],
- type, vcpu->arch.timing_count_type[type]);
- }
-
- /* square sum */
- old = vcpu->arch.timing_sum_quad_duration[type];
- vcpu->arch.timing_sum_quad_duration[type] += (duration*duration);
- if (unlikely(old > vcpu->arch.timing_sum_quad_duration[type])) {
- printk(KERN_ERR"%s - wrap adding sum of squared durations"
- " old %lld new %lld type %d exit # of type %d\n",
- __func__, old,
- vcpu->arch.timing_sum_quad_duration[type],
- type, vcpu->arch.timing_count_type[type]);
- }
-
- /* set min/max */
- if (unlikely(duration < vcpu->arch.timing_min_duration[type]))
- vcpu->arch.timing_min_duration[type] = duration;
- if (unlikely(duration > vcpu->arch.timing_max_duration[type]))
- vcpu->arch.timing_max_duration[type] = duration;
-}
-
-void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu)
-{
- u64 exit = vcpu->arch.timing_last_exit;
- u64 enter = vcpu->arch.timing_last_enter.tv64;
-
- /* save exit time, used next exit when the reenter time is known */
- vcpu->arch.timing_last_exit = vcpu->arch.timing_exit.tv64;
-
- if (unlikely(vcpu->arch.last_exit_type == 0xDEAD || exit == 0))
- return; /* skip incomplete cycle (e.g. after reset) */
-
- /* update statistics for average and standard deviation */
- add_exit_timing(vcpu, (enter - exit), vcpu->arch.last_exit_type);
- /* enter -> timing_last_exit is time spent in guest - log this too */
- add_exit_timing(vcpu, (vcpu->arch.timing_last_exit - enter),
- TIMEINGUEST);
-}
-
-static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = {
- [MMIO_EXITS] = "MMIO",
- [DCR_EXITS] = "DCR",
- [SIGNAL_EXITS] = "SIGNAL",
- [ITLB_REAL_MISS_EXITS] = "ITLBREAL",
- [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT",
- [DTLB_REAL_MISS_EXITS] = "DTLBREAL",
- [DTLB_VIRT_MISS_EXITS] = "DTLBVIRT",
- [SYSCALL_EXITS] = "SYSCALL",
- [ISI_EXITS] = "ISI",
- [DSI_EXITS] = "DSI",
- [EMULATED_INST_EXITS] = "EMULINST",
- [EMULATED_MTMSRWE_EXITS] = "EMUL_WAIT",
- [EMULATED_WRTEE_EXITS] = "EMUL_WRTEE",
- [EMULATED_MTSPR_EXITS] = "EMUL_MTSPR",
- [EMULATED_MFSPR_EXITS] = "EMUL_MFSPR",
- [EMULATED_MTMSR_EXITS] = "EMUL_MTMSR",
- [EMULATED_MFMSR_EXITS] = "EMUL_MFMSR",
- [EMULATED_TLBSX_EXITS] = "EMUL_TLBSX",
- [EMULATED_TLBWE_EXITS] = "EMUL_TLBWE",
- [EMULATED_RFI_EXITS] = "EMUL_RFI",
- [DEC_EXITS] = "DEC",
- [EXT_INTR_EXITS] = "EXTINT",
- [HALT_WAKEUP] = "HALT",
- [USR_PR_INST] = "USR_PR_INST",
- [FP_UNAVAIL] = "FP_UNAVAIL",
- [DEBUG_EXITS] = "DEBUG",
- [TIMEINGUEST] = "TIMEINGUEST"
-};
-
-static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
-{
- struct kvm_vcpu *vcpu = m->private;
- int i;
-
- seq_printf(m, "%s", "type count min max sum sum_squared\n");
-
- for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
- seq_printf(m, "%12s %10d %10lld %10lld %20lld %20lld\n",
- kvm_exit_names[i],
- vcpu->arch.timing_count_type[i],
- vcpu->arch.timing_min_duration[i],
- vcpu->arch.timing_max_duration[i],
- vcpu->arch.timing_sum_duration[i],
- vcpu->arch.timing_sum_quad_duration[i]);
- }
- return 0;
-}
-
-/* Write 'c' to clear the timing statistics. */
-static ssize_t kvmppc_exit_timing_write(struct file *file,
- const char __user *user_buf,
- size_t count, loff_t *ppos)
-{
- int err = -EINVAL;
- char c;
-
- if (count > 1) {
- goto done;
- }
-
- if (get_user(c, user_buf)) {
- err = -EFAULT;
- goto done;
- }
-
- if (c == 'c') {
- struct seq_file *seqf = (struct seq_file *)file->private_data;
- struct kvm_vcpu *vcpu = seqf->private;
- /* Write does not affect our buffers previously generated with
- * show. seq_file is locked here to prevent races of init with
- * a show call */
- mutex_lock(&seqf->lock);
- kvmppc_init_timing_stats(vcpu);
- mutex_unlock(&seqf->lock);
- err = count;
- }
-
-done:
- return err;
-}
-
-static int kvmppc_exit_timing_open(struct inode *inode, struct file *file)
-{
- return single_open(file, kvmppc_exit_timing_show, inode->i_private);
-}
-
-static struct file_operations kvmppc_exit_timing_fops = {
- .owner = THIS_MODULE,
- .open = kvmppc_exit_timing_open,
- .read = seq_read,
- .write = kvmppc_exit_timing_write,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
-{
- static char dbg_fname[50];
- struct dentry *debugfs_file;
-
- snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
- current->pid, id);
- debugfs_file = debugfs_create_file(dbg_fname, 0666,
- kvm_debugfs_dir, vcpu,
- &kvmppc_exit_timing_fops);
-
- if (!debugfs_file) {
- printk(KERN_ERR"%s: error creating debugfs file %s\n",
- __func__, dbg_fname);
- return;
- }
-
- vcpu->arch.debugfs_exit_timing = debugfs_file;
-}
-
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.debugfs_exit_timing) {
- debugfs_remove(vcpu->arch.debugfs_exit_timing);
- vcpu->arch.debugfs_exit_timing = NULL;
- }
-}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
deleted file mode 100644
index bb13b1f3cd5a..000000000000
--- a/arch/powerpc/kvm/timing.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright IBM Corp. 2008
- *
- * Authors: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
- */
-
-#ifndef __POWERPC_KVM_EXITTIMING_H__
-#define __POWERPC_KVM_EXITTIMING_H__
-
-#include <linux/kvm_host.h>
-#include <asm/kvm_host.h>
-
-#ifdef CONFIG_KVM_EXIT_TIMING
-void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
-void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
-
-static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
-{
- vcpu->arch.last_exit_type = type;
-}
-
-#else
-/* if exit timing is not configured there is no need to build the c file */
-static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
-static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
-static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
- unsigned int id) {}
-static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
-static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
-#endif /* CONFIG_KVM_EXIT_TIMING */
-
-/* account the exit in kvm_stats */
-static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
-{
- /* type has to be known at build time for optimization */
- BUILD_BUG_ON(__builtin_constant_p(type));
- switch (type) {
- case EXT_INTR_EXITS:
- vcpu->stat.ext_intr_exits++;
- break;
- case DEC_EXITS:
- vcpu->stat.dec_exits++;
- break;
- case EMULATED_INST_EXITS:
- vcpu->stat.emulated_inst_exits++;
- break;
- case DCR_EXITS:
- vcpu->stat.dcr_exits++;
- break;
- case DSI_EXITS:
- vcpu->stat.dsi_exits++;
- break;
- case ISI_EXITS:
- vcpu->stat.isi_exits++;
- break;
- case SYSCALL_EXITS:
- vcpu->stat.syscall_exits++;
- break;
- case DTLB_REAL_MISS_EXITS:
- vcpu->stat.dtlb_real_miss_exits++;
- break;
- case DTLB_VIRT_MISS_EXITS:
- vcpu->stat.dtlb_virt_miss_exits++;
- break;
- case MMIO_EXITS:
- vcpu->stat.mmio_exits++;
- break;
- case ITLB_REAL_MISS_EXITS:
- vcpu->stat.itlb_real_miss_exits++;
- break;
- case ITLB_VIRT_MISS_EXITS:
- vcpu->stat.itlb_virt_miss_exits++;
- break;
- case SIGNAL_EXITS:
- vcpu->stat.signal_exits++;
- break;
- }
-}
-
-/* wrapper to set exit time and account for it in kvm_stats */
-static inline void kvmppc_account_exit(struct kvm_vcpu *vcpu, int type)
-{
- kvmppc_set_exit_type(vcpu, type);
- kvmppc_account_exit_stat(vcpu, type);
-}
-
-#endif /* __POWERPC_KVM_EXITTIMING_H__ */
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index 0d566833e48d..d74002f95794 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,11 +42,4 @@ struct kvm_fpu {
__u64 fprs[16];
};
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 54b6534ffb99..8b00eb2ddf57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -113,6 +113,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_dev_ioctl_check_extension(long ext)
{
switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ return 1;
default:
return 0;
}
@@ -183,6 +185,8 @@ struct kvm *kvm_arch_create_vm(void)
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
+ try_module_get(THIS_MODULE);
+
return kvm;
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@@ -192,33 +196,13 @@ out_nokvm:
return ERR_PTR(rc);
}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- VCPU_EVENT(vcpu, 3, "%s", "free cpu");
- free_page((unsigned long)(vcpu->arch.sie_block));
- kvm_vcpu_uninit(vcpu);
- kfree(vcpu);
-}
-
-static void kvm_free_vcpus(struct kvm *kvm)
-{
- unsigned int i;
-
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_arch_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_free_vcpus(kvm);
+ debug_unregister(kvm->arch.dbf);
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
- debug_unregister(kvm->arch.dbf);
kfree(kvm);
+ module_put(THIS_MODULE);
}
/* Section: vcpu related */
@@ -229,7 +213,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- /* Nothing todo */
+ /* kvm common code refers to this, but does'nt call it */
+ BUG();
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -323,6 +308,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
vcpu->arch.sie_block);
+ try_module_get(THIS_MODULE);
+
return vcpu;
out_free_cpu:
kfree(vcpu);
@@ -330,6 +317,14 @@ out_nomem:
return ERR_PTR(rc);
}
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+ free_page((unsigned long)(vcpu->arch.sie_block));
+ kfree(vcpu);
+ module_put(THIS_MODULE);
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
@@ -418,8 +413,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
{
return -EINVAL; /* not implemented yet */
}
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4baf107a00e0..b95162af0bf6 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -205,24 +205,6 @@ struct kvm_pit_channel_state {
__s64 count_load_time;
};
-struct kvm_debug_exit_arch {
- __u32 exception;
- __u32 pad;
- __u64 pc;
- __u64 dr6;
- __u64 dr7;
-};
-
-#define KVM_GUESTDBG_USE_SW_BP 0x00010000
-#define KVM_GUESTDBG_USE_HW_BP 0x00020000
-#define KVM_GUESTDBG_INJECT_DB 0x00040000
-#define KVM_GUESTDBG_INJECT_BP 0x00080000
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
- __u64 debugreg[8];
-};
-
struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index da65439588a8..8346be87cfa1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -21,8 +21,6 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
-#include <asm/mtrr.h>
-#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -88,7 +86,6 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40
-#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
extern spinlock_t kvm_lock;
@@ -135,18 +132,11 @@ enum {
#define KVM_NR_MEM_OBJS 40
-#define KVM_NR_DB_REGS 4
-
-#define DR6_BD (1 << 13)
-#define DR6_BS (1 << 14)
-#define DR6_FIXED_1 0xffff0ff0
-#define DR6_VOLATILE 0x0000e00f
-
-#define DR7_BP_EN_MASK 0x000000ff
-#define DR7_GE (1 << 9)
-#define DR7_GD (1 << 13)
-#define DR7_FIXED_1 0x00000400
-#define DR7_VOLATILE 0xffff23ff
+struct kvm_guest_debug {
+ int enabled;
+ unsigned long bp[4];
+ int singlestep;
+};
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -190,8 +180,6 @@ struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
- struct list_head oos_link;
-
/*
* The following two entries are used to key the shadow page in the
* hash table.
@@ -202,16 +190,13 @@ struct kvm_mmu_page {
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
- /*
- * One bit set per slot which has memory
- * in this shadow page.
- */
- DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+ unsigned long slot_bitmap; /* One bit set per slot which has memory
+ * in this shadow page.
+ */
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
- bool global;
- unsigned int unsync_children;
+ bool unsync_children;
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
@@ -266,7 +251,6 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
- u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
@@ -343,19 +327,8 @@ struct kvm_vcpu_arch {
bool nmi_pending;
bool nmi_injected;
- bool nmi_window_open;
-
- struct mtrr_state_type mtrr_state;
- u32 pat;
-
- int switch_db_regs;
- unsigned long host_db[KVM_NR_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
- unsigned long db[KVM_NR_DB_REGS];
- unsigned long dr6;
- unsigned long dr7;
- unsigned long eff_db[KVM_NR_DB_REGS];
+
+ u64 mtrr[0x100];
};
struct kvm_mem_alias {
@@ -377,13 +350,11 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
- struct list_head oos_global_pages;
struct dmar_domain *intel_iommu_domain;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list;
- int vapics_in_nmi_mode;
int round_robin_prev_vcpu;
unsigned int tss_addr;
@@ -407,7 +378,6 @@ struct kvm_vm_stat {
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
- u32 mmu_unsync_global;
u32 remote_tlb_flush;
u32 lpages;
};
@@ -427,7 +397,6 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
- u32 request_nmi_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
@@ -436,7 +405,6 @@ struct kvm_vcpu_stat {
u32 insn_emulation_fail;
u32 hypercalls;
u32 irq_injections;
- u32 nmi_injections;
};
struct descriptor_table {
@@ -464,7 +432,8 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg);
+ struct kvm_debug_guest *dbg);
+ void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -508,7 +477,6 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
- int (*get_mt_mask_shift)(void);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -522,7 +490,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
+ u64 dirty_mask, u64 nx_mask, u64 x_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
@@ -619,14 +587,12 @@ unsigned long segment_base(u16 selector);
void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu);
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes,
- bool guest_initiated);
+ const u8 *new, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@@ -641,8 +607,6 @@ void kvm_disable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
-
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
@@ -738,6 +702,18 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}
+#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
+#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
+#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
+#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
+#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
+#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
+#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
+#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
+#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define TSS_IOPB_BASE_OFFSET 0x66
@@ -754,10 +730,6 @@ enum {
TASK_SWITCH_GATE = 3,
};
-#define HF_GIF_MASK (1 << 0)
-#define HF_HIF_MASK (1 << 1)
-#define HF_VINTR_MASK (1 << 2)
-
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h
index 6a159732881a..25179a29f208 100644
--- a/arch/x86/include/asm/kvm_x86_emulate.h
+++ b/arch/x86/include/asm/kvm_x86_emulate.h
@@ -123,7 +123,6 @@ struct decode_cache {
u8 ad_bytes;
u8 rex_prefix;
struct operand src;
- struct operand src2;
struct operand dst;
bool has_seg_override;
u8 seg_override;
@@ -147,18 +146,22 @@ struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
+ /* Linear faulting address (if emulating a page-faulting instruction) */
unsigned long eflags;
+
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
+
u32 cs_base;
/* decode cache */
+
struct decode_cache decode;
};
/* Repeat String Operation Prefix */
-#define REPE_PREFIX 1
-#define REPNE_PREFIX 2
+#define REPE_PREFIX 1
+#define REPNE_PREFIX 2
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
@@ -167,7 +170,7 @@ struct x86_emulate_ctxt {
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
/* Host execution mode. */
-#if defined(CONFIG_X86_32)
+#if defined(__i386__)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
#elif defined(CONFIG_X86_64)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e7625b1d98a6..e38859d577a1 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,13 +18,11 @@
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
-#define _EFER_SVME 12 /* Enable virtualization */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
-#define EFER_SVME (1<<_EFER_SVME)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
@@ -331,9 +329,4 @@
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
-/* AMD-V MSRs */
-
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117
-
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index cb988aab716d..7c1e4258b31e 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -57,31 +57,6 @@ struct mtrr_gentry {
};
#endif /* !__i386__ */
-struct mtrr_var_range {
- u32 base_lo;
- u32 base_hi;
- u32 mask_lo;
- u32 mask_hi;
-};
-
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef u8 mtrr_type;
-
-#define MTRR_NUM_FIXED_RANGES 88
-#define MTRR_MAX_VAR_RANGES 256
-
-struct mtrr_state_type {
- struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
- mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
/* These are the various ioctls */
#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
deleted file mode 100644
index e0f9aa16358b..000000000000
--- a/arch/x86/include/asm/virtext.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-#include <asm/system.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
- unsigned long ecx = cpuid_ecx(1);
- return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/** Disable VMX on the current CPU
- *
- * vmxoff causes a undefined-opcode exception if vmxon was not run
- * on the CPU previously. Only call this function if you know VMX
- * is enabled.
- */
-static inline void cpu_vmxoff(void)
-{
- asm volatile (ASM_VMX_VMXOFF : : : "cc");
- write_cr4(read_cr4() & ~X86_CR4_VMXE);
-}
-
-static inline int cpu_vmx_enabled(void)
-{
- return read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
- if (cpu_vmx_enabled())
- cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
- uint32_t eax, ebx, ecx, edx;
-
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
- if (msg)
- *msg = "not amd";
- return 0;
- }
-
- cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
- if (eax < SVM_CPUID_FUNC) {
- if (msg)
- *msg = "can't execute cpuid_8000000a";
- return 0;
- }
-
- cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
- if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
- if (msg)
- *msg = "svm not available";
- return 0;
- }
- return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
- if (cpu_has_svm(NULL))
- cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b59ddcc88cd8..4e8d77f01eeb 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -14,6 +14,14 @@
#include <asm/pat.h>
#include "mtrr.h"
+struct mtrr_state {
+ struct mtrr_var_range var_ranges[MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ mtrr_type def_type;
+};
+
struct fixed_range_block {
int base_msr; /* start address of an MTRR block */
int ranges; /* number of MTRRs in this block */
@@ -27,12 +35,10 @@ static struct fixed_range_block fixed_range_blocks[] = {
};
static unsigned long smp_changes_mask;
+static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
u64 mtrr_tom2;
-struct mtrr_state_type mtrr_state = {};
-EXPORT_SYMBOL_GPL(mtrr_state);
-
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index d6ec7ec30274..1159e269e596 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -49,7 +49,7 @@
u32 num_var_ranges = 0;
-unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
+unsigned int mtrr_usage_table[MAX_VAR_RANGES];
static DEFINE_MUTEX(mtrr_mutex);
u64 size_or_mask, size_and_mask;
@@ -574,7 +574,7 @@ struct mtrr_value {
unsigned long lsize;
};
-static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES];
+static struct mtrr_value mtrr_state[MAX_VAR_RANGES];
static int mtrr_save(struct sys_device * sysdev, pm_message_t state)
{
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index ffd60409cc6d..2dc4ec656b23 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -8,6 +8,11 @@
#define MTRRcap_MSR 0x0fe
#define MTRRdefType_MSR 0x2ff
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MAX_VAR_RANGES 256
#define MTRRfix64K_00000_MSR 0x250
#define MTRRfix16K_80000_MSR 0x258
#define MTRRfix16K_A0000_MSR 0x259
@@ -24,7 +29,11 @@
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04
-extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+extern unsigned int mtrr_usage_table[MAX_VAR_RANGES];
struct mtrr_ops {
u32 vendor;
@@ -61,6 +70,13 @@ struct set_mtrr_context {
u32 ccr3;
};
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
void set_mtrr_done(struct set_mtrr_context *ctxt);
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..d84a852e4cd7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,7 +26,6 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <mach_ipi.h>
@@ -50,15 +49,6 @@ static void kdump_nmi_callback(int cpu, struct die_args *args)
#endif
crash_save_cpu(regs, cpu);
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
disable_local_APIC();
}
@@ -90,14 +80,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
local_irq_disable();
kdump_nmi_shootdown_cpus();
-
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
lapic_shutdown();
#if defined(CONFIG_X86_IO_APIC)
disable_IO_APIC();
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 652fce6d2cce..e169ae9b6a62 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void)
*/
static unsigned long kvm_get_tsc_khz(void)
{
- struct pvclock_vcpu_time_info *src;
- src = &per_cpu(hv_clock, 0);
- return pvclock_tsc_khz(src);
+ return preset_lpj;
}
static void kvm_get_preset_lpj(void)
{
+ struct pvclock_vcpu_time_info *src;
unsigned long khz;
u64 lpj;
- khz = kvm_get_tsc_khz();
+ src = &per_cpu(hv_clock, 0);
+ khz = pvclock_tsc_khz(src);
lpj = ((u64)khz * 1000);
do_div(lpj, HZ);
@@ -194,7 +194,5 @@ void __init kvmclock_init(void)
#endif
kvm_get_preset_lpj();
clocksource_register(&kvm_clock);
- pv_info.paravirt_enabled = 1;
- pv_info.name = "KVM";
}
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 3e819c747201..0acde8f300a1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,7 +12,6 @@
#include <asm/proto.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#ifdef CONFIG_X86_32
# include <linux/dmi.h>
@@ -40,12 +39,6 @@ int reboot_force;
static int reboot_cpu = -1;
#endif
-/* This is set if we need to go through the 'emergency' path.
- * When machine_emergency_restart() is called, we may be on
- * an inconsistent state and won't be able to do a clean cleanup
- */
-static int reboot_emergency;
-
/* This is set by the PCI code if either type 1 or type 2 PCI is detected */
bool port_cf9_safe = false;
@@ -375,48 +368,6 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct die_args *args)
-{
- cpu_emergency_vmxoff();
-}
-
-/* Use NMIs as IPIs to tell all CPUs to disable virtualization
- */
-static void emergency_vmx_disable_all(void)
-{
- /* Just make sure we won't change CPUs while doing this */
- local_irq_disable();
-
- /* We need to disable VMX on all CPUs before rebooting, otherwise
- * we risk hanging up the machine, because the CPU ignore INIT
- * signals when VMX is enabled.
- *
- * We can't take any locks and we may be on an inconsistent
- * state, so we use NMIs as IPIs to tell the other CPUs to disable
- * VMX and halt.
- *
- * For safety, we will avoid running the nmi_shootdown_cpus()
- * stuff unnecessarily, but we don't have a way to check
- * if other CPUs have VMX enabled. So we will call it only if the
- * CPU we are running on has VMX enabled.
- *
- * We will miss cases where VMX is not enabled on all CPUs. This
- * shouldn't do much harm because KVM always enable VMX on all
- * CPUs anyway. But we can miss it on the small window where KVM
- * is still enabling VMX.
- */
- if (cpu_has_vmx() && cpu_vmx_enabled()) {
- /* Disable VMX on this CPU.
- */
- cpu_vmxoff();
-
- /* Halt and disable VMX on the other CPUs */
- nmi_shootdown_cpus(vmxoff_nmi);
-
- }
-}
-
-
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
@@ -425,9 +376,6 @@ static void native_machine_emergency_restart(void)
{
int i;
- if (reboot_emergency)
- emergency_vmx_disable_all();
-
/* Tell the BIOS if we want cold or warm reboot */
*((unsigned short *)__va(0x472)) = reboot_mode;
@@ -534,19 +482,13 @@ void native_machine_shutdown(void)
#endif
}
-static void __machine_emergency_restart(int emergency)
-{
- reboot_emergency = emergency;
- machine_ops.emergency_restart();
-}
-
static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
if (!reboot_force)
machine_shutdown();
- __machine_emergency_restart(0);
+ machine_emergency_restart();
}
static void native_machine_halt(void)
@@ -590,7 +532,7 @@ void machine_shutdown(void)
void machine_emergency_restart(void)
{
- __machine_emergency_restart(1);
+ machine_ops.emergency_restart();
}
void machine_restart(char *cmd)
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623ca..59ebd37ad79e 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -603,29 +603,10 @@ void kvm_free_pit(struct kvm *kvm)
static void __inject_pit_timer_intr(struct kvm *kvm)
{
- struct kvm_vcpu *vcpu;
- int i;
-
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1);
kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0);
mutex_unlock(&kvm->lock);
-
- /*
- * Provides NMI watchdog support via Virtual Wire mode.
- * The route is: PIT -> PIC -> LVT0 in NMI mode.
- *
- * Note: Our Virtual Wire implementation is simplified, only
- * propagating PIT interrupts to all VCPUs when they have set
- * LVT0 to NMI delivery. Other PIC interrupts are just sent to
- * VCPU0, and only if its LVT0 is in EXTINT mode.
- */
- if (kvm->arch.vapics_in_nmi_mode > 0)
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- vcpu = kvm->vcpus[i];
- if (vcpu)
- kvm_apic_nmi_wd_deliver(vcpu);
- }
}
void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index b9e9051650ea..f17c8f5bbf31 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -87,7 +87,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
-void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index ed66e4c078dc..65ef0fc2c036 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -7,7 +7,7 @@
#include <linux/kvm_host.h>
#include <asm/msr.h>
-#include <asm/svm.h>
+#include "svm.h"
static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
@@ -18,6 +18,7 @@ static const u32 host_save_user_msrs[] = {
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+#define NUM_DB_REGS 4
struct kvm_vcpu;
@@ -28,23 +29,18 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
+ unsigned long db_regs[NUM_DB_REGS];
+
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
+ unsigned long host_db_regs[NUM_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
u32 *msrpm;
- struct vmcb *hsave;
- u64 hsave_msr;
-
- u64 nested_vmcb;
-
- /* These are the merged vectors */
- u32 *nested_msrpm;
-
- /* gpa pointers to the real vectors */
- u64 nested_vmcb_msrpm;
};
#endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index afac68c0815c..0fc3cab48943 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -130,11 +130,6 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic)
return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC;
}
-static inline int apic_lvt_nmi_mode(u32 lvt_val)
-{
- return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
-}
-
static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
@@ -359,7 +354,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
case APIC_DM_NMI:
kvm_inject_nmi(vcpu);
- kvm_vcpu_kick(vcpu);
break;
case APIC_DM_INIT:
@@ -386,14 +380,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
}
break;
- case APIC_DM_EXTINT:
- /*
- * Should only be called by kvm_apic_local_deliver() with LVT0,
- * before NMI watchdog was enabled. Already handled by
- * kvm_apic_accept_pic_intr().
- */
- break;
-
default:
printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
delivery_mode);
@@ -677,20 +663,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
apic->timer.period)));
}
-static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
-{
- int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
-
- if (apic_lvt_nmi_mode(lvt0_val)) {
- if (!nmi_wd_enabled) {
- apic_debug("Receive NMI setting on APIC_LVT0 "
- "for cpu %d\n", apic->vcpu->vcpu_id);
- apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
- }
- } else if (nmi_wd_enabled)
- apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
-}
-
static void apic_mmio_write(struct kvm_io_device *this,
gpa_t address, int len, const void *data)
{
@@ -771,11 +743,10 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
- case APIC_LVT0:
- apic_manage_nmi_watchdog(apic, val);
case APIC_LVTT:
case APIC_LVTTHMR:
case APIC_LVTPC:
+ case APIC_LVT0:
case APIC_LVT1:
case APIC_LVTERR:
/* TODO: Check vector */
@@ -990,26 +961,12 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
return 0;
}
-static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
-{
- u32 reg = apic_get_reg(apic, lvt_type);
- int vector, mode, trig_mode;
-
- if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
- vector = reg & APIC_VECTOR_MASK;
- mode = reg & APIC_MODE_MASK;
- trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
- return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
- }
- return 0;
-}
-
-void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
+static int __inject_apic_timer_irq(struct kvm_lapic *apic)
{
- struct kvm_lapic *apic = vcpu->arch.apic;
+ int vector;
- if (apic)
- kvm_apic_local_deliver(apic, APIC_LVT0);
+ vector = apic_lvt_vector(apic, APIC_LVTT);
+ return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0);
}
static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
@@ -1104,8 +1061,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (apic && atomic_read(&apic->timer.pending) > 0) {
- if (kvm_apic_local_deliver(apic, APIC_LVTT))
+ if (apic && apic_lvt_enabled(apic, APIC_LVTT) &&
+ atomic_read(&apic->timer.pending) > 0) {
+ if (__inject_apic_timer_irq(apic))
atomic_dec(&apic->timer.pending);
}
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 641c07844e6e..410ddbc1aa2e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -17,6 +17,7 @@
*
*/
+#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -32,7 +33,6 @@
#include <asm/page.h>
#include <asm/cmpxchg.h>
#include <asm/io.h>
-#include <asm/vmx.h>
/*
* When setting this variable to true it enables Two-Dimensional-Paging
@@ -168,7 +168,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
static u64 __read_mostly shadow_user_mask;
static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mt_mask;
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
{
@@ -184,14 +183,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte)
EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask)
+ u64 dirty_mask, u64 nx_mask, u64 x_mask)
{
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
shadow_dirty_mask = dirty_mask;
shadow_nx_mask = nx_mask;
shadow_x_mask = x_mask;
- shadow_mt_mask = mt_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
@@ -386,9 +384,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
- gfn = unalias_gfn(kvm, gfn);
- write_count = slot_largepage_idx(gfn,
- gfn_to_memslot_unaliased(kvm, gfn));
+ write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
*write_count += 1;
}
@@ -396,20 +392,16 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
{
int *write_count;
- gfn = unalias_gfn(kvm, gfn);
- write_count = slot_largepage_idx(gfn,
- gfn_to_memslot_unaliased(kvm, gfn));
+ write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
*write_count -= 1;
WARN_ON(*write_count < 0);
}
static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
{
- struct kvm_memory_slot *slot;
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
int *largepage_idx;
- gfn = unalias_gfn(kvm, gfn);
- slot = gfn_to_memslot_unaliased(kvm, gfn);
if (slot) {
largepage_idx = slot_largepage_idx(gfn, slot);
return *largepage_idx;
@@ -621,7 +613,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
return NULL;
}
-static int rmap_write_protect(struct kvm *kvm, u64 gfn)
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
u64 *spte;
@@ -667,7 +659,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
spte = rmap_next(kvm, rmapp, spte);
}
- return write_protected;
+ if (write_protected)
+ kvm_flush_remote_tlbs(kvm);
}
static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
@@ -793,11 +786,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
- INIT_LIST_HEAD(&sp->oos_link);
ASSERT(is_empty_shadow_page(sp->spt));
- bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
+ sp->slot_bitmap = 0;
sp->multimapped = 0;
- sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -909,9 +900,8 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte)
struct kvm_mmu_page *sp = page_header(__pa(spte));
index = spte - sp->spt;
- if (!__test_and_set_bit(index, sp->unsync_child_bitmap))
- sp->unsync_children++;
- WARN_ON(!sp->unsync_children);
+ __set_bit(index, sp->unsync_child_bitmap);
+ sp->unsync_children = 1;
}
static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
@@ -938,6 +928,7 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
+ sp->unsync_children = 1;
kvm_mmu_update_parents_unsync(sp);
return 1;
}
@@ -968,41 +959,18 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
}
-#define KVM_PAGE_ARRAY_NR 16
-
-struct kvm_mmu_pages {
- struct mmu_page_and_offset {
- struct kvm_mmu_page *sp;
- unsigned int idx;
- } page[KVM_PAGE_ARRAY_NR];
- unsigned int nr;
-};
-
#define for_each_unsync_children(bitmap, idx) \
for (idx = find_first_bit(bitmap, 512); \
idx < 512; \
idx = find_next_bit(bitmap, 512, idx+1))
-int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
- int idx)
+static int mmu_unsync_walk(struct kvm_mmu_page *sp,
+ struct kvm_unsync_walk *walker)
{
- int i;
-
- if (sp->unsync)
- for (i=0; i < pvec->nr; i++)
- if (pvec->page[i].sp == sp)
- return 0;
+ int i, ret;
- pvec->page[pvec->nr].sp = sp;
- pvec->page[pvec->nr].idx = idx;
- pvec->nr++;
- return (pvec->nr == KVM_PAGE_ARRAY_NR);
-}
-
-static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
- struct kvm_mmu_pages *pvec)
-{
- int i, ret, nr_unsync_leaf = 0;
+ if (!sp->unsync_children)
+ return 0;
for_each_unsync_children(sp->unsync_child_bitmap, i) {
u64 ent = sp->spt[i];
@@ -1012,22 +980,17 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
child = page_header(ent & PT64_BASE_ADDR_MASK);
if (child->unsync_children) {
- if (mmu_pages_add(pvec, child, i))
- return -ENOSPC;
-
- ret = __mmu_unsync_walk(child, pvec);
- if (!ret)
- __clear_bit(i, sp->unsync_child_bitmap);
- else if (ret > 0)
- nr_unsync_leaf += ret;
- else
+ ret = mmu_unsync_walk(child, walker);
+ if (ret)
return ret;
+ __clear_bit(i, sp->unsync_child_bitmap);
}
if (child->unsync) {
- nr_unsync_leaf++;
- if (mmu_pages_add(pvec, child, i))
- return -ENOSPC;
+ ret = walker->entry(child, walker);
+ __clear_bit(i, sp->unsync_child_bitmap);
+ if (ret)
+ return ret;
}
}
}
@@ -1035,17 +998,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
if (find_first_bit(sp->unsync_child_bitmap, 512) == 512)
sp->unsync_children = 0;
- return nr_unsync_leaf;
-}
-
-static int mmu_unsync_walk(struct kvm_mmu_page *sp,
- struct kvm_mmu_pages *pvec)
-{
- if (!sp->unsync_children)
- return 0;
-
- mmu_pages_add(pvec, sp, 0);
- return __mmu_unsync_walk(sp, pvec);
+ return 0;
}
static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
@@ -1068,18 +1021,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
return NULL;
}
-static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp)
-{
- list_del(&sp->oos_link);
- --kvm->stat.mmu_unsync_global;
-}
-
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
WARN_ON(!sp->unsync);
sp->unsync = 0;
- if (sp->global)
- kvm_unlink_unsync_global(kvm, sp);
--kvm->stat.mmu_unsync;
}
@@ -1092,8 +1037,7 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 1;
}
- if (rmap_write_protect(vcpu->kvm, sp->gfn))
- kvm_flush_remote_tlbs(vcpu->kvm);
+ rmap_write_protect(vcpu->kvm, sp->gfn);
kvm_unlink_unsync_page(vcpu->kvm, sp);
if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
@@ -1104,89 +1048,30 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
return 0;
}
-struct mmu_page_path {
- struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
- unsigned int idx[PT64_ROOT_LEVEL-1];
+struct sync_walker {
+ struct kvm_vcpu *vcpu;
+ struct kvm_unsync_walk walker;
};
-#define for_each_sp(pvec, sp, parents, i) \
- for (i = mmu_pages_next(&pvec, &parents, -1), \
- sp = pvec.page[i].sp; \
- i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \
- i = mmu_pages_next(&pvec, &parents, i))
-
-int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents,
- int i)
+static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
{
- int n;
-
- for (n = i+1; n < pvec->nr; n++) {
- struct kvm_mmu_page *sp = pvec->page[n].sp;
+ struct sync_walker *sync_walk = container_of(walk, struct sync_walker,
+ walker);
+ struct kvm_vcpu *vcpu = sync_walk->vcpu;
- if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
- parents->idx[0] = pvec->page[n].idx;
- return n;
- }
-
- parents->parent[sp->role.level-2] = sp;
- parents->idx[sp->role.level-1] = pvec->page[n].idx;
- }
-
- return n;
-}
-
-void mmu_pages_clear_parents(struct mmu_page_path *parents)
-{
- struct kvm_mmu_page *sp;
- unsigned int level = 0;
-
- do {
- unsigned int idx = parents->idx[level];
-
- sp = parents->parent[level];
- if (!sp)
- return;
-
- --sp->unsync_children;
- WARN_ON((int)sp->unsync_children < 0);
- __clear_bit(idx, sp->unsync_child_bitmap);
- level++;
- } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
+ kvm_sync_page(vcpu, sp);
+ return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock));
}
-static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
- struct mmu_page_path *parents,
- struct kvm_mmu_pages *pvec)
+static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
- parents->parent[parent->role.level-1] = NULL;
- pvec->nr = 0;
-}
-
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *parent)
-{
- int i;
- struct kvm_mmu_page *sp;
- struct mmu_page_path parents;
- struct kvm_mmu_pages pages;
-
- kvm_mmu_pages_init(parent, &parents, &pages);
- while (mmu_unsync_walk(parent, &pages)) {
- int protected = 0;
-
- for_each_sp(pages, sp, parents, i)
- protected |= rmap_write_protect(vcpu->kvm, sp->gfn);
-
- if (protected)
- kvm_flush_remote_tlbs(vcpu->kvm);
+ struct sync_walker walker = {
+ .walker = { .entry = mmu_sync_fn, },
+ .vcpu = vcpu,
+ };
- for_each_sp(pages, sp, parents, i) {
- kvm_sync_page(vcpu, sp);
- mmu_pages_clear_parents(&parents);
- }
+ while (mmu_unsync_walk(sp, &walker.walker))
cond_resched_lock(&vcpu->kvm->mmu_lock);
- kvm_mmu_pages_init(parent, &parents, &pages);
- }
}
static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
@@ -1244,8 +1129,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
sp->role = role;
hlist_add_head(&sp->hash_link, bucket);
if (!metaphysical) {
- if (rmap_write_protect(vcpu->kvm, gfn))
- kvm_flush_remote_tlbs(vcpu->kvm);
+ rmap_write_protect(vcpu->kvm, gfn);
account_shadowed(vcpu->kvm, gfn);
}
if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte)
@@ -1269,8 +1153,6 @@ static int walk_shadow(struct kvm_shadow_walk *walker,
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
- if (!shadow_addr)
- return 1;
--level;
}
@@ -1355,29 +1237,33 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
}
}
-static int mmu_zap_unsync_children(struct kvm *kvm,
- struct kvm_mmu_page *parent)
-{
- int i, zapped = 0;
- struct mmu_page_path parents;
- struct kvm_mmu_pages pages;
-
- if (parent->role.level == PT_PAGE_TABLE_LEVEL)
- return 0;
+struct zap_walker {
+ struct kvm_unsync_walk walker;
+ struct kvm *kvm;
+ int zapped;
+};
- kvm_mmu_pages_init(parent, &parents, &pages);
- while (mmu_unsync_walk(parent, &pages)) {
- struct kvm_mmu_page *sp;
+static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk)
+{
+ struct zap_walker *zap_walk = container_of(walk, struct zap_walker,
+ walker);
+ kvm_mmu_zap_page(zap_walk->kvm, sp);
+ zap_walk->zapped = 1;
+ return 0;
+}
- for_each_sp(pages, sp, parents, i) {
- kvm_mmu_zap_page(kvm, sp);
- mmu_pages_clear_parents(&parents);
- }
- zapped += pages.nr;
- kvm_mmu_pages_init(parent, &parents, &pages);
- }
+static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ struct zap_walker walker = {
+ .walker = { .entry = mmu_zap_fn, },
+ .kvm = kvm,
+ .zapped = 0,
+ };
- return zapped;
+ if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+ return 0;
+ mmu_unsync_walk(sp, &walker.walker);
+ return walker.zapped;
}
static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -1476,7 +1362,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
struct kvm_mmu_page *sp = page_header(__pa(pte));
- __set_bit(slot, sp->slot_bitmap);
+ __set_bit(slot, &sp->slot_bitmap);
}
static void mmu_convert_notrap(struct kvm_mmu_page *sp)
@@ -1507,110 +1393,6 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
return page;
}
-/*
- * The function is based on mtrr_type_lookup() in
- * arch/x86/kernel/cpu/mtrr/generic.c
- */
-static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
- u64 start, u64 end)
-{
- int i;
- u64 base, mask;
- u8 prev_match, curr_match;
- int num_var_ranges = KVM_NR_VAR_MTRR;
-
- if (!mtrr_state->enabled)
- return 0xFF;
-
- /* Make end inclusive end, instead of exclusive */
- end--;
-
- /* Look in fixed ranges. Just return the type as per start */
- if (mtrr_state->have_fixed && (start < 0x100000)) {
- int idx;
-
- if (start < 0x80000) {
- idx = 0;
- idx += (start >> 16);
- return mtrr_state->fixed_ranges[idx];
- } else if (start < 0xC0000) {
- idx = 1 * 8;
- idx += ((start - 0x80000) >> 14);
- return mtrr_state->fixed_ranges[idx];
- } else if (start < 0x1000000) {
- idx = 3 * 8;
- idx += ((start - 0xC0000) >> 12);
- return mtrr_state->fixed_ranges[idx];
- }
- }
-
- /*
- * Look in variable ranges
- * Look of multiple ranges matching this address and pick type
- * as per MTRR precedence
- */
- if (!(mtrr_state->enabled & 2))
- return mtrr_state->def_type;
-
- prev_match = 0xFF;
- for (i = 0; i < num_var_ranges; ++i) {
- unsigned short start_state, end_state;
-
- if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11)))
- continue;
-
- base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) +
- (mtrr_state->var_ranges[i].base_lo & PAGE_MASK);
- mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) +
- (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK);
-
- start_state = ((start & mask) == (base & mask));
- end_state = ((end & mask) == (base & mask));
- if (start_state != end_state)
- return 0xFE;
-
- if ((start & mask) != (base & mask))
- continue;
-
- curr_match = mtrr_state->var_ranges[i].base_lo & 0xff;
- if (prev_match == 0xFF) {
- prev_match = curr_match;
- continue;
- }
-
- if (prev_match == MTRR_TYPE_UNCACHABLE ||
- curr_match == MTRR_TYPE_UNCACHABLE)
- return MTRR_TYPE_UNCACHABLE;
-
- if ((prev_match == MTRR_TYPE_WRBACK &&
- curr_match == MTRR_TYPE_WRTHROUGH) ||
- (prev_match == MTRR_TYPE_WRTHROUGH &&
- curr_match == MTRR_TYPE_WRBACK)) {
- prev_match = MTRR_TYPE_WRTHROUGH;
- curr_match = MTRR_TYPE_WRTHROUGH;
- }
-
- if (prev_match != curr_match)
- return MTRR_TYPE_UNCACHABLE;
- }
-
- if (prev_match != 0xFF)
- return prev_match;
-
- return mtrr_state->def_type;
-}
-
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
-{
- u8 mtrr;
-
- mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT,
- (gfn << PAGE_SHIFT) + PAGE_SIZE);
- if (mtrr == 0xfe || mtrr == 0xff)
- mtrr = MTRR_TYPE_WRBACK;
- return mtrr;
-}
-
static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
unsigned index;
@@ -1627,15 +1409,9 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
if (s->role.word != sp->role.word)
return 1;
}
+ kvm_mmu_mark_parents_unsync(vcpu, sp);
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
-
- if (sp->global) {
- list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages);
- ++vcpu->kvm->stat.mmu_unsync_global;
- } else
- kvm_mmu_mark_parents_unsync(vcpu, sp);
-
mmu_convert_notrap(sp);
return 0;
}
@@ -1661,22 +1437,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage,
- int global, gfn_t gfn, pfn_t pfn, bool speculative,
+ gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync)
{
u64 spte;
int ret = 0;
- u64 mt_mask = shadow_mt_mask;
- struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
-
- if (!global && sp->global) {
- sp->global = 0;
- if (sp->unsync) {
- kvm_unlink_unsync_global(vcpu->kvm, sp);
- kvm_mmu_mark_parents_unsync(vcpu, sp);
- }
- }
-
/*
* We don't set the accessed bit, since we sometimes want to see
* whether the guest actually used the pte (in order to detect
@@ -1695,11 +1460,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= shadow_user_mask;
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
- if (mt_mask) {
- mt_mask = get_memory_type(vcpu, gfn) <<
- kvm_x86_ops->get_mt_mask_shift();
- spte |= mt_mask;
- }
spte |= (u64)pfn << PAGE_SHIFT;
@@ -1714,15 +1474,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
spte |= PT_WRITABLE_MASK;
- /*
- * Optimization: for pte sync, if spte was writable the hash
- * lookup is unnecessary (and expensive). Write protection
- * is responsibility of mmu_get_page / kvm_sync_page.
- * Same reasoning can be applied to dirty page accounting.
- */
- if (!can_unsync && is_writeble_pte(*shadow_pte))
- goto set_pte;
-
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__func__, gfn);
@@ -1744,8 +1495,8 @@ set_pte:
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
- int *ptwrite, int largepage, int global,
- gfn_t gfn, pfn_t pfn, bool speculative)
+ int *ptwrite, int largepage, gfn_t gfn,
+ pfn_t pfn, bool speculative)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*shadow_pte);
@@ -1778,7 +1529,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
}
}
if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
- dirty, largepage, global, gfn, pfn, speculative, true)) {
+ dirty, largepage, gfn, pfn, speculative, true)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1835,7 +1586,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
|| (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
0, walk->write, 1, &walk->pt_write,
- walk->largepage, 0, gfn, walk->pfn, false);
+ walk->largepage, gfn, walk->pfn, false);
++vcpu->stat.pf_fixed;
return 1;
}
@@ -2022,15 +1773,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
}
}
-static void mmu_sync_global(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_page *sp, *n;
-
- list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link)
- kvm_sync_page(vcpu, sp);
-}
-
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->kvm->mmu_lock);
@@ -2038,13 +1780,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
-void kvm_mmu_sync_global(struct kvm_vcpu *vcpu)
-{
- spin_lock(&vcpu->kvm->mmu_lock);
- mmu_sync_global(vcpu);
- spin_unlock(&vcpu->kvm->mmu_lock);
-}
-
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
return vaddr;
@@ -2443,8 +2178,7 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn)
}
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes,
- bool guest_initiated)
+ const u8 *new, int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -2470,17 +2204,15 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, "pre pte write");
- if (guest_initiated) {
- if (gfn == vcpu->arch.last_pt_write_gfn
- && !last_updated_pte_accessed(vcpu)) {
- ++vcpu->arch.last_pt_write_count;
- if (vcpu->arch.last_pt_write_count >= 3)
- flooded = 1;
- } else {
- vcpu->arch.last_pt_write_gfn = gfn;
- vcpu->arch.last_pt_write_count = 1;
- vcpu->arch.last_pte_updated = NULL;
- }
+ if (gfn == vcpu->arch.last_pt_write_gfn
+ && !last_updated_pte_accessed(vcpu)) {
+ ++vcpu->arch.last_pt_write_count;
+ if (vcpu->arch.last_pt_write_count >= 3)
+ flooded = 1;
+ } else {
+ vcpu->arch.last_pt_write_gfn = gfn;
+ vcpu->arch.last_pt_write_count = 1;
+ vcpu->arch.last_pte_updated = NULL;
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
@@ -2620,7 +2352,9 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
+ spin_lock(&vcpu->kvm->mmu_lock);
vcpu->arch.mmu.invlpg(vcpu, gva);
+ spin_unlock(&vcpu->kvm->mmu_lock);
kvm_mmu_flush_tlb(vcpu);
++vcpu->stat.invlpg;
}
@@ -2717,7 +2451,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
int i;
u64 *pt;
- if (!test_bit(slot, sp->slot_bitmap))
+ if (!test_bit(slot, &sp->slot_bitmap))
continue;
pt = sp->spt;
@@ -3126,8 +2860,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
if (sp->role.metaphysical)
continue;
+ slot = gfn_to_memslot(vcpu->kvm, sp->gfn);
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
- slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn);
rmapp = &slot->rmap[gfn - slot->base_gfn];
if (*rmapp)
printk(KERN_ERR "%s: (%s) shadow page has writable"
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d20640154216..84eee43bbe74 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -82,7 +82,6 @@ struct shadow_walker {
int *ptwrite;
pfn_t pfn;
u64 *sptep;
- gpa_t pte_gpa;
};
static gfn_t gpte_to_gfn(pt_element_t gpte)
@@ -223,7 +222,7 @@ walk:
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
- kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
+ kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte));
walker->ptes[walker->level - 1] = pte;
}
@@ -275,8 +274,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return;
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
- gpte & PT_DIRTY_MASK, NULL, largepage,
- gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
+ gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
pfn, true);
}
@@ -303,9 +301,8 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
sw->user_fault, sw->write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage,
- gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
- gw->gfn, sw->pfn, false);
+ sw->ptwrite, sw->largepage, gw->gfn, sw->pfn,
+ false);
sw->sptep = sptep;
return 1;
}
@@ -469,15 +466,8 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
if (level == PT_PAGE_TABLE_LEVEL) {
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
-
- sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
- sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
-
if (is_shadow_present_pte(*sptep))
rmap_remove(vcpu->kvm, sptep);
set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
@@ -490,26 +480,11 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
- pt_element_t gpte;
struct shadow_walker walker = {
.walker = { .entry = FNAME(shadow_invlpg_entry), },
- .pte_gpa = -1,
};
- spin_lock(&vcpu->kvm->mmu_lock);
walk_shadow(&walker.walker, vcpu, gva);
- spin_unlock(&vcpu->kvm->mmu_lock);
- if (walker.pte_gpa == -1)
- return;
- if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
- sizeof(pt_element_t)))
- return;
- if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
- if (mmu_topup_memory_caches(vcpu))
- return;
- kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
- sizeof(pt_element_t), 0);
- }
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -605,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
- is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
+ is_dirty_pte(gpte), 0, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f67ca16cd797..9c4ce657d963 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,8 +28,6 @@
#include <asm/desc.h>
-#include <asm/virtext.h>
-
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@@ -38,6 +36,9 @@ MODULE_LICENSE("GPL");
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
+#define DR7_GD_MASK (1 << 13)
+#define DR6_BD_MASK (1 << 13)
+
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
@@ -47,15 +48,6 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
-/* Turn on to get debugging output*/
-/* #define NESTED_DEBUG */
-
-#ifdef NESTED_DEBUG
-#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
-#else
-#define nsvm_printk(fmt, args...) do {} while(0)
-#endif
-
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
@@ -66,34 +58,14 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
-static int nested = 0;
-module_param(nested, int, S_IRUGO);
-
static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
-static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque);
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code);
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run);
-
-
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
-static inline bool is_nested(struct vcpu_svm *svm)
-{
- return svm->nested_vmcb;
-}
-
static unsigned long iopm_base;
struct kvm_ldttss_desc {
@@ -183,6 +155,32 @@ static inline void kvm_write_cr2(unsigned long val)
asm volatile ("mov %0, %%cr2" :: "r" (val));
}
+static inline unsigned long read_dr6(void)
+{
+ unsigned long dr6;
+
+ asm volatile ("mov %%dr6, %0" : "=r" (dr6));
+ return dr6;
+}
+
+static inline void write_dr6(unsigned long val)
+{
+ asm volatile ("mov %0, %%dr6" :: "r" (val));
+}
+
+static inline unsigned long read_dr7(void)
+{
+ unsigned long dr7;
+
+ asm volatile ("mov %%dr7, %0" : "=r" (dr7));
+ return dr7;
+}
+
+static inline void write_dr7(unsigned long val)
+{
+ asm volatile ("mov %0, %%dr7" :: "r" (val));
+}
+
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->asid_generation--;
@@ -198,7 +196,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
- to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
+ to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
vcpu->arch.shadow_efer = efer;
}
@@ -207,11 +205,6 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
{
struct vcpu_svm *svm = to_svm(vcpu);
- /* If we are within a nested VM we'd better #VMEXIT and let the
- guest handle the exception */
- if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
- return;
-
svm->vmcb->control.event_inj = nr
| SVM_EVTINJ_VALID
| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -247,24 +240,39 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
- vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
+ vcpu->arch.interrupt_window_open = 1;
}
static int has_svm(void)
{
- const char *msg;
+ uint32_t eax, ebx, ecx, edx;
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ printk(KERN_INFO "has_svm: not amd\n");
+ return 0;
+ }
- if (!cpu_has_svm(&msg)) {
- printk(KERN_INFO "has_svn: %s\n", msg);
+ cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+ if (eax < SVM_CPUID_FUNC) {
+ printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n");
return 0;
}
+ cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
+ if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) {
+ printk(KERN_DEBUG "has_svm: svm not available\n");
+ return 0;
+ }
return 1;
}
static void svm_hardware_disable(void *garbage)
{
- cpu_svm_disable();
+ uint64_t efer;
+
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+ rdmsrl(MSR_EFER, efer);
+ wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
}
static void svm_hardware_enable(void *garbage)
@@ -297,7 +305,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer | EFER_SVME);
+ wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -422,11 +430,6 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
- if (nested) {
- printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
- kvm_enable_efer_bits(EFER_SVME);
- }
-
for_each_online_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -569,7 +572,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = EFER_SVME;
+ save->efer = MSR_EFER_SVME_MASK;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -601,9 +604,6 @@ static void init_vmcb(struct vcpu_svm *svm)
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
-
- svm->nested_vmcb = 0;
- svm->vcpu.arch.hflags = HF_GIF_MASK;
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -628,8 +628,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct vcpu_svm *svm;
struct page *page;
struct page *msrpm_pages;
- struct page *hsave_page;
- struct page *nested_msrpm_pages;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -652,25 +650,14 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
goto uninit;
-
- nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
- if (!nested_msrpm_pages)
- goto uninit;
-
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
- hsave_page = alloc_page(GFP_KERNEL);
- if (!hsave_page)
- goto uninit;
- svm->hsave = page_address(hsave_page);
-
- svm->nested_msrpm = page_address(nested_msrpm_pages);
-
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
+ memset(svm->db_regs, 0, sizeof(svm->db_regs));
init_vmcb(svm);
fx_init(&svm->vcpu);
@@ -695,8 +682,6 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
- __free_page(virt_to_page(svm->hsave));
- __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -746,16 +731,6 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
-static void svm_set_vintr(struct vcpu_svm *svm)
-{
- svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
-}
-
-static void svm_clear_vintr(struct vcpu_svm *svm)
-{
- svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
-}
-
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -797,22 +772,6 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
-
- /*
- * SVM always stores 0 for the 'G' bit in the CS selector in
- * the VMCB on a VMEXIT. This hurts cross-vendor migration:
- * Intel's VMENTRY has a check on the 'G' bit.
- */
- if (seg == VCPU_SREG_CS)
- var->g = s->limit > 0xfffff;
-
- /*
- * Work around a bug where the busy flag in the tr selector
- * isn't exposed
- */
- if (seg == VCPU_SREG_TR)
- var->type |= 0x2;
-
var->unusable = !var->present;
}
@@ -943,37 +902,9 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
{
- int old_debug = vcpu->guest_debug;
- struct vcpu_svm *svm = to_svm(vcpu);
-
- vcpu->guest_debug = dbg->control;
-
- svm->vmcb->control.intercept_exceptions &=
- ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
- if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- svm->vmcb->control.intercept_exceptions |=
- 1 << DB_VECTOR;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- svm->vmcb->control.intercept_exceptions |=
- 1 << BP_VECTOR;
- } else
- vcpu->guest_debug = 0;
-
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
- else
- svm->vmcb->save.dr7 = vcpu->arch.dr7;
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
- svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-
- return 0;
+ return -EOPNOTSUPP;
}
static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -1015,29 +946,7 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
- struct vcpu_svm *svm = to_svm(vcpu);
- unsigned long val;
-
- switch (dr) {
- case 0 ... 3:
- val = vcpu->arch.db[dr];
- break;
- case 6:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- val = vcpu->arch.dr6;
- else
- val = svm->vmcb->save.dr6;
- break;
- case 7:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- val = vcpu->arch.dr7;
- else
- val = svm->vmcb->save.dr7;
- break;
- default:
- val = 0;
- }
-
+ unsigned long val = to_svm(vcpu)->db_regs[dr];
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
return val;
}
@@ -1047,40 +956,33 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
struct vcpu_svm *svm = to_svm(vcpu);
- KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
-
*exception = 0;
+ if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
+ svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
+ svm->vmcb->save.dr6 |= DR6_BD_MASK;
+ *exception = DB_VECTOR;
+ return;
+ }
+
switch (dr) {
case 0 ... 3:
- vcpu->arch.db[dr] = value;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- vcpu->arch.eff_db[dr] = value;
+ svm->db_regs[dr] = value;
return;
case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE)
+ if (vcpu->arch.cr4 & X86_CR4_DE) {
*exception = UD_VECTOR;
- return;
- case 6:
- if (value & 0xffffffff00000000ULL) {
- *exception = GP_VECTOR;
return;
}
- vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
- return;
- case 7:
- if (value & 0xffffffff00000000ULL) {
+ case 7: {
+ if (value & ~((1ULL << 32) - 1)) {
*exception = GP_VECTOR;
return;
}
- vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
- svm->vmcb->save.dr7 = vcpu->arch.dr7;
- vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
- }
+ svm->vmcb->save.dr7 = value;
return;
+ }
default:
- /* FIXME: Possible case? */
printk(KERN_DEBUG "%s: unexpected dr %u\n",
__func__, dr);
*exception = UD_VECTOR;
@@ -1126,27 +1028,6 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
-static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- if (!(svm->vcpu.guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- kvm_queue_exception(&svm->vcpu, DB_VECTOR);
- return 1;
- }
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- return 0;
-}
-
-static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
- kvm_run->debug.arch.exception = BP_VECTOR;
- return 0;
-}
-
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
int er;
@@ -1218,7 +1099,6 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
rep = (io_info & SVM_IOIO_REP_MASK) != 0;
down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
- skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port);
}
@@ -1255,631 +1135,6 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
-static int nested_svm_check_permissions(struct vcpu_svm *svm)
-{
- if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
- || !is_paging(&svm->vcpu)) {
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
- return 1;
- }
-
- if (svm->vmcb->save.cpl) {
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- }
-
- return 0;
-}
-
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
- bool has_error_code, u32 error_code)
-{
- if (is_nested(svm)) {
- svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = error_code;
- svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
- if (nested_svm_exit_handled(svm, false)) {
- nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
-
- nested_svm_vmexit(svm);
- return 1;
- }
- }
-
- return 0;
-}
-
-static inline int nested_svm_intr(struct vcpu_svm *svm)
-{
- if (is_nested(svm)) {
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- return 0;
-
- if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
- return 0;
-
- svm->vmcb->control.exit_code = SVM_EXIT_INTR;
-
- if (nested_svm_exit_handled(svm, false)) {
- nsvm_printk("VMexit -> INTR\n");
- nested_svm_vmexit(svm);
- return 1;
- }
- }
-
- return 0;
-}
-
-static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
-{
- struct page *page;
-
- down_read(&current->mm->mmap_sem);
- page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
- up_read(&current->mm->mmap_sem);
-
- if (is_error_page(page)) {
- printk(KERN_INFO "%s: could not find page at 0x%llx\n",
- __func__, gpa);
- kvm_release_page_clean(page);
- kvm_inject_gp(&svm->vcpu, 0);
- return NULL;
- }
- return page;
-}
-
-static int nested_svm_do(struct vcpu_svm *svm,
- u64 arg1_gpa, u64 arg2_gpa, void *opaque,
- int (*handler)(struct vcpu_svm *svm,
- void *arg1,
- void *arg2,
- void *opaque))
-{
- struct page *arg1_page;
- struct page *arg2_page = NULL;
- void *arg1;
- void *arg2 = NULL;
- int retval;
-
- arg1_page = nested_svm_get_page(svm, arg1_gpa);
- if(arg1_page == NULL)
- return 1;
-
- if (arg2_gpa) {
- arg2_page = nested_svm_get_page(svm, arg2_gpa);
- if(arg2_page == NULL) {
- kvm_release_page_clean(arg1_page);
- return 1;
- }
- }
-
- arg1 = kmap_atomic(arg1_page, KM_USER0);
- if (arg2_gpa)
- arg2 = kmap_atomic(arg2_page, KM_USER1);
-
- retval = handler(svm, arg1, arg2, opaque);
-
- kunmap_atomic(arg1, KM_USER0);
- if (arg2_gpa)
- kunmap_atomic(arg2, KM_USER1);
-
- kvm_release_page_dirty(arg1_page);
- if (arg2_gpa)
- kvm_release_page_dirty(arg2_page);
-
- return retval;
-}
-
-static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
- void *arg1,
- void *arg2,
- void *opaque)
-{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- bool kvm_overrides = *(bool *)opaque;
- u32 exit_code = svm->vmcb->control.exit_code;
-
- if (kvm_overrides) {
- switch (exit_code) {
- case SVM_EXIT_INTR:
- case SVM_EXIT_NMI:
- return 0;
- /* For now we are always handling NPFs when using them */
- case SVM_EXIT_NPF:
- if (npt_enabled)
- return 0;
- break;
- /* When we're shadowing, trap PFs */
- case SVM_EXIT_EXCP_BASE + PF_VECTOR:
- if (!npt_enabled)
- return 0;
- break;
- default:
- break;
- }
- }
-
- switch (exit_code) {
- case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
- u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
- if (nested_vmcb->control.intercept_cr_read & cr_bits)
- return 1;
- break;
- }
- case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
- u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
- if (nested_vmcb->control.intercept_cr_write & cr_bits)
- return 1;
- break;
- }
- case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
- u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
- if (nested_vmcb->control.intercept_dr_read & dr_bits)
- return 1;
- break;
- }
- case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
- u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
- if (nested_vmcb->control.intercept_dr_write & dr_bits)
- return 1;
- break;
- }
- case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
- u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
- if (nested_vmcb->control.intercept_exceptions & excp_bits)
- return 1;
- break;
- }
- default: {
- u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
- nsvm_printk("exit code: 0x%x\n", exit_code);
- if (nested_vmcb->control.intercept & exit_bits)
- return 1;
- }
- }
-
- return 0;
-}
-
-static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
- void *arg1, void *arg2,
- void *opaque)
-{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- u8 *msrpm = (u8 *)arg2;
- u32 t0, t1;
- u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
- u32 param = svm->vmcb->control.exit_info_1 & 1;
-
- if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
- return 0;
-
- switch(msr) {
- case 0 ... 0x1fff:
- t0 = (msr * 2) % 8;
- t1 = msr / 8;
- break;
- case 0xc0000000 ... 0xc0001fff:
- t0 = (8192 + msr - 0xc0000000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- case 0xc0010000 ... 0xc0011fff:
- t0 = (16384 + msr - 0xc0010000) * 2;
- t1 = (t0 / 8);
- t0 %= 8;
- break;
- default:
- return 1;
- break;
- }
- if (msrpm[t1] & ((1 << param) << t0))
- return 1;
-
- return 0;
-}
-
-static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
-{
- bool k = kvm_override;
-
- switch (svm->vmcb->control.exit_code) {
- case SVM_EXIT_MSR:
- return nested_svm_do(svm, svm->nested_vmcb,
- svm->nested_vmcb_msrpm, NULL,
- nested_svm_exit_handled_msr);
- default: break;
- }
-
- return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
- nested_svm_exit_handled_real);
-}
-
-static int nested_svm_emulate(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- int er;
- u32 opcode = 0;
- unsigned long rip;
- unsigned long rip_linear;
-
- svm->vmcb->save.rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
- svm->vmcb->save.rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
- svm->vmcb->save.rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
- rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];
- rip_linear = rip + svm_seg(&svm->vcpu, VCPU_SREG_CS)->base;
-
- er = emulator_read_std(rip_linear, (void *)&opcode, 3, &svm->vcpu);
- if (er != X86EMUL_CONTINUE)
- return er;
- er = EMULATE_FAIL;
-
- switch (opcode) {
- case 0xda010f:
- vmload_interception(svm, kvm_run);
- er = EMULATE_DONE;
- break;
- case 0xd8010f:
- vmrun_interception(svm, kvm_run);
- er = EMULATE_DONE;
- break;
- case 0xdb010f:
- vmsave_interception(svm, kvm_run);
- er = EMULATE_DONE;
- break;
- case 0xdc010f:
- stgi_interception(svm, kvm_run);
- er = EMULATE_DONE;
- break;
- default:
- nsvm_printk("NSVM: Opcode %x unknown\n", opcode);
- }
-
- nsvm_printk("NSVM: svm emul at 0x%lx -> %d\n", rip, er);
-
- return er;
-}
-
-static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
-{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- struct vmcb *hsave = svm->hsave;
- u64 nested_save[] = { nested_vmcb->save.cr0,
- nested_vmcb->save.cr3,
- nested_vmcb->save.cr4,
- nested_vmcb->save.efer,
- nested_vmcb->control.intercept_cr_read,
- nested_vmcb->control.intercept_cr_write,
- nested_vmcb->control.intercept_dr_read,
- nested_vmcb->control.intercept_dr_write,
- nested_vmcb->control.intercept_exceptions,
- nested_vmcb->control.intercept,
- nested_vmcb->control.msrpm_base_pa,
- nested_vmcb->control.iopm_base_pa,
- nested_vmcb->control.tsc_offset };
-
- /* Give the current vmcb to the guest */
- memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
- nested_vmcb->save.cr0 = nested_save[0];
- if (!npt_enabled)
- nested_vmcb->save.cr3 = nested_save[1];
- nested_vmcb->save.cr4 = nested_save[2];
- nested_vmcb->save.efer = nested_save[3];
- nested_vmcb->control.intercept_cr_read = nested_save[4];
- nested_vmcb->control.intercept_cr_write = nested_save[5];
- nested_vmcb->control.intercept_dr_read = nested_save[6];
- nested_vmcb->control.intercept_dr_write = nested_save[7];
- nested_vmcb->control.intercept_exceptions = nested_save[8];
- nested_vmcb->control.intercept = nested_save[9];
- nested_vmcb->control.msrpm_base_pa = nested_save[10];
- nested_vmcb->control.iopm_base_pa = nested_save[11];
- nested_vmcb->control.tsc_offset = nested_save[12];
-
- /* We always set V_INTR_MASKING and remember the old value in hflags */
- if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
- nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
-
- if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
- (nested_vmcb->control.int_vector)) {
- nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
- nested_vmcb->control.int_vector);
- }
-
- /* Restore the original control entries */
- svm->vmcb->control = hsave->control;
-
- /* Kill any pending exceptions */
- if (svm->vcpu.arch.exception.pending == true)
- nsvm_printk("WARNING: Pending Exception\n");
- svm->vcpu.arch.exception.pending = false;
-
- /* Restore selected save entries */
- svm->vmcb->save.es = hsave->save.es;
- svm->vmcb->save.cs = hsave->save.cs;
- svm->vmcb->save.ss = hsave->save.ss;
- svm->vmcb->save.ds = hsave->save.ds;
- svm->vmcb->save.gdtr = hsave->save.gdtr;
- svm->vmcb->save.idtr = hsave->save.idtr;
- svm->vmcb->save.rflags = hsave->save.rflags;
- svm_set_efer(&svm->vcpu, hsave->save.efer);
- svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
- svm_set_cr4(&svm->vcpu, hsave->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = hsave->save.cr3;
- svm->vcpu.arch.cr3 = hsave->save.cr3;
- } else {
- kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
- }
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
- svm->vmcb->save.dr7 = 0;
- svm->vmcb->save.cpl = 0;
- svm->vmcb->control.exit_int_info = 0;
-
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
- /* Exit nested SVM mode */
- svm->nested_vmcb = 0;
-
- return 0;
-}
-
-static int nested_svm_vmexit(struct vcpu_svm *svm)
-{
- nsvm_printk("VMexit\n");
- if (nested_svm_do(svm, svm->nested_vmcb, 0,
- NULL, nested_svm_vmexit_real))
- return 1;
-
- kvm_mmu_reset_context(&svm->vcpu);
- kvm_mmu_load(&svm->vcpu);
-
- /* KVM calls vmsave after vmrun, so let's run it now if we can */
- nested_svm_emulate(svm, NULL);
-
- return 0;
-}
-
-static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
-{
- int i;
- u32 *nested_msrpm = (u32*)arg1;
- for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
- svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
- svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
-
- return 0;
-}
-
-static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
- void *arg2, void *opaque)
-{
- struct vmcb *nested_vmcb = (struct vmcb *)arg1;
- struct vmcb *hsave = svm->hsave;
-
- /* nested_vmcb is our indicator if nested SVM is activated */
- svm->nested_vmcb = svm->vmcb->save.rax;
-
- /* Clear internal status */
- svm->vcpu.arch.exception.pending = false;
-
- /* Save the old vmcb, so we don't need to pick what we save, but
- can restore everything when a VMEXIT occurs */
- memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
- /* We need to remember the original CR3 in the SPT case */
- if (!npt_enabled)
- hsave->save.cr3 = svm->vcpu.arch.cr3;
- hsave->save.cr4 = svm->vcpu.arch.cr4;
- hsave->save.rip = svm->next_rip;
-
- if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
- svm->vcpu.arch.hflags |= HF_HIF_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
-
- /* Load the nested guest state */
- svm->vmcb->save.es = nested_vmcb->save.es;
- svm->vmcb->save.cs = nested_vmcb->save.cs;
- svm->vmcb->save.ss = nested_vmcb->save.ss;
- svm->vmcb->save.ds = nested_vmcb->save.ds;
- svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
- svm->vmcb->save.idtr = nested_vmcb->save.idtr;
- svm->vmcb->save.rflags = nested_vmcb->save.rflags;
- svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
- svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
- svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
- if (npt_enabled) {
- svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
- svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
- } else {
- kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
- kvm_mmu_reset_context(&svm->vcpu);
- }
- svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
- /* In case we don't even reach vcpu_run, the fields are not updated */
- svm->vmcb->save.rax = nested_vmcb->save.rax;
- svm->vmcb->save.rsp = nested_vmcb->save.rsp;
- svm->vmcb->save.rip = nested_vmcb->save.rip;
- svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
- svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
- svm->vmcb->save.cpl = nested_vmcb->save.cpl;
-
- /* We don't want a nested guest to be more powerful than the guest,
- so all intercepts are ORed */
- svm->vmcb->control.intercept_cr_read |=
- nested_vmcb->control.intercept_cr_read;
- svm->vmcb->control.intercept_cr_write |=
- nested_vmcb->control.intercept_cr_write;
- svm->vmcb->control.intercept_dr_read |=
- nested_vmcb->control.intercept_dr_read;
- svm->vmcb->control.intercept_dr_write |=
- nested_vmcb->control.intercept_dr_write;
- svm->vmcb->control.intercept_exceptions |=
- nested_vmcb->control.intercept_exceptions;
-
- svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
-
- svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
-
- force_new_asid(&svm->vcpu);
- svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
- svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
- svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
- if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
- nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
- nested_vmcb->control.int_ctl);
- }
- if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
- svm->vcpu.arch.hflags |= HF_VINTR_MASK;
- else
- svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
-
- nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
- nested_vmcb->control.exit_int_info,
- nested_vmcb->control.int_state);
-
- svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
- svm->vmcb->control.int_state = nested_vmcb->control.int_state;
- svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
- if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
- nsvm_printk("Injecting Event: 0x%x\n",
- nested_vmcb->control.event_inj);
- svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
- svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
-
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
-
- return 0;
-}
-
-static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
-{
- to_vmcb->save.fs = from_vmcb->save.fs;
- to_vmcb->save.gs = from_vmcb->save.gs;
- to_vmcb->save.tr = from_vmcb->save.tr;
- to_vmcb->save.ldtr = from_vmcb->save.ldtr;
- to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
- to_vmcb->save.star = from_vmcb->save.star;
- to_vmcb->save.lstar = from_vmcb->save.lstar;
- to_vmcb->save.cstar = from_vmcb->save.cstar;
- to_vmcb->save.sfmask = from_vmcb->save.sfmask;
- to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
- to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
- to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
-
- return 1;
-}
-
-static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque)
-{
- return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
-}
-
-static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
- void *arg2, void *opaque)
-{
- return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
-}
-
-static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- if (nested_svm_check_permissions(svm))
- return 1;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
- nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
-
- return 1;
-}
-
-static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- if (nested_svm_check_permissions(svm))
- return 1;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
- nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
-
- return 1;
-}
-
-static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- nsvm_printk("VMrun\n");
- if (nested_svm_check_permissions(svm))
- return 1;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
- if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
- NULL, nested_svm_vmrun))
- return 1;
-
- if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
- NULL, nested_svm_vmrun_msrpm))
- return 1;
-
- return 1;
-}
-
-static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- if (nested_svm_check_permissions(svm))
- return 1;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
- svm->vcpu.arch.hflags |= HF_GIF_MASK;
-
- return 1;
-}
-
-static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
-{
- if (nested_svm_check_permissions(svm))
- return 1;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
-
- svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
-
- /* After a CLGI no interrupts should come */
- svm_clear_vintr(svm);
- svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
-
- /* Let's try to emulate as many instructions as possible in GIF=0 */
-
- while(true) {
- int er;
-
- er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0);
- nsvm_printk("NSVM: emulating at 0x%lx -> %d\n", svm->vcpu.arch.regs[VCPU_REGS_RIP], er);
-
- /* So we can now emulate the SVM instructions that most probably
- occur at the end of the codepath */
- if (er != EMULATE_DONE) {
- while (true)
- if (nested_svm_emulate(svm, kvm_run) == EMULATE_FAIL)
- break;
- break;
- }
- }
- return 1;
-}
-
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
@@ -1991,12 +1246,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_LASTINTTOIP:
*data = svm->vmcb->save.last_excp_to;
break;
- case MSR_VM_HSAVE_PA:
- *data = svm->hsave_msr;
- break;
- case MSR_VM_CR:
- *data = 0;
- break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
@@ -2091,9 +1340,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_VM_HSAVE_PA:
- svm->hsave_msr = data;
- break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
}
@@ -2130,7 +1376,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
{
KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
- svm_clear_vintr(svm);
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -2167,8 +1413,6 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
- [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
- [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -2188,12 +1432,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
- [SVM_EXIT_VMRUN] = vmrun_interception,
+ [SVM_EXIT_VMRUN] = invalid_op_interception,
[SVM_EXIT_VMMCALL] = vmmcall_interception,
- [SVM_EXIT_VMLOAD] = vmload_interception,
- [SVM_EXIT_VMSAVE] = vmsave_interception,
- [SVM_EXIT_STGI] = stgi_interception,
- [SVM_EXIT_CLGI] = clgi_interception,
+ [SVM_EXIT_VMLOAD] = invalid_op_interception,
+ [SVM_EXIT_VMSAVE] = invalid_op_interception,
+ [SVM_EXIT_STGI] = invalid_op_interception,
+ [SVM_EXIT_CLGI] = invalid_op_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -2209,17 +1453,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
(u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
- if (is_nested(svm)) {
- nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
- exit_code, svm->vmcb->control.exit_info_1,
- svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
- if (nested_svm_exit_handled(svm, true)) {
- nested_svm_vmexit(svm);
- nsvm_printk("-> #VMEXIT\n");
- return 1;
- }
- }
-
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -2307,8 +1540,6 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_svm *svm = to_svm(vcpu);
- nested_svm_intr(svm);
-
svm_inject_irq(svm, irq);
}
@@ -2354,17 +1585,11 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
- if (nested_svm_intr(svm))
- goto out;
-
- if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
- goto out;
-
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
- svm_set_vintr(svm);
+ vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
svm_inject_irq(svm, 0x0);
goto out;
}
@@ -2387,8 +1612,7 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
}
svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vcpu.arch.hflags & HF_GIF_MASK);
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
}
static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -2410,13 +1634,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
- if (nested_svm_intr(svm))
- return;
-
svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
- (svm->vcpu.arch.hflags & HF_GIF_MASK));
+ (svm->vmcb->save.rflags & X86_EFLAGS_IF));
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/*
@@ -2429,9 +1649,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- svm_set_vintr(svm);
- else
- svm_clear_vintr(svm);
+ control->intercept |= 1ULL << INTERCEPT_VINTR;
+ else
+ control->intercept &= ~(1ULL << INTERCEPT_VINTR);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -2439,6 +1659,22 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
+static void save_db_regs(unsigned long *db_regs)
+{
+ asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
+ asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
+ asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
+ asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
+}
+
+static void load_db_regs(unsigned long *db_regs)
+{
+ asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
+ asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
+ asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
+ asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
+}
+
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
@@ -2497,12 +1733,19 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
svm->host_cr2 = kvm_read_cr2();
- if (!is_nested(svm))
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ svm->host_dr6 = read_dr6();
+ svm->host_dr7 = read_dr7();
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
+ if (svm->vmcb->save.dr7 & 0xff) {
+ write_dr7(0);
+ save_db_regs(svm->host_db_regs);
+ load_db_regs(svm->db_regs);
+ }
+
clgi();
local_irq_enable();
@@ -2578,11 +1821,16 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
#endif
);
+ if ((svm->vmcb->save.dr7 & 0xff))
+ load_db_regs(svm->host_db_regs);
+
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
+ write_dr6(svm->host_dr6);
+ write_dr7(svm->host_dr7);
kvm_write_cr2(svm->host_cr2);
kvm_load_fs(fs_selector);
@@ -2664,11 +1912,6 @@ static int get_npt_level(void)
#endif
}
-static int svm_get_mt_mask_shift(void)
-{
- return 0;
-}
-
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -2724,7 +1967,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
- .get_mt_mask_shift = svm_get_mt_mask_shift,
};
static int __init svm_init(void)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/kvm/svm.h
index 82ada75f3ebf..1b8afa78e869 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -174,6 +174,10 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
+#define MSR_EFER_SVME_MASK (1ULL << 12)
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117ULL
+
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ffd8f24dc66c..a4018b01e1f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -16,6 +16,7 @@
*/
#include "irq.h"
+#include "vmx.h"
#include "mmu.h"
#include <linux/kvm_host.h>
@@ -30,8 +31,6 @@
#include <asm/io.h>
#include <asm/desc.h>
-#include <asm/vmx.h>
-#include <asm/virtext.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
@@ -91,11 +90,6 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
-
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -128,7 +122,7 @@ static struct vmcs_config {
u32 vmentry_ctrl;
} vmcs_config;
-static struct vmx_capability {
+struct vmx_capability {
u32 ept;
u32 vpid;
} vmx_capability;
@@ -189,21 +183,21 @@ static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_external_interrupt(u32 intr_info)
@@ -480,13 +474,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
- if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- eb |= 1u << DB_VECTOR;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- eb |= 1u << BP_VECTOR;
- }
+ if (vcpu->guest_debug.enabled)
+ eb |= 1u << DB_VECTOR;
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
@@ -752,33 +741,29 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (has_error_code) {
+ if (has_error_code)
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
- intr_info |= INTR_INFO_DELIVER_CODE_MASK;
- }
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = nr;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (nr == BP_VECTOR || nr == OF_VECTOR)
+ if (nr == BP_VECTOR)
vmx->rmode.irq.rip++;
- intr_info |= INTR_TYPE_SOFT_INTR;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ nr | INTR_TYPE_SOFT_INTR
+ | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+ | INTR_INFO_VALID_MASK);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
- if (nr == BP_VECTOR || nr == OF_VECTOR) {
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- intr_info |= INTR_TYPE_SOFT_EXCEPTION;
- } else
- intr_info |= INTR_TYPE_HARD_EXCEPTION;
-
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ nr | INTR_TYPE_EXCEPTION
+ | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
+ | INTR_INFO_VALID_MASK);
}
static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -972,13 +957,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data);
break;
- case MSR_IA32_CR_PAT:
- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- vmcs_write64(GUEST_IA32_PAT, data);
- vcpu->arch.pat = data;
- break;
- }
- /* Otherwise falls through to kvm_set_msr_common */
default:
vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
@@ -1007,28 +985,40 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
{
- int old_debug = vcpu->guest_debug;
- unsigned long flags;
+ unsigned long dr7 = 0x400;
+ int old_singlestep;
- vcpu->guest_debug = dbg->control;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
- vcpu->guest_debug = 0;
+ old_singlestep = vcpu->guest_debug.singlestep;
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
- vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
- else
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->guest_debug.enabled = dbg->enabled;
+ if (vcpu->guest_debug.enabled) {
+ int i;
- flags = vmcs_readl(GUEST_RFLAGS);
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+ dr7 |= 0x200; /* exact */
+ for (i = 0; i < 4; ++i) {
+ if (!dbg->breakpoints[i].enabled)
+ continue;
+ vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
+ dr7 |= 2 << (i*2); /* global enable */
+ dr7 |= 0 << (i*4+16); /* execution breakpoint */
+ }
+
+ vcpu->guest_debug.singlestep = dbg->singlestep;
+ } else
+ vcpu->guest_debug.singlestep = 0;
+
+ if (old_singlestep && !vcpu->guest_debug.singlestep) {
+ unsigned long flags;
+
+ flags = vmcs_readl(GUEST_RFLAGS);
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
+ vmcs_writel(GUEST_RFLAGS, flags);
+ }
update_exception_bitmap(vcpu);
+ vmcs_writel(GUEST_DR7, dr7);
return 0;
}
@@ -1042,7 +1032,8 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu)
static __init int cpu_has_kvm_support(void)
{
- return cpu_has_vmx();
+ unsigned long ecx = cpuid_ecx(1);
+ return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
}
static __init int vmx_disabled_by_bios(void)
@@ -1088,20 +1079,11 @@ static void vmclear_local_vcpus(void)
__vcpu_clear(vmx);
}
-
-/* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
- * tricks.
- */
-static void kvm_cpu_vmxoff(void)
-{
- asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
- write_cr4(read_cr4() & ~X86_CR4_VMXE);
-}
-
static void hardware_disable(void *garbage)
{
vmclear_local_vcpus();
- kvm_cpu_vmxoff();
+ asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+ write_cr4(read_cr4() & ~X86_CR4_VMXE);
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -1194,13 +1176,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#ifdef CONFIG_X86_64
min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
#endif
- opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT;
+ opt = 0;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
&_vmexit_control) < 0)
return -EIO;
- min = 0;
- opt = VM_ENTRY_LOAD_IA32_PAT;
+ min = opt = 0;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
&_vmentry_control) < 0)
return -EIO;
@@ -2106,9 +2087,8 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
*/
static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
- u32 host_sysenter_cs, msr_low, msr_high;
+ u32 host_sysenter_cs;
u32 junk;
- u64 host_pat;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2196,20 +2176,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
rdmsrl(MSR_IA32_SYSENTER_EIP, a);
vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
- if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) {
- rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
- host_pat = msr_low | ((u64) msr_high << 32);
- vmcs_write64(HOST_IA32_PAT, host_pat);
- }
- if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
- rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high);
- host_pat = msr_low | ((u64) msr_high << 32);
- /* Write the default value follow host pat */
- vmcs_write64(GUEST_IA32_PAT, host_pat);
- /* Keep arch.pat sync with GUEST_IA32_PAT */
- vmx->vcpu.arch.pat = host_pat;
- }
-
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
@@ -2264,8 +2230,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.rmode.active = 0;
- vmx->soft_vnmi_blocked = 0;
-
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
@@ -2315,6 +2279,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0);
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
+ /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2370,29 +2335,6 @@ out:
return ret;
}
-static void enable_irq_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
-static void enable_nmi_window(struct kvm_vcpu *vcpu)
-{
- u32 cpu_based_vm_exec_control;
-
- if (!cpu_has_virtual_nmis()) {
- enable_irq_window(vcpu);
- return;
- }
-
- cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
- cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
- vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
-}
-
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2416,54 +2358,10 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!cpu_has_virtual_nmis()) {
- /*
- * Tracking the NMI-blocked state in software is built upon
- * finding the next open IRQ window. This, in turn, depends on
- * well-behaving guests: They have to keep IRQs disabled at
- * least as long as the NMI handler runs. Otherwise we may
- * cause NMI nesting, maybe breaking the guest. But as this is
- * highly unlikely, we can live with the residual risk.
- */
- vmx->soft_vnmi_blocked = 1;
- vmx->vnmi_blocked_time = 0;
- }
-
- ++vcpu->stat.nmi_injections;
- if (vcpu->arch.rmode.active) {
- vmx->rmode.irq.pending = true;
- vmx->rmode.irq.vector = NMI_VECTOR;
- vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- NMI_VECTOR | INTR_TYPE_SOFT_INTR |
- INTR_INFO_VALID_MASK);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
- kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
- return;
- }
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
}
-static void vmx_update_window_states(struct kvm_vcpu *vcpu)
-{
- u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
-
- vcpu->arch.nmi_window_open =
- !(guest_intr & (GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS |
- GUEST_INTR_STATE_NMI));
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
- vcpu->arch.nmi_window_open = 0;
-
- vcpu->arch.interrupt_window_open =
- ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
- !(guest_intr & (GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS)));
-}
-
static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
{
int word_index = __ffs(vcpu->arch.irq_summary);
@@ -2476,54 +2374,40 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
kvm_queue_interrupt(vcpu, irq);
}
+
static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
- vmx_update_window_states(vcpu);
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS);
-
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.interrupt.pending) {
- enable_nmi_window(vcpu);
- } else if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
- return;
- }
- }
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (vcpu->arch.irq_summary
- || kvm_run->request_interrupt_window)
- enable_irq_window(vcpu);
- return;
- }
+ u32 cpu_based_vm_exec_control;
- if (vcpu->arch.interrupt_window_open) {
- if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
- kvm_do_inject_irq(vcpu);
+ vcpu->arch.interrupt_window_open =
+ ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
- if (vcpu->arch.interrupt.pending)
- vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
- }
+ if (vcpu->arch.interrupt_window_open &&
+ vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending)
+ kvm_do_inject_irq(vcpu);
+
+ if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending)
+ vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
if (!vcpu->arch.interrupt_window_open &&
(vcpu->arch.irq_summary || kvm_run->request_interrupt_window))
- enable_irq_window(vcpu);
+ /*
+ * Interrupts blocked. Wait for unblock.
+ */
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ else
+ cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
}
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
+ .slot = 8,
.guest_phys_addr = addr,
.memory_size = PAGE_SIZE * 3,
.flags = 0,
@@ -2536,6 +2420,24 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
+static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
+{
+ struct kvm_guest_debug *dbg = &vcpu->guest_debug;
+
+ set_debugreg(dbg->bp[0], 0);
+ set_debugreg(dbg->bp[1], 1);
+ set_debugreg(dbg->bp[2], 2);
+ set_debugreg(dbg->bp[3], 3);
+
+ if (dbg->singlestep) {
+ unsigned long flags;
+
+ flags = vmcs_readl(GUEST_RFLAGS);
+ flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ vmcs_writel(GUEST_RFLAGS, flags);
+ }
+}
+
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
@@ -2552,17 +2454,9 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* the required debugging infrastructure rework.
*/
switch (vec) {
+ case DE_VECTOR:
case DB_VECTOR:
- if (vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
- return 0;
- kvm_queue_exception(vcpu, vec);
- return 1;
case BP_VECTOR:
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
- return 0;
- /* fall through */
- case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
case UD_VECTOR:
@@ -2579,8 +2473,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info, ex_no, error_code;
- unsigned long cr2, rip, dr6;
+ u32 intr_info, error_code;
+ unsigned long cr2, rip;
u32 vect_info;
enum emulation_result er;
@@ -2598,7 +2492,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary);
}
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
return 1; /* already handled by vmx_vcpu_run() */
if (is_no_device(intr_info)) {
@@ -2639,30 +2533,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
- ex_no = intr_info & INTR_INFO_VECTOR_MASK;
- switch (ex_no) {
- case DB_VECTOR:
- dr6 = vmcs_readl(EXIT_QUALIFICATION);
- if (!(vcpu->guest_debug &
- (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
- vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_queue_exception(vcpu, DB_VECTOR);
- return 1;
- }
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
- /* fall through */
- case BP_VECTOR:
+ if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
+ (INTR_TYPE_EXCEPTION | 1)) {
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
- kvm_run->debug.arch.exception = ex_no;
- break;
- default:
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
- kvm_run->ex.exception = ex_no;
- kvm_run->ex.error_code = error_code;
- break;
+ return 0;
}
+ kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
+ kvm_run->ex.error_code = error_code;
return 0;
}
@@ -2703,7 +2581,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rep = (exit_qualification & 32) != 0;
port = exit_qualification >> 16;
- skip_emulated_instruction(vcpu);
return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
}
@@ -2801,44 +2678,21 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long val;
int dr, reg;
- dr = vmcs_readl(GUEST_DR7);
- if (dr & DR7_GD) {
- /*
- * As the vm-exit takes precedence over the debug trap, we
- * need to emulate the latter, either for the host or the
- * guest debugging itself.
- */
- if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
- kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
- kvm_run->debug.arch.dr7 = dr;
- kvm_run->debug.arch.pc =
- vmcs_readl(GUEST_CS_BASE) +
- vmcs_readl(GUEST_RIP);
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
- } else {
- vcpu->arch.dr7 &= ~DR7_GD;
- vcpu->arch.dr6 |= DR6_BD;
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- kvm_queue_exception(vcpu, DB_VECTOR);
- return 1;
- }
- }
-
+ /*
+ * FIXME: this code assumes the host is debugging the guest.
+ * need to deal with guest debugging itself too.
+ */
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
- reg = DEBUG_REG_ACCESS_REG(exit_qualification);
- if (exit_qualification & TYPE_MOV_FROM_DR) {
+ dr = exit_qualification & 7;
+ reg = (exit_qualification >> 8) & 15;
+ if (exit_qualification & 16) {
+ /* mov from dr */
switch (dr) {
- case 0 ... 3:
- val = vcpu->arch.db[dr];
- break;
case 6:
- val = vcpu->arch.dr6;
+ val = 0xffff0ff0;
break;
case 7:
- val = vcpu->arch.dr7;
+ val = 0x400;
break;
default:
val = 0;
@@ -2846,38 +2700,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_register_write(vcpu, reg, val);
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
- val = vcpu->arch.regs[reg];
- switch (dr) {
- case 0 ... 3:
- vcpu->arch.db[dr] = val;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- vcpu->arch.eff_db[dr] = val;
- break;
- case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE)
- kvm_queue_exception(vcpu, UD_VECTOR);
- break;
- case 6:
- if (val & 0xffffffff00000000ULL) {
- kvm_queue_exception(vcpu, GP_VECTOR);
- break;
- }
- vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
- break;
- case 7:
- if (val & 0xffffffff00000000ULL) {
- kvm_queue_exception(vcpu, GP_VECTOR);
- break;
- }
- vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
- if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
- vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- vcpu->arch.switch_db_regs =
- (val & DR7_BP_EN_MASK);
- }
- break;
- }
- KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
+ /* mov to dr */
}
skip_emulated_instruction(vcpu);
return 1;
@@ -2944,7 +2767,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
KVMTRACE_0D(PEND_INTR, vcpu, handler);
- ++vcpu->stat.irq_window_exits;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -2953,6 +2775,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
if (kvm_run->request_interrupt_window &&
!vcpu->arch.irq_summary) {
kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+ ++vcpu->stat.irq_window_exits;
return 0;
}
return 1;
@@ -3009,7 +2832,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long exit_qualification;
u16 tss_selector;
int reason;
@@ -3017,29 +2839,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
reason = (u32)exit_qualification >> 30;
- if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
- (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
- == INTR_TYPE_NMI_INTR) {
- vcpu->arch.nmi_injected = false;
- if (cpu_has_virtual_nmis())
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- }
tss_selector = exit_qualification;
- if (!kvm_task_switch(vcpu, tss_selector, reason))
- return 0;
-
- /* clear all local breakpoint enable flags */
- vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
-
- /*
- * TODO: What about debug traps on tss switch?
- * Are we supposed to inject them and update dr6?
- */
-
- return 1;
+ return kvm_task_switch(vcpu, tss_selector, reason);
}
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3125,12 +2927,16 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
while (!guest_state_valid(vcpu)) {
err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
- if (err == EMULATE_DO_MMIO)
- break;
-
- if (err != EMULATE_DONE) {
- kvm_report_emulation_failure(vcpu, "emulation failure");
- return;
+ switch (err) {
+ case EMULATE_DONE:
+ break;
+ case EMULATE_DO_MMIO:
+ kvm_report_emulation_failure(vcpu, "mmio");
+ /* TODO: Handle MMIO */
+ return;
+ default:
+ kvm_report_emulation_failure(vcpu, "emulation failure");
+ return;
}
if (signal_pending(current))
@@ -3142,10 +2948,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now except if we need to
- * emulate an MMIO */
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
+ /* Guest state should be valid now, no more emulation should be needed */
+ vmx->emulation_required = 0;
}
/*
@@ -3192,11 +2996,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu),
(u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit);
- /* If we need to emulate an MMIO from handle_invalid_guest_state
- * we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state)
- return 0;
-
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
if (vm_need_ept() && is_paging(vcpu)) {
@@ -3213,32 +3012,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
- exit_reason != EXIT_REASON_EPT_VIOLATION &&
- exit_reason != EXIT_REASON_TASK_SWITCH))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info "
- "(0x%x) and exit reason is 0x%x\n",
- __func__, vectoring_info, exit_reason);
-
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) {
- if (vcpu->arch.interrupt_window_open) {
- vmx->soft_vnmi_blocked = 0;
- vcpu->arch.nmi_window_open = 1;
- } else if (vmx->vnmi_blocked_time > 1000000000LL &&
- vcpu->arch.nmi_pending) {
- /*
- * This CPU don't support us in finding the end of an
- * NMI-blocked window if the guest runs with IRQs
- * disabled. So we pull the trigger after 1 s of
- * futile waiting, but inform the user about this.
- */
- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
- "state on VCPU %d after 1 s timeout\n",
- __func__, vcpu->vcpu_id);
- vmx->soft_vnmi_blocked = 0;
- vmx->vcpu.arch.nmi_window_open = 1;
- }
- }
-
+ exit_reason != EXIT_REASON_EPT_VIOLATION))
+ printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
+ "exit reason is 0x%x\n", __func__, exit_reason);
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -3266,6 +3042,51 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu)
vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4);
}
+static void enable_irq_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static void enable_nmi_window(struct kvm_vcpu *vcpu)
+{
+ u32 cpu_based_vm_exec_control;
+
+ if (!cpu_has_virtual_nmis())
+ return;
+
+ cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING;
+ vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+}
+
+static int vmx_nmi_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return !(guest_intr & (GUEST_INTR_STATE_NMI |
+ GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI));
+}
+
+static int vmx_irq_enabled(struct kvm_vcpu *vcpu)
+{
+ u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+ return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS |
+ GUEST_INTR_STATE_STI)) &&
+ (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
+static void enable_intr_window(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.nmi_pending)
+ enable_nmi_window(vcpu);
+ else if (kvm_cpu_has_interrupt(vcpu))
+ enable_irq_window(vcpu);
+}
+
static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
@@ -3288,9 +3109,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
if (unblock_nmi && vector != DF_VECTOR)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
- } else if (unlikely(vmx->soft_vnmi_blocked))
- vmx->vnmi_blocked_time +=
- ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ }
idt_vectoring_info = vmx->idt_vectoring_info;
idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK;
@@ -3309,8 +3128,7 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
vmx->vcpu.arch.nmi_injected = false;
}
kvm_clear_exception_queue(&vmx->vcpu);
- if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
- type == INTR_TYPE_SOFT_EXCEPTION)) {
+ if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3329,34 +3147,26 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
{
update_tpr_threshold(vcpu);
- vmx_update_window_states(vcpu);
-
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_STI |
- GUEST_INTR_STATE_MOV_SS);
-
- if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
- if (vcpu->arch.interrupt.pending) {
- enable_nmi_window(vcpu);
- } else if (vcpu->arch.nmi_window_open) {
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = true;
- } else {
- enable_nmi_window(vcpu);
+ if (cpu_has_virtual_nmis()) {
+ if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
+ if (vcpu->arch.interrupt.pending) {
+ enable_nmi_window(vcpu);
+ } else if (vmx_nmi_enabled(vcpu)) {
+ vcpu->arch.nmi_pending = false;
+ vcpu->arch.nmi_injected = true;
+ } else {
+ enable_intr_window(vcpu);
+ return;
+ }
+ }
+ if (vcpu->arch.nmi_injected) {
+ vmx_inject_nmi(vcpu);
+ enable_intr_window(vcpu);
return;
}
}
- if (vcpu->arch.nmi_injected) {
- vmx_inject_nmi(vcpu);
- if (vcpu->arch.nmi_pending)
- enable_nmi_window(vcpu);
- else if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
- return;
- }
if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) {
- if (vcpu->arch.interrupt_window_open)
+ if (vmx_irq_enabled(vcpu))
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
else
enable_irq_window(vcpu);
@@ -3364,8 +3174,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
- if (kvm_cpu_has_interrupt(vcpu))
- enable_irq_window(vcpu);
}
}
@@ -3405,10 +3213,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info;
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
- vmx->entry_time = ktime_get();
-
/* Handle invalid guest state instead of entering VMX */
if (vmx->emulation_required && emulate_invalid_guest_state) {
handle_invalid_guest_state(vcpu, kvm_run);
@@ -3425,8 +3229,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
*/
vmcs_writel(HOST_CR0, read_cr0());
- set_debugreg(vcpu->arch.dr6, 6);
-
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -3521,13 +3323,13 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
vcpu->arch.regs_dirty = 0;
- get_debugreg(vcpu->arch.dr6, 6);
-
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
- vmx_update_window_states(vcpu);
+ vcpu->arch.interrupt_window_open =
+ (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+ (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0;
asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
vmx->launched = 1;
@@ -3535,7 +3337,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* We need to handle NMIs before interrupts are enabled */
- if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR &&
+ if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 &&
(intr_info & INTR_INFO_VALID_MASK)) {
KVMTRACE_0D(NMI, vcpu, handler);
asm("int $2");
@@ -3653,11 +3455,6 @@ static int get_ept_level(void)
return VMX_EPT_DEFAULT_GAW + 1;
}
-static int vmx_get_mt_mask_shift(void)
-{
- return VMX_EPT_MT_EPTE_SHIFT;
-}
-
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -3677,6 +3474,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_put = vmx_vcpu_put,
.set_guest_debug = set_guest_debug,
+ .guest_debug_pre = kvm_guest_debug_pre,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
@@ -3712,7 +3510,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
- .get_mt_mask_shift = vmx_get_mt_mask_shift,
};
static int __init vmx_init(void)
@@ -3769,10 +3566,10 @@ static int __init vmx_init(void)
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
+ VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
VMX_EPT_IGMT_BIT);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
- VMX_EPT_EXECUTABLE_MASK,
- VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+ VMX_EPT_EXECUTABLE_MASK);
kvm_enable_tdp();
} else
kvm_disable_tdp();
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/kvm/vmx.h
index 498f944010b9..ec5edc339da6 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -63,13 +63,10 @@
#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
-#define VM_EXIT_SAVE_IA32_PAT 0x00040000
-#define VM_EXIT_LOAD_IA32_PAT 0x00080000
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
-#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
/* VMCS Encodings */
enum vmcs_field {
@@ -115,8 +112,6 @@ enum vmcs_field {
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
- GUEST_IA32_PAT = 0x00002804,
- GUEST_IA32_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
@@ -125,8 +120,6 @@ enum vmcs_field {
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
- HOST_IA32_PAT = 0x00002c00,
- HOST_IA32_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
@@ -270,9 +263,8 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
-#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -312,7 +304,7 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
/* segment AR */
@@ -339,9 +331,8 @@ enum vmcs_field {
#define AR_RESERVD_MASK 0xfffe0f00
-#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0)
-#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1)
-#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2)
+#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9
+#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
#define VMX_NR_VPIDS (1 << 16)
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
@@ -365,19 +356,4 @@ enum vmcs_field {
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
-
-#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
-#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
-#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
-#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
-#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
-#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
-#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
-#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
-#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
-#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
-
-
-
#endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4da7fbabe01..f1f8ff2f1fa2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -39,7 +39,6 @@
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
-#include <asm/mtrr.h>
#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS \
@@ -68,8 +67,6 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -89,7 +86,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "hypercalls", VCPU_STAT(hypercalls) },
{ "request_irq", VCPU_STAT(request_irq_exits) },
- { "request_nmi", VCPU_STAT(request_nmi_exits) },
{ "irq_exits", VCPU_STAT(irq_exits) },
{ "host_state_reload", VCPU_STAT(host_state_reload) },
{ "efer_reload", VCPU_STAT(efer_reload) },
@@ -97,7 +93,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
{ "irq_injections", VCPU_STAT(irq_injections) },
- { "nmi_injections", VCPU_STAT(nmi_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
@@ -106,7 +101,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_recycled", VM_STAT(mmu_recycled) },
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
- { "mmu_unsync_global", VM_STAT(mmu_unsync_global) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ NULL }
@@ -174,7 +168,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
-
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -319,7 +312,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
kvm_x86_ops->set_cr0(vcpu, cr0);
vcpu->arch.cr0 = cr0;
- kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
return;
}
@@ -363,7 +355,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
- kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);
@@ -444,11 +435,6 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -463,7 +449,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+ MSR_IA32_PERF_STATUS,
};
static unsigned num_msrs_to_save;
@@ -488,17 +474,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
return;
}
- if (efer & EFER_SVME) {
- struct kvm_cpuid_entry2 *feat;
-
- feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
- if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
- printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
- kvm_inject_gp(vcpu, 0);
- return;
- }
- }
-
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
@@ -673,38 +648,10 @@ static bool msr_mtrr_valid(unsigned msr)
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
- u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-
if (!msr_mtrr_valid(msr))
return 1;
- if (msr == MSR_MTRRdefType) {
- vcpu->arch.mtrr_state.def_type = data;
- vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10;
- } else if (msr == MSR_MTRRfix64K_00000)
- p[0] = data;
- else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
- p[1 + msr - MSR_MTRRfix16K_80000] = data;
- else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
- p[3 + msr - MSR_MTRRfix4K_C0000] = data;
- else if (msr == MSR_IA32_CR_PAT)
- vcpu->arch.pat = data;
- else { /* Variable MTRRs */
- int idx, is_mtrr_mask;
- u64 *pt;
-
- idx = (msr - 0x200) / 2;
- is_mtrr_mask = msr - 0x200 - 2 * idx;
- if (!is_mtrr_mask)
- pt =
- (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
- else
- pt =
- (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
- *pt = data;
- }
-
- kvm_mmu_reset_context(vcpu);
+ vcpu->arch.mtrr[msr - 0x200] = data;
return 0;
}
@@ -800,37 +747,10 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
- u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
-
if (!msr_mtrr_valid(msr))
return 1;
- if (msr == MSR_MTRRdefType)
- *pdata = vcpu->arch.mtrr_state.def_type +
- (vcpu->arch.mtrr_state.enabled << 10);
- else if (msr == MSR_MTRRfix64K_00000)
- *pdata = p[0];
- else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000)
- *pdata = p[1 + msr - MSR_MTRRfix16K_80000];
- else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000)
- *pdata = p[3 + msr - MSR_MTRRfix4K_C0000];
- else if (msr == MSR_IA32_CR_PAT)
- *pdata = vcpu->arch.pat;
- else { /* Variable MTRRs */
- int idx, is_mtrr_mask;
- u64 *pt;
-
- idx = (msr - 0x200) / 2;
- is_mtrr_mask = msr - 0x200 - 2 * idx;
- if (!is_mtrr_mask)
- pt =
- (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo;
- else
- pt =
- (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo;
- *pdata = *pt;
- }
-
+ *pdata = vcpu->arch.mtrr[msr - 0x200];
return 0;
}
@@ -983,6 +903,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
+ case KVM_CAP_USER_MEMORY:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
@@ -1197,6 +1118,11 @@ out:
return r;
}
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
@@ -1239,8 +1165,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
- bit(X86_FEATURE_SVM);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
/* all func 2 cpuid_count() should be called on the same cpu */
get_cpu();
@@ -1263,7 +1188,6 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
- entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times && *nent < maxnent; ++t) {
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -1294,7 +1218,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; *nent < maxnent; ++i) {
- level_type = entry[i - 1].ecx & 0xff00;
+ level_type = entry[i - 1].ecx & 0xff;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
@@ -1394,15 +1318,6 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return 0;
}
-static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
-{
- vcpu_load(vcpu);
- kvm_inject_nmi(vcpu);
- vcpu_put(vcpu);
-
- return 0;
-}
-
static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
struct kvm_tpr_access_ctl *tac)
{
@@ -1462,13 +1377,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_NMI: {
- r = kvm_vcpu_ioctl_nmi(vcpu);
- if (r)
- goto out;
- r = 0;
- break;
- }
case KVM_SET_CPUID: {
struct kvm_cpuid __user *cpuid_arg = argp;
struct kvm_cpuid cpuid;
@@ -2060,7 +1968,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
if (ret < 0)
return 0;
- kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1);
+ kvm_mmu_pte_write(vcpu, gpa, val, bytes);
return 1;
}
@@ -2496,6 +2404,8 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+
pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
if (pio_dev) {
kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
@@ -2631,7 +2541,7 @@ int kvm_arch_init(void *opaque)
kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- PT_DIRTY_MASK, PT64_NX_MASK, 0, 0);
+ PT_DIRTY_MASK, PT64_NX_MASK, 0);
return 0;
out:
@@ -2819,7 +2729,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
/* when no next entry is found, the current entry[i] is reselected */
- for (j = i + 1; ; j = (j + 1) % nent) {
+ for (j = i + 1; j == i; j = (j + 1) % nent) {
struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
if (ej->function == e->function) {
ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
@@ -2844,15 +2754,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
return 1;
}
-struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
- u32 function, u32 index)
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
int i;
- struct kvm_cpuid_entry2 *best = NULL;
+ u32 function, index;
+ struct kvm_cpuid_entry2 *e, *best;
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- struct kvm_cpuid_entry2 *e;
-
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2867,22 +2782,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
if (!best || e->function > best->function)
best = e;
}
-
- return best;
-}
-
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
-{
- u32 function, index;
- struct kvm_cpuid_entry2 *best;
-
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -3002,6 +2901,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
+ if (vcpu->guest_debug.enabled)
+ kvm_x86_ops->guest_debug_pre(vcpu);
+
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
@@ -3022,34 +2924,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_guest_enter();
- get_debugreg(vcpu->arch.host_dr6, 6);
- get_debugreg(vcpu->arch.host_dr7, 7);
- if (unlikely(vcpu->arch.switch_db_regs)) {
- get_debugreg(vcpu->arch.host_db[0], 0);
- get_debugreg(vcpu->arch.host_db[1], 1);
- get_debugreg(vcpu->arch.host_db[2], 2);
- get_debugreg(vcpu->arch.host_db[3], 3);
-
- set_debugreg(0, 7);
- set_debugreg(vcpu->arch.eff_db[0], 0);
- set_debugreg(vcpu->arch.eff_db[1], 1);
- set_debugreg(vcpu->arch.eff_db[2], 2);
- set_debugreg(vcpu->arch.eff_db[3], 3);
- }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
- if (unlikely(vcpu->arch.switch_db_regs)) {
- set_debugreg(0, 7);
- set_debugreg(vcpu->arch.host_db[0], 0);
- set_debugreg(vcpu->arch.host_db[1], 1);
- set_debugreg(vcpu->arch.host_db[2], 2);
- set_debugreg(vcpu->arch.host_db[3], 3);
- }
- set_debugreg(vcpu->arch.host_dr6, 6);
- set_debugreg(vcpu->arch.host_dr7, 7);
-
vcpu->guest_mode = 0;
local_irq_enable();
@@ -3095,7 +2973,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
- r = kvm_arch_vcpu_reset(vcpu);
+ r = kvm_x86_ops->vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -3236,7 +3114,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
@@ -3397,9 +3275,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
kvm_desct->padding = 0;
}
-static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
- u16 selector,
- struct descriptor_table *dtable)
+static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
+ u16 selector,
+ struct descriptor_table *dtable)
{
if (selector & 1 << 2) {
struct kvm_segment kvm_seg;
@@ -3424,7 +3302,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descriptor_dtable(vcpu, selector, &dtable);
+ get_segment_descritptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7) {
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
@@ -3443,7 +3321,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
struct descriptor_table dtable;
u16 index = selector >> 3;
- get_segment_descriptor_dtable(vcpu, selector, &dtable);
+ get_segment_descritptor_dtable(vcpu, selector, &dtable);
if (dtable.limit < index * 8 + 7)
return 1;
@@ -3855,32 +3733,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg)
{
- int i, r;
+ int r;
vcpu_load(vcpu);
- if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
- (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
- for (i = 0; i < KVM_NR_DB_REGS; ++i)
- vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
- vcpu->arch.switch_db_regs =
- (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
- } else {
- for (i = 0; i < KVM_NR_DB_REGS; i++)
- vcpu->arch.eff_db[i] = vcpu->arch.db[i];
- vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
- }
-
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
- if (dbg->control & KVM_GUESTDBG_INJECT_DB)
- kvm_queue_exception(vcpu, DB_VECTOR);
- else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
- kvm_queue_exception(vcpu, BP_VECTOR);
-
vcpu_put(vcpu);
return r;
@@ -4039,7 +3900,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* We do fxsave: this must be aligned. */
BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
- vcpu->arch.mtrr_state.have_fixed = 1;
vcpu_load(vcpu);
r = kvm_arch_vcpu_reset(vcpu);
if (r == 0)
@@ -4065,14 +3925,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
{
- vcpu->arch.nmi_pending = false;
- vcpu->arch.nmi_injected = false;
-
- vcpu->arch.switch_db_regs = 0;
- memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
- vcpu->arch.dr6 = DR6_FIXED_1;
- vcpu->arch.dr7 = DR7_FIXED_1;
-
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -4160,7 +4012,6 @@ struct kvm *kvm_arch_create_vm(void)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
- INIT_LIST_HEAD(&kvm->arch.oos_global_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
@@ -4197,8 +4048,8 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- kvm_free_all_assigned_devices(kvm);
kvm_iommu_unmap_guest(kvm);
+ kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
kfree(kvm->arch.vioapic);
@@ -4276,8 +4127,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
- || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
- || vcpu->arch.nmi_pending;
+ || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
}
static void vcpu_kick_intr(void *info)
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ea051173b0da 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -58,7 +58,6 @@
#define SrcMem32 (4<<4) /* Memory operand (32-bit). */
#define SrcImm (5<<4) /* Immediate operand. */
#define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */
-#define SrcOne (7<<4) /* Implied '1' */
#define SrcMask (7<<4)
/* Generic ModRM decode. */
#define ModRM (1<<7)
@@ -71,23 +70,17 @@
#define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
-/* Source 2 operand type */
-#define Src2None (0<<29)
-#define Src2CL (1<<29)
-#define Src2ImmByte (2<<29)
-#define Src2One (3<<29)
-#define Src2Mask (7<<29)
enum {
Group1_80, Group1_81, Group1_82, Group1_83,
Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
};
-static u32 opcode_table[256] = {
+static u16 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
- ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
+ 0, 0, 0, 0,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -202,7 +195,7 @@ static u32 opcode_table[256] = {
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
-static u32 twobyte_table[256] = {
+static u16 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
@@ -237,14 +230,9 @@ static u32 twobyte_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xA0 - 0xA7 */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
- DstMem | SrcReg | Src2ImmByte | ModRM,
- DstMem | SrcReg | Src2CL | ModRM, 0, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0,
/* 0xA8 - 0xAF */
- 0, 0, 0, DstMem | SrcReg | ModRM | BitOp,
- DstMem | SrcReg | Src2ImmByte | ModRM,
- DstMem | SrcReg | Src2CL | ModRM,
- ModRM, 0,
+ 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
DstMem | SrcReg | ModRM | BitOp,
@@ -265,7 +253,7 @@ static u32 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-static u32 group_table[] = {
+static u16 group_table[] = {
[Group1_80*8] =
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
@@ -309,9 +297,9 @@ static u32 group_table[] = {
SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
};
-static u32 group2_table[] = {
+static u16 group2_table[] = {
[Group7*8] =
- SrcNone | ModRM, 0, 0, SrcNone | ModRM,
+ SrcNone | ModRM, 0, 0, 0,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, 0,
};
@@ -371,48 +359,49 @@ static u32 group2_table[] = {
"andl %"_msk",%"_LO32 _tmp"; " \
"orl %"_LO32 _tmp",%"_sav"; "
-#ifdef CONFIG_X86_64
-#define ON64(x) x
-#else
-#define ON64(x)
-#endif
-
-#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
- do { \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "4", "2") \
- _op _suffix " %"_x"3,%1; " \
- _POST_EFLAGS("0", "4", "2") \
- : "=m" (_eflags), "=m" ((_dst).val), \
- "=&r" (_tmp) \
- : _y ((_src).val), "i" (EFLAGS_MASK)); \
- } while (0)
-
-
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
- do { \
- unsigned long _tmp; \
- \
- switch ((_dst).bytes) { \
- case 2: \
- ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
- break; \
- case 4: \
- ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
- break; \
- case 8: \
- ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
- break; \
- } \
+ do { \
+ unsigned long _tmp; \
+ \
+ switch ((_dst).bytes) { \
+ case 2: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "2") \
+ _op"w %"_wx"3,%1; " \
+ _POST_EFLAGS("0", "4", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : _wy ((_src).val), "i" (EFLAGS_MASK)); \
+ break; \
+ case 4: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "2") \
+ _op"l %"_lx"3,%1; " \
+ _POST_EFLAGS("0", "4", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : _ly ((_src).val), "i" (EFLAGS_MASK)); \
+ break; \
+ case 8: \
+ __emulate_2op_8byte(_op, _src, _dst, \
+ _eflags, _qx, _qy); \
+ break; \
+ } \
} while (0)
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
do { \
- unsigned long _tmp; \
+ unsigned long __tmp; \
switch ((_dst).bytes) { \
case 1: \
- ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "2") \
+ _op"b %"_bx"3,%1; " \
+ _POST_EFLAGS("0", "4", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (__tmp) \
+ : _by ((_src).val), "i" (EFLAGS_MASK)); \
break; \
default: \
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
@@ -436,69 +425,72 @@ static u32 group2_table[] = {
__emulate_2op_nobyte(_op, _src, _dst, _eflags, \
"w", "r", _LO32, "r", "", "r")
-/* Instruction has three operands and one operand is stored in ECX register */
-#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
- do { \
- unsigned long _tmp; \
- _type _clv = (_cl).val; \
- _type _srcv = (_src).val; \
- _type _dstv = (_dst).val; \
- \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "5", "2") \
- _op _suffix " %4,%1 \n" \
- _POST_EFLAGS("0", "5", "2") \
- : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
- : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
- ); \
- \
- (_cl).val = (unsigned long) _clv; \
- (_src).val = (unsigned long) _srcv; \
- (_dst).val = (unsigned long) _dstv; \
- } while (0)
-
-#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
- do { \
- switch ((_dst).bytes) { \
- case 2: \
- __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "w", unsigned short); \
- break; \
- case 4: \
- __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "l", unsigned int); \
- break; \
- case 8: \
- ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
- "q", unsigned long)); \
- break; \
- } \
- } while (0)
-
-#define __emulate_1op(_op, _dst, _eflags, _suffix) \
- do { \
- unsigned long _tmp; \
- \
- __asm__ __volatile__ ( \
- _PRE_EFLAGS("0", "3", "2") \
- _op _suffix " %1; " \
- _POST_EFLAGS("0", "3", "2") \
- : "=m" (_eflags), "+m" ((_dst).val), \
- "=&r" (_tmp) \
- : "i" (EFLAGS_MASK)); \
- } while (0)
-
/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags) \
do { \
+ unsigned long _tmp; \
+ \
switch ((_dst).bytes) { \
- case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
- case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
- case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
- case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
+ case 1: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "3", "2") \
+ _op"b %1; " \
+ _POST_EFLAGS("0", "3", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK)); \
+ break; \
+ case 2: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "3", "2") \
+ _op"w %1; " \
+ _POST_EFLAGS("0", "3", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK)); \
+ break; \
+ case 4: \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "3", "2") \
+ _op"l %1; " \
+ _POST_EFLAGS("0", "3", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), \
+ "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK)); \
+ break; \
+ case 8: \
+ __emulate_1op_8byte(_op, _dst, _eflags); \
+ break; \
} \
} while (0)
+/* Emulate an instruction with quadword operands (x86/64 only). */
+#if defined(CONFIG_X86_64)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \
+ do { \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "4", "2") \
+ _op"q %"_qx"3,%1; " \
+ _POST_EFLAGS("0", "4", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : _qy ((_src).val), "i" (EFLAGS_MASK)); \
+ } while (0)
+
+#define __emulate_1op_8byte(_op, _dst, _eflags) \
+ do { \
+ __asm__ __volatile__ ( \
+ _PRE_EFLAGS("0", "3", "2") \
+ _op"q %1; " \
+ _POST_EFLAGS("0", "3", "2") \
+ : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \
+ : "i" (EFLAGS_MASK)); \
+ } while (0)
+
+#elif defined(__i386__)
+#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
+#define __emulate_1op_8byte(_op, _dst, _eflags)
+#endif /* __i386__ */
+
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
({ unsigned long _x; \
@@ -1049,33 +1041,6 @@ done_prefixes:
c->src.bytes = 1;
c->src.val = insn_fetch(s8, 1, c->eip);
break;
- case SrcOne:
- c->src.bytes = 1;
- c->src.val = 1;
- break;
- }
-
- /*
- * Decode and fetch the second source operand: register, memory
- * or immediate.
- */
- switch (c->d & Src2Mask) {
- case Src2None:
- break;
- case Src2CL:
- c->src2.bytes = 1;
- c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
- break;
- case Src2ImmByte:
- c->src2.type = OP_IMM;
- c->src2.ptr = (unsigned long *)c->eip;
- c->src2.bytes = 1;
- c->src2.val = insn_fetch(u8, 1, c->eip);
- break;
- case Src2One:
- c->src2.bytes = 1;
- c->src2.val = 1;
- break;
}
/* Decode and fetch the destination operand: register or memory. */
@@ -1135,33 +1100,20 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
c->regs[VCPU_REGS_RSP]);
}
-static int emulate_pop(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
-{
- struct decode_cache *c = &ctxt->decode;
- int rc;
-
- rc = ops->read_emulated(register_address(c, ss_base(ctxt),
- c->regs[VCPU_REGS_RSP]),
- &c->src.val, c->src.bytes, ctxt->vcpu);
- if (rc != 0)
- return rc;
-
- register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
- return rc;
-}
-
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
struct decode_cache *c = &ctxt->decode;
int rc;
- c->src.bytes = c->dst.bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = ops->read_std(register_address(c, ss_base(ctxt),
+ c->regs[VCPU_REGS_RSP]),
+ &c->dst.val, c->dst.bytes, ctxt->vcpu);
if (rc != 0)
return rc;
- c->dst.val = c->src.val;
+
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
+
return 0;
}
@@ -1463,15 +1415,24 @@ special_insn:
emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 0x50 ... 0x57: /* push reg */
- emulate_push(ctxt);
+ c->dst.type = OP_MEM;
+ c->dst.bytes = c->op_bytes;
+ c->dst.val = c->src.val;
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
+ -c->op_bytes);
+ c->dst.ptr = (void *) register_address(
+ c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]);
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- c->src.bytes = c->op_bytes;
- rc = emulate_pop(ctxt, ops);
- if (rc != 0)
+ if ((rc = ops->read_std(register_address(c, ss_base(ctxt),
+ c->regs[VCPU_REGS_RSP]), c->dst.ptr,
+ c->op_bytes, ctxt->vcpu)) != 0)
goto done;
- c->dst.val = c->src.val;
+
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP],
+ c->op_bytes);
+ c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1630,9 +1591,7 @@ special_insn:
emulate_push(ctxt);
break;
case 0x9d: /* popf */
- c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
- c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
@@ -1730,9 +1689,7 @@ special_insn:
emulate_grp2(ctxt);
break;
case 0xc3: /* ret */
- c->dst.type = OP_REG;
c->dst.ptr = &c->eip;
- c->dst.bytes = c->op_bytes;
goto pop_instruction;
case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
mov:
@@ -1821,7 +1778,7 @@ special_insn:
c->eip = saved_eip;
goto cannot_emulate;
}
- break;
+ return 0;
case 0xf4: /* hlt */
ctxt->vcpu->arch.halt_request = 1;
break;
@@ -2042,20 +1999,12 @@ twobyte_insn:
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
break;
- case 0xa4: /* shld imm8, r, r/m */
- case 0xa5: /* shld cl, r, r/m */
- emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
- break;
case 0xab:
bts: /* bts */
/* only subword offset */
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
break;
- case 0xac: /* shrd imm8, r, r/m */
- case 0xad: /* shrd cl, r, r/m */
- emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
- break;
case 0xae: /* clflush */
break;
case 0xb0 ... 0xb1: /* cmpxchg */
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3bbdb9d02376..c16d9be1b017 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -79,12 +79,9 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
if (IS_ERR(anon_inode_inode))
return -ENODEV;
- if (fops->owner && !try_module_get(fops->owner))
- return -ENOENT;
-
error = get_unused_fd_flags(flags);
if (error < 0)
- goto err_module;
+ return error;
fd = error;
/*
@@ -131,8 +128,6 @@ err_dput:
dput(dentry);
err_put_unused_fd:
put_unused_fd(fd);
-err_module:
- module_put(fops->owner);
return error;
}
EXPORT_SYMBOL_GPL(anon_inode_getfd);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 95a905f1270b..f18b86fa8655 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -83,7 +83,6 @@ struct kvm_irqchip {
#define KVM_EXIT_S390_SIEIC 13
#define KVM_EXIT_S390_RESET 14
#define KVM_EXIT_DCR 15
-#define KVM_EXIT_NMI 16
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
@@ -126,7 +125,6 @@ struct kvm_run {
__u64 data_offset; /* relative to kvm_run start */
} io;
struct {
- struct kvm_debug_exit_arch arch;
} debug;
/* KVM_EXIT_MMIO */
struct {
@@ -218,6 +216,21 @@ struct kvm_interrupt {
__u32 irq;
};
+struct kvm_breakpoint {
+ __u32 enabled;
+ __u32 padding;
+ __u64 address;
+};
+
+/* for KVM_DEBUG_GUEST */
+struct kvm_debug_guest {
+ /* int */
+ __u32 enabled;
+ __u32 pad;
+ struct kvm_breakpoint breakpoints[4];
+ __u32 singlestep;
+};
+
/* for KVM_GET_DIRTY_LOG */
struct kvm_dirty_log {
__u32 slot;
@@ -278,17 +291,6 @@ struct kvm_s390_interrupt {
__u64 parm64;
};
-/* for KVM_SET_GUEST_DEBUG */
-
-#define KVM_GUESTDBG_ENABLE 0x00000001
-#define KVM_GUESTDBG_SINGLESTEP 0x00000002
-
-struct kvm_guest_debug {
- __u32 control;
- __u32 pad;
- struct kvm_guest_debug_arch arch;
-};
-
#define KVM_TRC_SHIFT 16
/*
* kvm trace categories
@@ -385,15 +387,6 @@ struct kvm_trace_rec {
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
-#if defined(CONFIG_X86)
-#define KVM_CAP_DEVICE_MSI 20
-#endif
-/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
-#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
-#if defined(CONFIG_X86)
-#define KVM_CAP_USER_NMI 22
-#endif
-#define KVM_CAP_SET_GUEST_DEBUG 23
/*
* ioctls for VM fds
@@ -438,8 +431,7 @@ struct kvm_trace_rec {
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
-/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
-#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST
+#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -466,28 +458,6 @@ struct kvm_trace_rec {
#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
-/* Available with KVM_CAP_NMI */
-#define KVM_NMI _IO(KVMIO, 0x9a)
-/* Available with KVM_CAP_SET_GUEST_DEBUG */
-#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
-
-/*
- * Deprecated interfaces
- */
-struct kvm_breakpoint {
- __u32 enabled;
- __u32 padding;
- __u64 address;
-};
-
-struct kvm_debug_guest {
- __u32 enabled;
- __u32 pad;
- struct kvm_breakpoint breakpoints[4];
- __u32 singlestep;
-};
-
-#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
@@ -530,17 +500,10 @@ struct kvm_assigned_irq {
__u32 guest_irq;
__u32 flags;
union {
- struct {
- __u32 addr_lo;
- __u32 addr_hi;
- __u32 data;
- } guest_msi;
__u32 reserved[12];
};
};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
-#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
-
#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d63e9a4959d8..bb92be2153bc 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,7 +16,6 @@
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/marker.h>
-#include <linux/msi.h>
#include <asm/signal.h>
#include <linux/kvm.h>
@@ -73,7 +72,7 @@ struct kvm_vcpu {
struct kvm_run *run;
int guest_mode;
unsigned long requests;
- unsigned long guest_debug;
+ struct kvm_guest_debug guest_debug;
int fpu_active;
int guest_fpu_loaded;
wait_queue_head_t wq;
@@ -255,8 +254,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state);
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state);
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg);
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+ struct kvm_debug_guest *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
int kvm_arch_init(void *opaque);
@@ -307,14 +306,8 @@ struct kvm_assigned_dev_kernel {
int host_busnr;
int host_devfn;
int host_irq;
- bool host_irq_disabled;
int guest_irq;
- struct msi_msg guest_msi;
-#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
-#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
-#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
-#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
- unsigned long irq_requested_type;
+ int irq_requested;
int irq_source_id;
struct pci_dev *dev;
struct kvm *kvm;
@@ -323,7 +316,8 @@ void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242af..53772bb46320 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -150,11 +150,10 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic,
static void ioapic_inj_nmi(struct kvm_vcpu *vcpu)
{
kvm_inject_nmi(vcpu);
- kvm_vcpu_kick(vcpu);
}
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
- u8 dest_mode)
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+ u8 dest_mode)
{
u32 mask = 0;
int i;
@@ -208,8 +207,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
"vector=%x trig_mode=%x\n",
dest, dest_mode, delivery_mode, vector, trig_mode);
- deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest,
- dest_mode);
+ deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
if (!deliver_bitmask) {
ioapic_debug("no target on destination\n");
return 0;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 49c9581d2586..cd7ae7691c9d 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -85,7 +85,5 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
-u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
- u8 dest_mode);
#endif
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497e..55ad76ee2d09 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -61,9 +61,10 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm,
hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
}
-void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian)
+void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
+ struct kvm_irq_ack_notifier *kian)
{
- hlist_del_init(&kian->link);
+ hlist_del(&kian->link);
}
/* The caller must hold kvm->lock mutex */
@@ -72,15 +73,11 @@ int kvm_request_irq_source_id(struct kvm *kvm)
unsigned long *bitmap = &kvm->arch.irq_sources_bitmap;
int irq_source_id = find_first_zero_bit(bitmap,
sizeof(kvm->arch.irq_sources_bitmap));
-
if (irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n");
- return -EFAULT;
- }
-
- ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
- set_bit(irq_source_id, bitmap);
-
+ irq_source_id = -EFAULT;
+ } else
+ set_bit(irq_source_id, bitmap);
return irq_source_id;
}
@@ -88,9 +85,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
{
int i;
- ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-
- if (irq_source_id < 0 ||
+ if (irq_source_id <= 0 ||
irq_source_id >= sizeof(kvm->arch.irq_sources_bitmap)) {
printk(KERN_ERR "kvm: IRQ source ID out of range!\n");
return;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ffd261d8afa6..a87f45edfae8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,10 +47,6 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#ifdef CONFIG_X86
-#include <asm/msidef.h>
-#endif
-
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif
@@ -64,13 +60,10 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static int msi2intx = 1;
-module_param(msi2intx, bool, 0);
-
DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);
-static cpumask_var_t cpus_hardware_enabled;
+static cpumask_t cpus_hardware_enabled;
struct kmem_cache *kvm_vcpu_cache;
EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
@@ -82,60 +75,9 @@ struct dentry *kvm_debugfs_dir;
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
-static bool kvm_rebooting;
+bool kvm_rebooting;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
-
-#ifdef CONFIG_X86
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
-{
- int vcpu_id;
- struct kvm_vcpu *vcpu;
- struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
- int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
- >> MSI_ADDR_DEST_ID_SHIFT;
- int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
- >> MSI_DATA_VECTOR_SHIFT;
- int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
- (unsigned long *)&dev->guest_msi.address_lo);
- int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
- (unsigned long *)&dev->guest_msi.data);
- int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
- (unsigned long *)&dev->guest_msi.data);
- u32 deliver_bitmask;
-
- BUG_ON(!ioapic);
-
- deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
- dest_id, dest_mode);
- /* IOAPIC delivery mode value is the same as MSI here */
- switch (delivery_mode) {
- case IOAPIC_LOWEST_PRIORITY:
- vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
- deliver_bitmask);
- if (vcpu != NULL)
- kvm_apic_set_irq(vcpu, vector, trig_mode);
- else
- printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
- break;
- case IOAPIC_FIXED:
- for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
- if (!(deliver_bitmask & (1 << vcpu_id)))
- continue;
- deliver_bitmask &= ~(1 << vcpu_id);
- vcpu = ioapic->kvm->vcpus[vcpu_id];
- if (vcpu)
- kvm_apic_set_irq(vcpu, vector, trig_mode);
- }
- break;
- default:
- printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
- }
-}
-#else
-static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
-#endif
-
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
@@ -162,16 +104,9 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- else if (assigned_dev->irq_requested_type &
- KVM_ASSIGNED_DEV_GUEST_MSI) {
- assigned_device_msi_dispatch(assigned_dev);
- enable_irq(assigned_dev->host_irq);
- assigned_dev->host_irq_disabled = false;
- }
+ kvm_set_irq(assigned_dev->kvm,
+ assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
@@ -182,12 +117,8 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
(struct kvm_assigned_dev_kernel *) dev_id;
kvm_get_kvm(assigned_dev->kvm);
-
schedule_work(&assigned_dev->interrupt_work);
-
disable_irq_nosync(irq);
- assigned_dev->host_irq_disabled = true;
-
return IRQ_HANDLED;
}
@@ -201,32 +132,19 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
dev = container_of(kian, struct kvm_assigned_dev_kernel,
ack_notifier);
-
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
-
- /* The guest irq may be shared so this ack may be
- * from another device.
- */
- if (dev->host_irq_disabled) {
- enable_irq(dev->host_irq);
- dev->host_irq_disabled = false;
- }
+ enable_irq(dev->host_irq);
}
-static void kvm_free_assigned_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
+static void kvm_free_assigned_device(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel
+ *assigned_dev)
{
- if (!irqchip_in_kernel(kvm))
- return;
-
- kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+ if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+ free_irq(assigned_dev->host_irq, (void *)assigned_dev);
- if (assigned_dev->irq_source_id != -1)
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
- assigned_dev->irq_source_id = -1;
-
- if (!assigned_dev->irq_requested_type)
- return;
+ kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+ kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. That means we will have to take
@@ -234,23 +152,6 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
*/
kvm_put_kvm(kvm);
- free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-
- if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
-
- assigned_dev->irq_requested_type = 0;
-}
-
-
-static void kvm_free_assigned_device(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
-{
- kvm_free_assigned_irq(kvm, assigned_dev);
-
- pci_reset_function(assigned_dev->dev);
-
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
@@ -273,95 +174,6 @@ void kvm_free_all_assigned_devices(struct kvm *kvm)
}
}
-static int assigned_device_update_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *adev,
- struct kvm_assigned_irq *airq)
-{
- adev->guest_irq = airq->guest_irq;
- adev->ack_notifier.gsi = airq->guest_irq;
-
- if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
- return 0;
-
- if (irqchip_in_kernel(kvm)) {
- if (!msi2intx &&
- adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
- free_irq(adev->host_irq, (void *)kvm);
- pci_disable_msi(adev->dev);
- }
-
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- if (airq->host_irq)
- adev->host_irq = airq->host_irq;
- else
- adev->host_irq = adev->dev->irq;
-
- /* Even though this is PCI, we don't want to use shared
- * interrupts. Sharing host devices with guest-assigned devices
- * on the same interrupt line is not a happy situation: there
- * are going to be long delays in accepting, acking, etc.
- */
- if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
- 0, "kvm_assigned_intx_device", (void *)adev))
- return -EIO;
- }
-
- adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
- KVM_ASSIGNED_DEV_HOST_INTX;
- return 0;
-}
-
-#ifdef CONFIG_X86
-static int assigned_device_update_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *adev,
- struct kvm_assigned_irq *airq)
-{
- int r;
-
- if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
- /* x86 don't care upper address of guest msi message addr */
- adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
- adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
- adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
- adev->guest_msi.data = airq->guest_msi.data;
- adev->ack_notifier.gsi = -1;
- } else if (msi2intx) {
- adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
- adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
- adev->guest_irq = airq->guest_irq;
- adev->ack_notifier.gsi = airq->guest_irq;
- }
-
- if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
- return 0;
-
- if (irqchip_in_kernel(kvm)) {
- if (!msi2intx) {
- if (adev->irq_requested_type &
- KVM_ASSIGNED_DEV_HOST_INTX)
- free_irq(adev->host_irq, (void *)adev);
-
- r = pci_enable_msi(adev->dev);
- if (r)
- return r;
- }
-
- adev->host_irq = adev->dev->irq;
- if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
- "kvm_assigned_msi_device", (void *)adev))
- return -EIO;
- }
-
- if (!msi2intx)
- adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
-
- adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
- return 0;
-}
-#endif
-
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
@@ -378,68 +190,49 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
return -EINVAL;
}
- if (!match->irq_requested_type) {
- INIT_WORK(&match->interrupt_work,
- kvm_assigned_dev_interrupt_work_handler);
- if (irqchip_in_kernel(kvm)) {
- /* Register ack nofitier */
- match->ack_notifier.gsi = -1;
- match->ack_notifier.irq_acked =
- kvm_assigned_dev_ack_irq;
- kvm_register_irq_ack_notifier(kvm,
- &match->ack_notifier);
-
- /* Request IRQ source ID */
- r = kvm_request_irq_source_id(kvm);
- if (r < 0)
- goto out_release;
- else
- match->irq_source_id = r;
-
-#ifdef CONFIG_X86
- /* Determine host device irq type, we can know the
- * result from dev->msi_enabled */
- if (msi2intx)
- pci_enable_msi(match->dev);
-#endif
- }
+ if (match->irq_requested) {
+ match->guest_irq = assigned_irq->guest_irq;
+ match->ack_notifier.gsi = assigned_irq->guest_irq;
+ mutex_unlock(&kvm->lock);
+ return 0;
}
- if ((!msi2intx &&
- (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
- (msi2intx && match->dev->msi_enabled)) {
-#ifdef CONFIG_X86
- r = assigned_device_update_msi(kvm, match, assigned_irq);
- if (r) {
- printk(KERN_WARNING "kvm: failed to enable "
- "MSI device!\n");
+ INIT_WORK(&match->interrupt_work,
+ kvm_assigned_dev_interrupt_work_handler);
+
+ if (irqchip_in_kernel(kvm)) {
+ if (!capable(CAP_SYS_RAWIO)) {
+ r = -EPERM;
goto out_release;
}
-#else
- r = -ENOTTY;
-#endif
- } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
- /* Host device IRQ 0 means don't support INTx */
- if (!msi2intx) {
- printk(KERN_WARNING
- "kvm: wait device to enable MSI!\n");
- r = 0;
- } else {
- printk(KERN_WARNING
- "kvm: failed to enable MSI device!\n");
- r = -ENOTTY;
+
+ if (assigned_irq->host_irq)
+ match->host_irq = assigned_irq->host_irq;
+ else
+ match->host_irq = match->dev->irq;
+ match->guest_irq = assigned_irq->guest_irq;
+ match->ack_notifier.gsi = assigned_irq->guest_irq;
+ match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
+ kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
+ r = kvm_request_irq_source_id(kvm);
+ if (r < 0)
goto out_release;
- }
- } else {
- /* Non-sharing INTx mode */
- r = assigned_device_update_intx(kvm, match, assigned_irq);
- if (r) {
- printk(KERN_WARNING "kvm: failed to enable "
- "INTx device!\n");
+ else
+ match->irq_source_id = r;
+
+ /* Even though this is PCI, we don't want to use shared
+ * interrupts. Sharing host devices with guest-assigned devices
+ * on the same interrupt line is not a happy situation: there
+ * are going to be long delays in accepting, acking, etc.
+ */
+ if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
+ "kvm_assigned_device", (void *)match)) {
+ r = -EIO;
goto out_release;
}
}
+ match->irq_requested = true;
mutex_unlock(&kvm->lock);
return r;
out_release:
@@ -490,14 +283,11 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
__func__);
goto out_disable;
}
-
- pci_reset_function(dev);
-
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->dev = dev;
- match->irq_source_id = -1;
+
match->kvm = kvm;
list_add(&match->list, &kvm->arch.assigned_dev_head);
@@ -565,49 +355,58 @@ static void ack_flush(void *_completed)
{
}
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+void kvm_flush_remote_tlbs(struct kvm *kvm)
{
int i, cpu, me;
- cpumask_var_t cpus;
- bool called = true;
+ cpumask_t cpus;
struct kvm_vcpu *vcpu;
- if (alloc_cpumask_var(&cpus, GFP_ATOMIC))
- cpumask_clear(cpus);
-
me = get_cpu();
+ cpus_clear(cpus);
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
- if (test_and_set_bit(req, &vcpu->requests))
+ if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
continue;
cpu = vcpu->cpu;
- if (cpus != NULL && cpu != -1 && cpu != me)
- cpumask_set_cpu(cpu, cpus);
+ if (cpu != -1 && cpu != me)
+ cpu_set(cpu, cpus);
}
- if (unlikely(cpus == NULL))
- smp_call_function_many(cpu_online_mask, ack_flush, NULL, 1);
- else if (!cpumask_empty(cpus))
- smp_call_function_many(cpus, ack_flush, NULL, 1);
- else
- called = false;
+ if (cpus_empty(cpus))
+ goto out;
+ ++kvm->stat.remote_tlb_flush;
+ smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
put_cpu();
- free_cpumask_var(cpus);
- return called;
-}
-
-void kvm_flush_remote_tlbs(struct kvm *kvm)
-{
- if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
- ++kvm->stat.remote_tlb_flush;
}
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ int i, cpu, me;
+ cpumask_t cpus;
+ struct kvm_vcpu *vcpu;
+
+ me = get_cpu();
+ cpus_clear(cpus);
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ vcpu = kvm->vcpus[i];
+ if (!vcpu)
+ continue;
+ if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+ continue;
+ cpu = vcpu->cpu;
+ if (cpu != -1 && cpu != me)
+ cpu_set(cpu, cpus);
+ }
+ if (cpus_empty(cpus))
+ goto out;
+ smp_call_function_mask(cpus, ack_flush, NULL, 1);
+out:
+ put_cpu();
}
+
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
{
struct page *page;
@@ -911,8 +710,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
- if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
- goto out;
if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
@@ -1024,10 +821,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- kvm_free_physmem_slot(&old, npages ? &new : NULL);
- /* Slot deletion case: we have to update the current slot */
- if (!npages)
- *memslot = old;
+ kvm_free_physmem_slot(&old, &new);
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
r = kvm_iommu_map_pages(kvm, base_gfn, npages);
@@ -1124,7 +918,7 @@ int kvm_is_error_hva(unsigned long addr)
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
+static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
int i;
@@ -1137,12 +931,11 @@ struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
gfn = unalias_gfn(kvm, gfn);
- return gfn_to_memslot_unaliased(kvm, gfn);
+ return __gfn_to_memslot(kvm, gfn);
}
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
@@ -1166,7 +959,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *slot;
gfn = unalias_gfn(kvm, gfn);
- slot = gfn_to_memslot_unaliased(kvm, gfn);
+ slot = __gfn_to_memslot(kvm, gfn);
if (!slot)
return bad_hva();
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
@@ -1417,7 +1210,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *memslot;
gfn = unalias_gfn(kvm, gfn);
- memslot = gfn_to_memslot_unaliased(kvm, gfn);
+ memslot = __gfn_to_memslot(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
@@ -1502,7 +1295,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
return 0;
}
-static struct file_operations kvm_vcpu_fops = {
+static const struct file_operations kvm_vcpu_fops = {
.release = kvm_vcpu_release,
.unlocked_ioctl = kvm_vcpu_ioctl,
.compat_ioctl = kvm_vcpu_ioctl,
@@ -1697,13 +1490,13 @@ out_free2:
r = 0;
break;
}
- case KVM_SET_GUEST_DEBUG: {
- struct kvm_guest_debug dbg;
+ case KVM_DEBUG_GUEST: {
+ struct kvm_debug_guest dbg;
r = -EFAULT;
if (copy_from_user(&dbg, argp, sizeof dbg))
goto out;
- r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
+ r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
if (r)
goto out;
r = 0;
@@ -1896,7 +1689,7 @@ static int kvm_vm_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
-static struct file_operations kvm_vm_fops = {
+static const struct file_operations kvm_vm_fops = {
.release = kvm_vm_release,
.unlocked_ioctl = kvm_vm_ioctl,
.compat_ioctl = kvm_vm_ioctl,
@@ -1918,18 +1711,6 @@ static int kvm_dev_ioctl_create_vm(void)
return fd;
}
-static long kvm_dev_ioctl_check_extension_generic(long arg)
-{
- switch (arg) {
- case KVM_CAP_USER_MEMORY:
- case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
- return 1;
- default:
- break;
- }
- return kvm_dev_ioctl_check_extension(arg);
-}
-
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1949,7 +1730,7 @@ static long kvm_dev_ioctl(struct file *filp,
r = kvm_dev_ioctl_create_vm();
break;
case KVM_CHECK_EXTENSION:
- r = kvm_dev_ioctl_check_extension_generic(arg);
+ r = kvm_dev_ioctl_check_extension(arg);
break;
case KVM_GET_VCPU_MMAP_SIZE:
r = -EINVAL;
@@ -1990,9 +1771,9 @@ static void hardware_enable(void *junk)
{
int cpu = raw_smp_processor_id();
- if (cpumask_test_cpu(cpu, cpus_hardware_enabled))
+ if (cpu_isset(cpu, cpus_hardware_enabled))
return;
- cpumask_set_cpu(cpu, cpus_hardware_enabled);
+ cpu_set(cpu, cpus_hardware_enabled);
kvm_arch_hardware_enable(NULL);
}
@@ -2000,9 +1781,9 @@ static void hardware_disable(void *junk)
{
int cpu = raw_smp_processor_id();
- if (!cpumask_test_cpu(cpu, cpus_hardware_enabled))
+ if (!cpu_isset(cpu, cpus_hardware_enabled))
return;
- cpumask_clear_cpu(cpu, cpus_hardware_enabled);
+ cpu_clear(cpu, cpus_hardware_enabled);
kvm_arch_hardware_disable(NULL);
}
@@ -2236,14 +2017,9 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
bad_pfn = page_to_pfn(bad_page);
- if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
- r = -ENOMEM;
- goto out_free_0;
- }
-
r = kvm_arch_hardware_setup();
if (r < 0)
- goto out_free_0a;
+ goto out_free_0;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu,
@@ -2277,8 +2053,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
}
kvm_chardev_ops.owner = module;
- kvm_vm_fops.owner = module;
- kvm_vcpu_fops.owner = module;
r = misc_register(&kvm_dev);
if (r) {
@@ -2288,9 +2062,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
-#ifndef CONFIG_X86
- msi2intx = 0;
-#endif
return 0;
@@ -2307,8 +2078,6 @@ out_free_2:
on_each_cpu(hardware_disable, NULL, 1);
out_free_1:
kvm_arch_hardware_unsetup();
-out_free_0a:
- free_cpumask_var(cpus_hardware_enabled);
out_free_0:
__free_page(bad_page);
out:
@@ -2332,7 +2101,6 @@ void kvm_exit(void)
kvm_arch_hardware_unsetup();
kvm_arch_exit();
kvm_exit_debug();
- free_cpumask_var(cpus_hardware_enabled);
__free_page(bad_page);
}
EXPORT_SYMBOL_GPL(kvm_exit);
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
index f59874446440..41dcc845f78c 100644
--- a/virt/kvm/kvm_trace.c
+++ b/virt/kvm/kvm_trace.c
@@ -252,7 +252,6 @@ void kvm_trace_cleanup(void)
struct kvm_trace_probe *p = &kvm_trace_probes[i];
marker_probe_unregister(p->name, p->probe_func, p);
}
- marker_synchronize_unregister();
relay_close(kt->rchan);
debugfs_remove(kt->lost_file);