summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2009-01-16 13:01:58 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2009-01-16 13:01:58 +1100
commit343d8f360a5cdd0233c71bb8395626f34630b7bd (patch)
tree5ab37ef62af82351acfe46f391153a12c281bde7
parent7495d505cadaae8170f05b6e54f2aebc7c7c26bb (diff)
parent24bf54a8d47ce045f398d16cd89ae5cf7ce7a6b0 (diff)
Merge commit 'kvm/master'
-rw-r--r--arch/ia64/include/asm/kvm.h49
-rw-r--r--arch/ia64/include/asm/kvm_host.h8
-rw-r--r--arch/ia64/kvm/Kconfig4
-rw-r--r--arch/ia64/kvm/kvm-ia64.c100
-rw-r--r--arch/ia64/kvm/process.c32
-rw-r--r--arch/ia64/kvm/vcpu.c39
-rw-r--r--arch/ia64/kvm/vtlb.c5
-rw-r--r--arch/powerpc/include/asm/kvm.h7
-rw-r--r--arch/powerpc/include/asm/kvm_44x.h7
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h7
-rw-r--r--arch/powerpc/include/asm/kvm_e500.h67
-rw-r--r--arch/powerpc/include/asm/kvm_host.h21
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h15
-rw-r--r--arch/powerpc/include/asm/mmu-fsl-booke.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kvm/44x.c72
-rw-r--r--arch/powerpc/kvm/44x_emulate.c217
-rw-r--r--arch/powerpc/kvm/44x_tlb.c39
-rw-r--r--arch/powerpc/kvm/44x_tlb.h9
-rw-r--r--arch/powerpc/kvm/Kconfig16
-rw-r--r--arch/powerpc/kvm/Makefile10
-rw-r--r--arch/powerpc/kvm/booke.c50
-rw-r--r--arch/powerpc/kvm/booke.h35
-rw-r--r--arch/powerpc/kvm/booke_emulate.c266
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S5
-rw-r--r--arch/powerpc/kvm/e500.c169
-rw-r--r--arch/powerpc/kvm/e500_emulate.c194
-rw-r--r--arch/powerpc/kvm/e500_tlb.c737
-rw-r--r--arch/powerpc/kvm/e500_tlb.h184
-rw-r--r--arch/powerpc/kvm/emulate.c93
-rw-r--r--arch/powerpc/kvm/powerpc.c35
-rw-r--r--arch/s390/include/asm/kvm.h7
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/kvm/Kconfig3
-rw-r--r--arch/s390/kvm/kvm-s390.c8
-rw-r--r--arch/x86/include/asm/kvm.h23
-rw-r--r--arch/x86/include/asm/kvm_host.h59
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/svm.h4
-rw-r--r--arch/x86/include/asm/virtext.h2
-rw-r--r--arch/x86/include/asm/vmx.h5
-rw-r--r--arch/x86/kvm/Kconfig4
-rw-r--r--arch/x86/kvm/i8254.c23
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/kvm_svm.h16
-rw-r--r--arch/x86/kvm/mmu.c217
-rw-r--r--arch/x86/kvm/paging_tmpl.h213
-rw-r--r--arch/x86/kvm/svm.c868
-rw-r--r--arch/x86/kvm/vmx.c303
-rw-r--r--arch/x86/kvm/x86.c304
-rw-r--r--arch/x86/kvm/x86_emulate.c56
-rw-r--r--include/linux/kvm.h61
-rw-r--r--include/linux/kvm_host.h25
-rw-r--r--include/linux/kvm_types.h13
-rw-r--r--virt/kvm/ioapic.c6
-rw-r--r--virt/kvm/irq_comm.c24
-rw-r--r--virt/kvm/kvm_main.c67
57 files changed, 3707 insertions, 1114 deletions
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index 68aa6da807c1..2e04498d5968 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -162,7 +162,40 @@ struct saved_vpd {
unsigned long vcpuid[5];
unsigned long vpsr;
unsigned long vpr;
- unsigned long vcr[128];
+ union {
+ unsigned long vcr[128];
+ struct {
+ unsigned long dcr;
+ unsigned long itm;
+ unsigned long iva;
+ unsigned long rsv1[5];
+ unsigned long pta;
+ unsigned long rsv2[7];
+ unsigned long ipsr;
+ unsigned long isr;
+ unsigned long rsv3;
+ unsigned long iip;
+ unsigned long ifa;
+ unsigned long itir;
+ unsigned long iipa;
+ unsigned long ifs;
+ unsigned long iim;
+ unsigned long iha;
+ unsigned long rsv4[38];
+ unsigned long lid;
+ unsigned long ivr;
+ unsigned long tpr;
+ unsigned long eoi;
+ unsigned long irr[4];
+ unsigned long itv;
+ unsigned long pmv;
+ unsigned long cmcv;
+ unsigned long rsv5[5];
+ unsigned long lrr0;
+ unsigned long lrr1;
+ unsigned long rsv6[46];
+ };
+ };
};
struct kvm_regs {
@@ -210,4 +243,18 @@ struct kvm_sregs {
struct kvm_fpu {
};
+#define KVM_IA64_VCPU_STACK_SHIFT 16
+#define KVM_IA64_VCPU_STACK_SIZE (1UL << KVM_IA64_VCPU_STACK_SHIFT)
+
+struct kvm_ia64_vcpu_stack {
+ unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
+};
+
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
#endif
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 348663661659..46f8b0eb5684 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -112,7 +112,11 @@
#define VCPU_STRUCT_SHIFT 16
#define VCPU_STRUCT_SIZE (__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-#define KVM_STK_OFFSET VCPU_STRUCT_SIZE
+/*
+ * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
+ */
+#define KVM_STK_SHIFT 16
+#define KVM_STK_OFFSET (__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
#define KVM_VM_STRUCT_SHIFT 19
#define KVM_VM_STRUCT_SIZE (__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
@@ -235,8 +239,6 @@ struct kvm_vm_data {
struct kvm;
struct kvm_vcpu;
-struct kvm_guest_debug{
-};
struct kvm_mmio_req {
uint64_t addr; /* physical address */
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index f833a0b4188d..0a2d6b86075a 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || IA64
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 4e586f6110aa..1477f91617a5 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1303,8 +1303,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return -EINVAL;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
return -EINVAL;
}
@@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
@@ -1417,6 +1421,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
+ struct kvm_ia64_vcpu_stack *stack)
+{
+ memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
+ struct kvm_ia64_vcpu_stack *stack)
+{
+ memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
+ sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
+
+ vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
+ return 0;
+}
+
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
@@ -1426,9 +1447,78 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+ unsigned int ioctl, unsigned long arg)
{
- return -EINVAL;
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ struct kvm_ia64_vcpu_stack *stack = NULL;
+ long r;
+
+ switch (ioctl) {
+ case KVM_IA64_VCPU_GET_STACK: {
+ struct kvm_ia64_vcpu_stack __user *user_stack;
+ void __user *first_p = argp;
+
+ r = -EFAULT;
+ if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+ goto out;
+
+ if (!access_ok(VERIFY_WRITE, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack))) {
+ printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
+ "Illegal user destination address for stack\n");
+ goto out;
+ }
+ stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+ if (!stack) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
+ if (r)
+ goto out;
+
+ if (copy_to_user(user_stack, stack,
+ sizeof(struct kvm_ia64_vcpu_stack)))
+ goto out;
+
+ break;
+ }
+ case KVM_IA64_VCPU_SET_STACK: {
+ struct kvm_ia64_vcpu_stack __user *user_stack;
+ void __user *first_p = argp;
+
+ r = -EFAULT;
+ if (copy_from_user(&user_stack, first_p, sizeof(void *)))
+ goto out;
+
+ if (!access_ok(VERIFY_READ, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack))) {
+ printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
+ "Illegal user address for stack\n");
+ goto out;
+ }
+ stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
+ if (!stack) {
+ r = -ENOMEM;
+ goto out;
+ }
+ if (copy_from_user(stack, user_stack,
+ sizeof(struct kvm_ia64_vcpu_stack)))
+ goto out;
+
+ r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
+ break;
+ }
+
+ default:
+ r = -EINVAL;
+ }
+
+out:
+ kfree(stack);
+ return r;
}
int kvm_arch_set_memory_region(struct kvm *kvm,
@@ -1468,7 +1558,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
}
long kvm_arch_dev_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+ unsigned int ioctl, unsigned long arg)
{
return -EINVAL;
}
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
index 552d07724207..f9c9504144fa 100644
--- a/arch/ia64/kvm/process.c
+++ b/arch/ia64/kvm/process.c
@@ -167,7 +167,6 @@ static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
return (rr1.val);
}
-
/*
* Set vIFA & vITIR & vIHA, when vPSR.ic =1
* Parameter:
@@ -222,8 +221,6 @@ void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
}
-
-
/*
* Data Nested TLB Fault
* @ Data Nested TLB Vector
@@ -245,7 +242,6 @@ void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
}
-
/*
* Data TLB Fault
* @ Data TLB vector
@@ -265,8 +261,6 @@ static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
/* If vPSR.ic, IFA, ITIR, IHA*/
set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-
-
}
/*
@@ -279,7 +273,6 @@ void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
_vhpt_fault(vcpu, vadr);
}
-
/*
* VHPT Data Fault
* @ VHPT Translation vector
@@ -290,8 +283,6 @@ void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
_vhpt_fault(vcpu, vadr);
}
-
-
/*
* Deal with:
* General Exception vector
@@ -301,7 +292,6 @@ void _general_exception(struct kvm_vcpu *vcpu)
inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
}
-
/*
* Illegal Operation Fault
* @ General Exception Vector
@@ -419,19 +409,16 @@ static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
}
-
void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
{
__page_not_present(vcpu, vadr);
}
-
void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
{
__page_not_present(vcpu, vadr);
}
-
/* Deal with
* Data access rights vector
*/
@@ -455,13 +442,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
if (!vmm_fpswa_interface)
return (fpswa_ret_t) {-1, 0, 0, 0};
- /*
- * Just let fpswa driver to use hardware fp registers.
- * No fp register is valid in memory.
- */
memset(&fp_state, 0, sizeof(fp_state_t));
/*
+ * compute fp_state. only FP registers f6 - f11 are used by the
+ * vmm, so set those bits in the mask and set the low volatile
+ * pointer to point to these registers.
+ */
+ fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
+
+ fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
+
+ /*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
@@ -545,10 +537,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
status = vmm_handle_fpu_swa(0, regs, isr);
if (!status)
return ;
- else if (-EAGAIN == status) {
- vcpu_decrement_iip(vcpu);
- return ;
- }
break;
}
@@ -702,7 +690,6 @@ void vhpi_detection(struct kvm_vcpu *vcpu)
}
}
-
void leave_hypervisor_tail(void)
{
struct kvm_vcpu *v = current_vcpu;
@@ -736,7 +723,6 @@ void leave_hypervisor_tail(void)
}
}
-
static inline void handle_lds(struct kvm_pt_regs *regs)
{
regs->cr_ipsr |= IA64_PSR_ED;
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index ecd526b55323..4d8be4c252fa 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -112,7 +112,6 @@ void switch_to_physical_rid(struct kvm_vcpu *vcpu)
return;
}
-
void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
{
unsigned long psr;
@@ -166,8 +165,6 @@ void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
return;
}
-
-
/*
* In physical mode, insert tc/tr for region 0 and 4 uses
* RID[0] and RID[4] which is for physical mode emulation.
@@ -269,7 +266,6 @@ static inline unsigned long fph_index(struct kvm_pt_regs *regs,
return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
}
-
/*
* The inverse of the above: given bspstore and the number of
* registers, calculate ar.bsp.
@@ -1039,8 +1035,6 @@ u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
return key;
}
-
-
void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long thash, vadr;
@@ -1050,7 +1044,6 @@ void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
}
-
void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long tag, vadr;
@@ -1131,7 +1124,6 @@ int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
return IA64_NO_FAULT;
}
-
int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r1, r3;
@@ -1154,7 +1146,6 @@ void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
}
-
/************************************
* Insert/Purge translation register/cache
************************************/
@@ -1385,7 +1376,6 @@ void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_itc(vcpu, r2);
}
-
void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r1;
@@ -1393,8 +1383,9 @@ void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
r1 = vcpu_get_itc(vcpu);
vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
}
+
/**************************************************************************
- struct kvm_vcpu*protection key register access routines
+ struct kvm_vcpu protection key register access routines
**************************************************************************/
unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
@@ -1407,20 +1398,6 @@ void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
ia64_set_pkr(reg, val);
}
-
-unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
-{
- union ia64_rr rr, rr1;
-
- rr.val = vcpu_get_rr(vcpu, ifa);
- rr1.val = 0;
- rr1.ps = rr.ps;
- rr1.rid = rr.rid;
- return (rr1.val);
-}
-
-
-
/********************************
* Moves to privileged registers
********************************/
@@ -1464,8 +1441,6 @@ unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
return (IA64_NO_FAULT);
}
-
-
void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r3, r2;
@@ -1510,8 +1485,6 @@ void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_pkr(vcpu, r3, r2);
}
-
-
void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long r3, r1;
@@ -1557,7 +1530,6 @@ void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
}
-
unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
{
/* FIXME: This could get called as a result of a rsvd-reg fault */
@@ -1609,7 +1581,6 @@ unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
return 0;
}
-
unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
{
unsigned long tgt = inst.M33.r1;
@@ -1633,8 +1604,6 @@ unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
return 0;
}
-
-
void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -1776,9 +1745,6 @@ void vcpu_bsw1(struct kvm_vcpu *vcpu)
}
}
-
-
-
void vcpu_rfi(struct kvm_vcpu *vcpu)
{
unsigned long ifs, psr;
@@ -1796,7 +1762,6 @@ void vcpu_rfi(struct kvm_vcpu *vcpu)
regs->cr_iip = VCPU(vcpu, iip);
}
-
/*
VPSR can't keep track of below bits of guest PSR
This function gets guest PSR
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index 6b6307a3bd55..ac94867f8267 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -509,7 +509,6 @@ void thash_purge_all(struct kvm_vcpu *v)
local_flush_tlb_all();
}
-
/*
* Lookup the hash table and its collision chain to find an entry
* covering this address rid:va or the entry.
@@ -517,7 +516,6 @@ void thash_purge_all(struct kvm_vcpu *v)
* INPUT:
* in: TLB format for both VHPT & TLB.
*/
-
struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
{
struct thash_data *cch;
@@ -547,7 +545,6 @@ struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
return NULL;
}
-
/*
* Initialize internal control data before service.
*/
@@ -589,7 +586,6 @@ u64 kvm_gpa_to_mpa(u64 gpa)
return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
}
-
/*
* Fetch guest bundle code.
* INPUT:
@@ -631,7 +627,6 @@ int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
return IA64_NO_FAULT;
}
-
void kvm_init_vhpt(struct kvm_vcpu *v)
{
v->arch.vhpt.num = VHPT_NUM_ENTRIES;
diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index f993e4198d5c..755f1b1948c5 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -52,4 +52,11 @@ struct kvm_fpu {
__u64 fpr[32];
};
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h
index f49031b632ca..d22d39942a92 100644
--- a/arch/powerpc/include/asm/kvm_44x.h
+++ b/arch/powerpc/include/asm/kvm_44x.h
@@ -28,6 +28,13 @@
* need to find some way of advertising it. */
#define KVM44x_GUEST_TLB_SIZE 64
+struct kvmppc_44x_tlbe {
+ u32 tid; /* Only the low 8 bits are used. */
+ u32 word0;
+ u32 word1;
+ u32 word2;
+};
+
struct kvmppc_44x_shadow_ref {
struct page *page;
u16 gtlb_index;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 2197764796d9..56bfae59837f 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -42,7 +42,12 @@
#define BOOKE_INTERRUPT_DTLB_MISS 13
#define BOOKE_INTERRUPT_ITLB_MISS 14
#define BOOKE_INTERRUPT_DEBUG 15
-#define BOOKE_MAX_INTERRUPT 15
+
+/* E500 */
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
+#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
diff --git a/arch/powerpc/include/asm/kvm_e500.h b/arch/powerpc/include/asm/kvm_e500.h
new file mode 100644
index 000000000000..9d497ce49726
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/include/asm/kvm_44x.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_KVM_E500_H__
+#define __ASM_KVM_E500_H__
+
+#include <linux/kvm_host.h>
+
+#define BOOKE_INTERRUPT_SIZE 36
+
+#define E500_PID_NUM 3
+#define E500_TLB_NUM 2
+
+struct tlbe{
+ u32 mas1;
+ u32 mas2;
+ u32 mas3;
+ u32 mas7;
+};
+
+struct kvmppc_vcpu_e500 {
+ /* Unmodified copy of the guest's TLB. */
+ struct tlbe *guest_tlb[E500_TLB_NUM];
+ /* TLB that's actually used when the guest is running. */
+ struct tlbe *shadow_tlb[E500_TLB_NUM];
+ /* Pages which are referenced in the shadow TLB. */
+ struct page **shadow_pages[E500_TLB_NUM];
+
+ unsigned int guest_tlb_size[E500_TLB_NUM];
+ unsigned int shadow_tlb_size[E500_TLB_NUM];
+ unsigned int guest_tlb_nv[E500_TLB_NUM];
+
+ u32 host_pid[E500_PID_NUM];
+ u32 pid[E500_PID_NUM];
+
+ u32 mas0;
+ u32 mas1;
+ u32 mas2;
+ u32 mas3;
+ u32 mas4;
+ u32 mas5;
+ u32 mas6;
+ u32 mas7;
+ u32 l1csr1;
+ u32 hid0;
+ u32 hid1;
+
+ struct kvm_vcpu vcpu;
+};
+
+static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu)
+{
+ return container_of(vcpu, struct kvmppc_vcpu_e500, vcpu);
+}
+
+#endif /* __ASM_KVM_E500_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c1e436fe7738..dfdf13c9fefd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -64,13 +64,6 @@ struct kvm_vcpu_stat {
u32 halt_wakeup;
};
-struct kvmppc_44x_tlbe {
- u32 tid; /* Only the low 8 bits are used. */
- u32 word0;
- u32 word1;
- u32 word2;
-};
-
enum kvm_exit_types {
MMIO_EXITS,
DCR_EXITS,
@@ -118,11 +111,6 @@ struct kvm_arch {
struct kvm_vcpu_arch {
u32 host_stack;
u32 host_pid;
- u32 host_dbcr0;
- u32 host_dbcr1;
- u32 host_dbcr2;
- u32 host_iac[4];
- u32 host_msr;
u64 fpr[32];
ulong gpr[32];
@@ -157,7 +145,7 @@ struct kvm_vcpu_arch {
u32 tbu;
u32 tcr;
u32 tsr;
- u32 ivor[16];
+ u32 ivor[64];
ulong ivpr;
u32 pir;
@@ -170,6 +158,7 @@ struct kvm_vcpu_arch {
u32 ccr1;
u32 dbcr0;
u32 dbcr1;
+ u32 dbsr;
#ifdef CONFIG_KVM_EXIT_TIMING
struct kvmppc_exit_timing timing_exit;
@@ -200,10 +189,4 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
};
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
-
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 36d2a50a8487..2c6ee349df5e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -52,13 +52,19 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
+/* Core-specific hooks */
+
extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
- u64 asid, u32 flags, u32 max_bytes,
unsigned int gtlb_idx);
extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-
-/* Core-specific hooks */
+extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
+extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+ gva_t eaddr);
+extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
unsigned int id);
@@ -71,9 +77,6 @@ extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
extern void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu);
-
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mmu-fsl-booke.h b/arch/powerpc/include/asm/mmu-fsl-booke.h
index 3f941c0f7e8e..4285b64a65e0 100644
--- a/arch/powerpc/include/asm/mmu-fsl-booke.h
+++ b/arch/powerpc/include/asm/mmu-fsl-booke.h
@@ -75,6 +75,8 @@
#ifndef __ASSEMBLY__
+extern unsigned int tlbcam_index;
+
typedef struct {
unsigned int id;
unsigned int active;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 19ee491e9e23..42fe4da4e8ae 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -49,7 +49,7 @@
#include <asm/iseries/alpaca.h>
#endif
#ifdef CONFIG_KVM
-#include <asm/kvm_44x.h>
+#include <linux/kvm_host.h>
#endif
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
@@ -361,8 +361,6 @@ int main(void)
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
- DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
-
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index a66bec57265a..0cef809cec21 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -28,72 +28,6 @@
#include "44x_tlb.h"
-/* Note: clearing MSR[DE] just means that the debug interrupt will not be
- * delivered *immediately*. Instead, it simply sets the appropriate DBSR bits.
- * If those DBSR bits are still set when MSR[DE] is re-enabled, the interrupt
- * will be delivered as an "imprecise debug event" (which is indicated by
- * DBSR[IDE].
- */
-static void kvm44x_disable_debug_interrupts(void)
-{
- mtmsr(mfmsr() & ~MSR_DE);
-}
-
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
- kvm44x_disable_debug_interrupts();
-
- mtspr(SPRN_IAC1, vcpu->arch.host_iac[0]);
- mtspr(SPRN_IAC2, vcpu->arch.host_iac[1]);
- mtspr(SPRN_IAC3, vcpu->arch.host_iac[2]);
- mtspr(SPRN_IAC4, vcpu->arch.host_iac[3]);
- mtspr(SPRN_DBCR1, vcpu->arch.host_dbcr1);
- mtspr(SPRN_DBCR2, vcpu->arch.host_dbcr2);
- mtspr(SPRN_DBCR0, vcpu->arch.host_dbcr0);
- mtmsr(vcpu->arch.host_msr);
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
- u32 dbcr0 = 0;
-
- vcpu->arch.host_msr = mfmsr();
- kvm44x_disable_debug_interrupts();
-
- /* Save host debug register state. */
- vcpu->arch.host_iac[0] = mfspr(SPRN_IAC1);
- vcpu->arch.host_iac[1] = mfspr(SPRN_IAC2);
- vcpu->arch.host_iac[2] = mfspr(SPRN_IAC3);
- vcpu->arch.host_iac[3] = mfspr(SPRN_IAC4);
- vcpu->arch.host_dbcr0 = mfspr(SPRN_DBCR0);
- vcpu->arch.host_dbcr1 = mfspr(SPRN_DBCR1);
- vcpu->arch.host_dbcr2 = mfspr(SPRN_DBCR2);
-
- /* set registers up for guest */
-
- if (dbg->bp[0]) {
- mtspr(SPRN_IAC1, dbg->bp[0]);
- dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
- }
- if (dbg->bp[1]) {
- mtspr(SPRN_IAC2, dbg->bp[1]);
- dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
- }
- if (dbg->bp[2]) {
- mtspr(SPRN_IAC3, dbg->bp[2]);
- dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
- }
- if (dbg->bp[3]) {
- mtspr(SPRN_IAC4, dbg->bp[3]);
- dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
- }
-
- mtspr(SPRN_DBCR0, dbcr0);
- mtspr(SPRN_DBCR1, 0);
- mtspr(SPRN_DBCR2, 0);
-}
-
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_44x_tlb_load(vcpu);
@@ -149,8 +83,6 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
int index;
gva_t eaddr;
u8 pid;
@@ -166,9 +98,7 @@ int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
return 0;
}
- gtlbe = &vcpu_44x->guest_tlb[index];
-
- tr->physical_address = tlb_xlate(gtlbe, eaddr);
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
/* XXX what does "writeable" and "usermode" even mean? */
tr->valid = 1;
diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 82489a743a6f..61af58fcecee 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,25 +27,12 @@
#include "booke.h"
#include "44x_tlb.h"
-#define OP_RFI 19
-
-#define XOP_RFI 50
-#define XOP_MFMSR 83
-#define XOP_WRTEE 131
-#define XOP_MTMSR 146
-#define XOP_WRTEEI 163
#define XOP_MFDCR 323
#define XOP_MTDCR 451
#define XOP_TLBSX 914
#define XOP_ICCCI 966
#define XOP_TLBWE 978
-static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pc = vcpu->arch.srr0;
- kvmppc_set_msr(vcpu, vcpu->arch.srr1);
-}
-
int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@@ -59,48 +46,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int ws;
switch (get_op(inst)) {
- case OP_RFI:
- switch (get_xop(inst)) {
- case XOP_RFI:
- kvmppc_emul_rfi(vcpu);
- kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
- *advance = 0;
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
- break;
-
case 31:
switch (get_xop(inst)) {
- case XOP_MFMSR:
- rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
- kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
- break;
-
- case XOP_MTMSR:
- rs = get_rs(inst);
- kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
- break;
-
- case XOP_WRTEE:
- rs = get_rs(inst);
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
- case XOP_WRTEEI:
- vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (inst & MSR_EE);
- kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
- break;
-
case XOP_MFDCR:
dcrn = get_dcrn(inst);
rt = get_rt(inst);
@@ -186,186 +134,51 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
emulated = EMULATE_FAIL;
}
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
return emulated;
}
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
+ int emulated = EMULATE_DONE;
+
switch (sprn) {
- case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_PID:
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ case SPRN_MMUCR:
+ vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
case SPRN_CCR1:
vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
- case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
- case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
- case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
- case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
- kvmppc_emulate_dec(vcpu);
- break;
-
- /* Note: SPRG4-7 are user-readable. These values are
- * loaded into the real SPRGs when resuming the
- * guest. */
- case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
- case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
-
- case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR0:
- vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR1:
- vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR2:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR3:
- vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR4:
- vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR5:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR6:
- vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR7:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR8:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR9:
- vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR10:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR11:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR12:
- vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR13:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR14:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
- break;
- case SPRN_IVOR15:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
- break;
-
default:
- return EMULATE_FAIL;
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
- return EMULATE_DONE;
+ return emulated;
}
int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
{
+ int emulated = EMULATE_DONE;
+
switch (sprn) {
- /* 440 */
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
case SPRN_MMUCR:
vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
case SPRN_CCR0:
vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
case SPRN_CCR1:
vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
-
- /* Book E */
- case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
- case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
- case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
- case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
- case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
- case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
-
- case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
- break;
- case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
- break;
- case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
- break;
- case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
- break;
- case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
- break;
- case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
- break;
- case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
- break;
- case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
- break;
- case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
- break;
- case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
- break;
- case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
- break;
- case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
- break;
- case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
- break;
- case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
- break;
- case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
- break;
- case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
- break;
-
default:
- return EMULATE_FAIL;
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
- return EMULATE_DONE;
+ return emulated;
}
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 9a34b8edb9e2..4a16f472cc18 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -208,20 +208,38 @@ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
return -1;
}
-int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
+ unsigned int pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_IS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
-int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
{
unsigned int as = !!(vcpu->arch.msr & MSR_DS);
return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
}
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+}
+
static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
unsigned int stlb_index)
{
@@ -248,7 +266,7 @@ static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x,
KVMTRACE_1D(STLB_INVAL, &vcpu_44x->vcpu, stlb_index, handler);
}
-void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
int i;
@@ -269,15 +287,19 @@ void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu)
* Caller must ensure that the specified guest TLB entry is safe to insert into
* the shadow TLB.
*/
-void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid,
- u32 flags, u32 max_bytes, unsigned int gtlb_index)
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
+ unsigned int gtlb_index)
{
struct kvmppc_44x_tlbe stlbe;
struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
+ struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
struct kvmppc_44x_shadow_ref *ref;
struct page *new_page;
hpa_t hpaddr;
gfn_t gfn;
+ u32 asid = gtlbe->tid;
+ u32 flags = gtlbe->word2;
+ u32 max_bytes = get_tlb_bytes(gtlbe);
unsigned int victim;
/* Select TLB entry to clobber. Indirectly guard against races with the TLB
@@ -448,10 +470,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
}
if (tlbe_is_host_safe(vcpu, tlbe)) {
- u64 asid;
gva_t eaddr;
gpa_t gpaddr;
- u32 flags;
u32 bytes;
eaddr = get_tlb_eaddr(tlbe);
@@ -462,10 +482,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
eaddr &= ~(bytes - 1);
gpaddr &= ~(bytes - 1);
- asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
- flags = tlbe->word2 & 0xffff;
-
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, asid, flags, bytes, gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
}
KVMTRACE_5D(GTLB_WRITE, vcpu, gtlb_index, tlbe->tid, tlbe->word0,
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
index 772191f29e62..a9ff80e51526 100644
--- a/arch/powerpc/kvm/44x_tlb.h
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -25,8 +25,6 @@
extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned int pid, unsigned int as);
-extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
-extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
u8 rc);
@@ -85,11 +83,4 @@ static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
return (vcpu->arch.mmucr >> 16) & 0x1;
}
-static inline gpa_t tlb_xlate(struct kvmppc_44x_tlbe *tlbe, gva_t eaddr)
-{
- unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
-
- return get_tlb_raddr(tlbe) | (eaddr & pgmask);
-}
-
#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 6dbdc4817d80..5a152a52796f 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -2,6 +2,9 @@
# KVM configuration
#
+config HAVE_KVM_IRQCHIP
+ bool
+
menuconfig VIRTUALIZATION
bool "Virtualization"
---help---
@@ -43,6 +46,19 @@ config KVM_EXIT_TIMING
If unsure, say N.
+config KVM_E500
+ bool "KVM support for PowerPC E500 processors"
+ depends on EXPERIMENTAL && E500
+ select KVM
+ ---help---
+ Support running unmodified E500 guest kernels in virtual machines on
+ E500 host processors.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
config KVM_TRACE
bool "KVM trace support"
depends on KVM && MARKERS && SYSFS
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index df7ba59e6d53..4b2df66c79d8 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -16,8 +16,18 @@ AFLAGS_booke_interrupts.o := -I$(obj)
kvm-440-objs := \
booke.o \
+ booke_emulate.o \
booke_interrupts.o \
44x.o \
44x_tlb.o \
44x_emulate.o
obj-$(CONFIG_KVM_440) += kvm-440.o
+
+kvm-e500-objs := \
+ booke.o \
+ booke_emulate.o \
+ booke_interrupts.o \
+ e500.o \
+ e500_tlb.o \
+ e500_emulate.o
+obj-$(CONFIG_KVM_E500) += kvm-e500.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 35485dd6927e..642e4204cf25 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -30,10 +30,8 @@
#include <asm/kvm_ppc.h>
#include "timing.h"
#include <asm/cacheflush.h>
-#include <asm/kvm_44x.h>
#include "booke.h"
-#include "44x_tlb.h"
unsigned long kvmppc_booke_handlers;
@@ -120,6 +118,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_UNAVAIL:
+ case BOOKE_IRQPRIO_SPE_FP_DATA:
+ case BOOKE_IRQPRIO_SPE_FP_ROUND:
case BOOKE_IRQPRIO_AP_UNAVAIL:
case BOOKE_IRQPRIO_ALIGNMENT:
allowed = 1;
@@ -165,7 +166,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
unsigned int priority;
priority = __ffs(*pending);
- while (priority <= BOOKE_MAX_INTERRUPT) {
+ while (priority <= BOOKE_IRQPRIO_MAX) {
if (kvmppc_booke_irqprio_deliver(vcpu, priority))
break;
@@ -263,6 +264,21 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
+ case BOOKE_INTERRUPT_SPE_UNAVAIL:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_UNAVAIL);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_SPE_FP_DATA:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_DATA);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_SPE_FP_ROUND:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
+ r = RESUME_GUEST;
+ break;
+
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
@@ -284,29 +300,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_DTLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.fault_dear;
int gtlb_index;
+ gpa_t gpaddr;
gfn_t gfn;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+ gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_mmu_dtlb_miss(vcpu);
kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
break;
}
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
- vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
- gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+ gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
+ gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
/* The guest TLB had a mapping, but the shadow TLB
@@ -315,13 +329,13 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
kvmppc_account_exit(vcpu, DTLB_VIRT_MISS_EXITS);
r = RESUME_GUEST;
} else {
/* Guest has mapped and accessed a page which is not
* actually RAM. */
+ vcpu->arch.paddr_accessed = gpaddr;
r = kvmppc_emulate_mmio(run, vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -329,10 +343,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
- /* XXX move to a 440-specific file. */
case BOOKE_INTERRUPT_ITLB_MISS: {
- struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
- struct kvmppc_44x_tlbe *gtlbe;
unsigned long eaddr = vcpu->arch.pc;
gpa_t gpaddr;
gfn_t gfn;
@@ -341,18 +352,18 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
/* Check the guest TLB. */
- gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+ gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
+ kvmppc_mmu_itlb_miss(vcpu);
kvmppc_account_exit(vcpu, ITLB_REAL_MISS_EXITS);
break;
}
kvmppc_account_exit(vcpu, ITLB_VIRT_MISS_EXITS);
- gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
- gpaddr = tlb_xlate(gtlbe, eaddr);
+ gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr);
gfn = gpaddr >> PAGE_SHIFT;
if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
@@ -362,8 +373,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* b) the guest used a large mapping which we're faking
* Either way, we need to satisfy the fault without
* invoking the guest. */
- kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
- gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
+ kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index);
} else {
/* Guest mapped and leaped at non-RAM! */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index cf7c94ca24bf..d59bcca1f9d8 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -22,6 +22,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_ppc.h>
#include "timing.h"
/* interrupt priortity ordering */
@@ -30,17 +31,24 @@
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
-#define BOOKE_IRQPRIO_SYSCALL 5
-#define BOOKE_IRQPRIO_AP_UNAVAIL 6
-#define BOOKE_IRQPRIO_DTLB_MISS 7
-#define BOOKE_IRQPRIO_ITLB_MISS 8
-#define BOOKE_IRQPRIO_MACHINE_CHECK 9
-#define BOOKE_IRQPRIO_DEBUG 10
-#define BOOKE_IRQPRIO_CRITICAL 11
-#define BOOKE_IRQPRIO_WATCHDOG 12
-#define BOOKE_IRQPRIO_EXTERNAL 13
-#define BOOKE_IRQPRIO_FIT 14
-#define BOOKE_IRQPRIO_DECREMENTER 15
+#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
+#define BOOKE_IRQPRIO_SPE_FP_DATA 6
+#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#define BOOKE_IRQPRIO_SYSCALL 8
+#define BOOKE_IRQPRIO_AP_UNAVAIL 9
+#define BOOKE_IRQPRIO_DTLB_MISS 10
+#define BOOKE_IRQPRIO_ITLB_MISS 11
+#define BOOKE_IRQPRIO_MACHINE_CHECK 12
+#define BOOKE_IRQPRIO_DEBUG 13
+#define BOOKE_IRQPRIO_CRITICAL 14
+#define BOOKE_IRQPRIO_WATCHDOG 15
+#define BOOKE_IRQPRIO_EXTERNAL 16
+#define BOOKE_IRQPRIO_FIT 17
+#define BOOKE_IRQPRIO_DECREMENTER 18
+#define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19
+#define BOOKE_IRQPRIO_MAX 19
+
+extern unsigned long kvmppc_booke_handlers;
/* Helper function for "full" MSR writes. No need to call this if only EE is
* changing. */
@@ -57,4 +65,9 @@ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
};
}
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance);
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt);
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs);
+
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
new file mode 100644
index 000000000000..aebc65e93f4b
--- /dev/null
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -0,0 +1,266 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/disassemble.h>
+
+#include "booke.h"
+
+#define OP_19_XOP_RFI 50
+
+#define OP_31_XOP_MFMSR 83
+#define OP_31_XOP_WRTEE 131
+#define OP_31_XOP_MTMSR 146
+#define OP_31_XOP_WRTEEI 163
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pc = vcpu->arch.srr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int rs;
+ int rt;
+
+ switch (get_op(inst)) {
+ case 19:
+ switch (get_xop(inst)) {
+ case OP_19_XOP_RFI:
+ kvmppc_emul_rfi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS);
+ *advance = 0;
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ break;
+
+ case 31:
+ switch (get_xop(inst)) {
+
+ case OP_31_XOP_MFMSR:
+ rt = get_rt(inst);
+ vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
+ break;
+
+ case OP_31_XOP_MTMSR:
+ rs = get_rs(inst);
+ kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
+ kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ break;
+
+ case OP_31_XOP_WRTEE:
+ rs = get_rs(inst);
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (vcpu->arch.gpr[rs] & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ case OP_31_XOP_WRTEEI:
+ vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+ | (inst & MSR_EE);
+ kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_DEAR:
+ vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ case SPRN_ESR:
+ vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR0:
+ vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBCR1:
+ vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_DBSR:
+ vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TSR:
+ vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+ case SPRN_TCR:
+ vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ kvmppc_emulate_dec(vcpu);
+ break;
+
+ /* Note: SPRG4-7 are user-readable. These values are
+ * loaded into the real SPRGs when resuming the
+ * guest. */
+ case SPRN_SPRG4:
+ vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG5:
+ vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG6:
+ vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_SPRG7:
+ vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+ case SPRN_IVPR:
+ vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR0:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
+
+int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_IVPR:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ case SPRN_DEAR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ case SPRN_ESR:
+ vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ case SPRN_DBCR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ case SPRN_DBCR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+ case SPRN_DBSR:
+ vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+
+ case SPRN_IVOR0:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ break;
+ case SPRN_IVOR1:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ break;
+ case SPRN_IVOR2:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ break;
+ case SPRN_IVOR3:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ break;
+ case SPRN_IVOR4:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ break;
+ case SPRN_IVOR5:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ break;
+ case SPRN_IVOR6:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ break;
+ case SPRN_IVOR7:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ break;
+ case SPRN_IVOR8:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ break;
+ case SPRN_IVOR9:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ break;
+ case SPRN_IVOR10:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ break;
+ case SPRN_IVOR11:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ break;
+ case SPRN_IVOR12:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ break;
+ case SPRN_IVOR13:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ break;
+ case SPRN_IVOR14:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ break;
+ case SPRN_IVOR15:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ return emulated;
+}
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 084ebcd7dd83..d0c6f841bbd1 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -86,6 +86,9 @@ KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
+KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
_GLOBAL(kvmppc_handler_len)
.long kvmppc_handler_1 - kvmppc_handler_0
@@ -347,7 +350,9 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
+#ifdef CONFIG_44x
iccci 0, 0 /* XXX hack */
+#endif
/* Load some guest volatiles. */
lwz r0, VCPU_GPR(r0)(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
new file mode 100644
index 000000000000..d8067fd81cdd
--- /dev/null
+++ b/arch/powerpc/kvm/e500.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/tlbflush.h>
+#include <asm/kvm_e500.h>
+#include <asm/kvm_ppc.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvmppc_e500_tlb_load(vcpu, cpu);
+}
+
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ kvmppc_e500_tlb_put(vcpu);
+}
+
+int kvmppc_core_check_processor_compat(void)
+{
+ int r;
+
+ if (strcmp(cur_cpu_spec->cpu_name, "e500v2") == 0)
+ r = 0;
+ else
+ r = -ENOTSUPP;
+
+ return r;
+}
+
+int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_e500_tlb_setup(vcpu_e500);
+
+ /* Use the same core vertion as host's */
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+ return 0;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ int index;
+ gva_t eaddr;
+ u8 pid;
+ u8 as;
+
+ eaddr = tr->linear_address;
+ pid = (tr->linear_address >> 32) & 0xff;
+ as = (tr->linear_address >> 40) & 0x1;
+
+ index = kvmppc_e500_tlb_search(vcpu, eaddr, pid, as);
+ if (index < 0) {
+ tr->valid = 0;
+ return 0;
+ }
+
+ tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr);
+ /* XXX what does "writeable" and "usermode" even mean? */
+ tr->valid = 1;
+
+ return 0;
+}
+
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500;
+ struct kvm_vcpu *vcpu;
+ int err;
+
+ vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu_e500) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ vcpu = &vcpu_e500->vcpu;
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = kvmppc_e500_tlb_init(vcpu_e500);
+ if (err)
+ goto uninit_vcpu;
+
+ return vcpu;
+
+uninit_vcpu:
+ kvm_vcpu_uninit(vcpu);
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+out:
+ return ERR_PTR(err);
+}
+
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+
+ kvmppc_e500_tlb_uninit(vcpu_e500);
+ kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
+}
+
+static int kvmppc_e500_init(void)
+{
+ int r, i;
+ unsigned long ivor[3];
+ unsigned long max_ivor = 0;
+
+ r = kvmppc_booke_init();
+ if (r)
+ return r;
+
+ /* copy extra E500 exception handlers */
+ ivor[0] = mfspr(SPRN_IVOR32);
+ ivor[1] = mfspr(SPRN_IVOR33);
+ ivor[2] = mfspr(SPRN_IVOR34);
+ for (i = 0; i < 3; i++) {
+ if (ivor[i] > max_ivor)
+ max_ivor = ivor[i];
+
+ memcpy((void *)kvmppc_booke_handlers + ivor[i],
+ kvmppc_handlers_start + (i + 16) * kvmppc_handler_len,
+ kvmppc_handler_len);
+ }
+ flush_icache_range(kvmppc_booke_handlers,
+ kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+ return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), THIS_MODULE);
+}
+
+static void kvmppc_e500_exit(void)
+{
+ kvmppc_booke_exit();
+}
+
+module_init(kvmppc_e500_init);
+module_exit(kvmppc_e500_exit);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
new file mode 100644
index 000000000000..7a98d4a4c1ed
--- /dev/null
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x_emulate.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/kvm_ppc.h>
+#include <asm/disassemble.h>
+#include <asm/kvm_e500.h>
+
+#include "booke.h"
+#include "e500_tlb.h"
+
+#define XOP_TLBIVAX 786
+#define XOP_TLBSX 914
+#define XOP_TLBRE 946
+#define XOP_TLBWE 978
+
+int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int inst, int *advance)
+{
+ int emulated = EMULATE_DONE;
+ int ra;
+ int rb;
+
+ switch (get_op(inst)) {
+ case 31:
+ switch (get_xop(inst)) {
+
+ case XOP_TLBRE:
+ emulated = kvmppc_e500_emul_tlbre(vcpu);
+ break;
+
+ case XOP_TLBWE:
+ emulated = kvmppc_e500_emul_tlbwe(vcpu);
+ break;
+
+ case XOP_TLBSX:
+ rb = get_rb(inst);
+ emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+ break;
+
+ case XOP_TLBIVAX:
+ ra = get_ra(inst);
+ rb = get_rb(inst);
+ emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ break;
+
+ default:
+ emulated = EMULATE_FAIL;
+ }
+
+ if (emulated == EMULATE_FAIL)
+ emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_PID:
+ vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
+ vcpu->arch.pid = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_PID1:
+ vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+ case SPRN_PID2:
+ vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS0:
+ vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS1:
+ vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS2:
+ vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS3:
+ vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS4:
+ vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS6:
+ vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+ case SPRN_MAS7:
+ vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+ case SPRN_L1CSR1:
+ vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+ case SPRN_HID0:
+ vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+ case SPRN_HID1:
+ vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+
+ /* extra exceptions */
+ case SPRN_IVOR32:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR34:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+ break;
+ case SPRN_IVOR35:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+ break;
+
+ default:
+ emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
+ }
+
+ return emulated;
+}
+
+int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int emulated = EMULATE_DONE;
+
+ switch (sprn) {
+ case SPRN_PID:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+ case SPRN_PID1:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+ case SPRN_PID2:
+ vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+ case SPRN_MAS0:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+ case SPRN_MAS1:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+ case SPRN_MAS2:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+ case SPRN_MAS3:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+ case SPRN_MAS4:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+ case SPRN_MAS6:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+ case SPRN_MAS7:
+ vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+
+ case SPRN_TLB0CFG:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
+ vcpu->arch.gpr[rt] &= ~0xfffUL;
+ vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
+ break;
+
+ case SPRN_TLB1CFG:
+ vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
+ vcpu->arch.gpr[rt] &= ~0xfffUL;
+ vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
+ break;
+
+ case SPRN_L1CSR1:
+ vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+ case SPRN_HID0:
+ vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+ case SPRN_HID1:
+ vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+
+ /* extra exceptions */
+ case SPRN_IVOR32:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+ break;
+ case SPRN_IVOR34:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+ break;
+ case SPRN_IVOR35:
+ vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ break;
+ default:
+ emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
+ }
+
+ return emulated;
+}
+
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
new file mode 100644
index 000000000000..d437160d388c
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.c,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_e500.h>
+
+#include "../mm/mmu_decl.h"
+#include "e500_tlb.h"
+
+#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+
+static unsigned int tlb1_entry_num;
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe *tlbe;
+ int i, tlbsel;
+
+ printk("| %8s | %8s | %8s | %8s | %8s |\n",
+ "nr", "mas1", "mas2", "mas3", "mas7");
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ printk("Guest TLB%d:\n", tlbsel);
+ for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+ tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+ if (tlbe->mas1 & MAS1_VALID)
+ printk(" G[%d][%3d] | %08X | %08X | %08X | %08X |\n",
+ tlbsel, i, tlbe->mas1, tlbe->mas2,
+ tlbe->mas3, tlbe->mas7);
+ }
+ }
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ printk("Shadow TLB%d:\n", tlbsel);
+ for (i = 0; i < vcpu_e500->shadow_tlb_size[tlbsel]; i++) {
+ tlbe = &vcpu_e500->shadow_tlb[tlbsel][i];
+ if (tlbe->mas1 & MAS1_VALID)
+ printk(" S[%d][%3d] | %08X | %08X | %08X | %08X |\n",
+ tlbsel, i, tlbe->mas1, tlbe->mas2,
+ tlbe->mas3, tlbe->mas7);
+ }
+ }
+}
+
+static inline unsigned int tlb0_get_next_victim(
+ struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ unsigned int victim;
+
+ victim = vcpu_e500->guest_tlb_nv[0]++;
+ if (unlikely(vcpu_e500->guest_tlb_nv[0] >= KVM_E500_TLB0_WAY_NUM))
+ vcpu_e500->guest_tlb_nv[0] = 0;
+
+ return victim;
+}
+
+static inline unsigned int tlb1_max_shadow_size(void)
+{
+ return tlb1_entry_num - tlbcam_index;
+}
+
+static inline int tlbe_is_writable(struct tlbe *tlbe)
+{
+ return tlbe->mas3 & (MAS3_SW|MAS3_UW);
+}
+
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+{
+ /* Mask off reserved bits. */
+ mas3 &= MAS3_ATTRIB_MASK;
+
+ if (!usermode) {
+ /* Guest is in supervisor mode,
+ * so we need to translate guest
+ * supervisor permissions into user permissions. */
+ mas3 &= ~E500_TLB_USER_PERM_MASK;
+ mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1;
+ }
+
+ return mas3 | E500_TLB_SUPER_PERM_MASK;
+}
+
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+{
+ return mas2 & MAS2_ATTRIB_MASK;
+}
+
+/*
+ * writing shadow tlb entry to host TLB
+ */
+static inline void __write_host_tlbe(struct tlbe *stlbe)
+{
+ mtspr(SPRN_MAS1, stlbe->mas1);
+ mtspr(SPRN_MAS2, stlbe->mas2);
+ mtspr(SPRN_MAS3, stlbe->mas3);
+ mtspr(SPRN_MAS7, stlbe->mas7);
+ __asm__ __volatile__ ("tlbwe\n" : : );
+}
+
+static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ local_irq_disable();
+ if (tlbsel == 0) {
+ __write_host_tlbe(stlbe);
+ } else {
+ unsigned register mas0;
+
+ mas0 = mfspr(SPRN_MAS0);
+
+ mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(to_htlb1_esel(esel)));
+ __write_host_tlbe(stlbe);
+
+ mtspr(SPRN_MAS0, mas0);
+ }
+ local_irq_enable();
+}
+
+void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int i;
+ unsigned register mas0;
+
+ /* Load all valid TLB1 entries to reduce guest tlb miss fault */
+ local_irq_disable();
+ mas0 = mfspr(SPRN_MAS0);
+ for (i = 0; i < tlb1_max_shadow_size(); i++) {
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+
+ if (get_tlb_v(stlbe)) {
+ mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
+ | MAS0_ESEL(to_htlb1_esel(i)));
+ __write_host_tlbe(stlbe);
+ }
+ }
+ mtspr(SPRN_MAS0, mas0);
+ local_irq_enable();
+}
+
+void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
+{
+ _tlbil_all();
+}
+
+/* Search the guest TLB for a matching entry. */
+static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500,
+ gva_t eaddr, int tlbsel, unsigned int pid, int as)
+{
+ int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++) {
+ struct tlbe *tlbe = &vcpu_e500->guest_tlb[tlbsel][i];
+ unsigned int tid;
+
+ if (eaddr < get_tlb_eaddr(tlbe))
+ continue;
+
+ if (eaddr > get_tlb_end(tlbe))
+ continue;
+
+ tid = get_tlb_tid(tlbe);
+ if (tid && (tid != pid))
+ continue;
+
+ if (!get_tlb_v(tlbe))
+ continue;
+
+ if (get_tlb_ts(tlbe) != as && as != -1)
+ continue;
+
+ return i;
+ }
+
+ return -1;
+}
+
+static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+ struct page *page = vcpu_e500->shadow_pages[tlbsel][esel];
+
+ if (page) {
+ vcpu_e500->shadow_pages[tlbsel][esel] = NULL;
+
+ if (get_tlb_v(stlbe)) {
+ if (tlbe_is_writable(stlbe))
+ kvm_release_page_dirty(page);
+ else
+ kvm_release_page_clean(page);
+ }
+ }
+}
+
+static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+ stlbe->mas1 = 0;
+ KVMTRACE_5D(STLB_INVAL, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+ stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+ handler);
+}
+
+static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ gva_t eaddr, gva_t eend, u32 tid)
+{
+ unsigned int pid = tid & 0xff;
+ unsigned int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i < vcpu_e500->guest_tlb_size[1]; i++) {
+ struct tlbe *stlbe = &vcpu_e500->shadow_tlb[1][i];
+ unsigned int tid;
+
+ if (!get_tlb_v(stlbe))
+ continue;
+
+ if (eend < get_tlb_eaddr(stlbe))
+ continue;
+
+ if (eaddr > get_tlb_end(stlbe))
+ continue;
+
+ tid = get_tlb_tid(stlbe);
+ if (tid && (tid != pid))
+ continue;
+
+ kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+ write_host_tlbe(vcpu_e500, 1, i);
+ }
+}
+
+static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
+ unsigned int eaddr, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int victim, pidsel, tsized;
+ int tlbsel;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (vcpu_e500->mas4 >> 28) & 0x1;
+ victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+ pidsel = (vcpu_e500->mas4 >> 16) & 0xf;
+ tsized = (vcpu_e500->mas4 >> 8) & 0xf;
+
+ vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas1 = MAS1_VALID | (as ? MAS1_TS : 0)
+ | MAS1_TID(vcpu_e500->pid[pidsel])
+ | MAS1_TSIZE(tsized);
+ vcpu_e500->mas2 = (eaddr & MAS2_EPN)
+ | (vcpu_e500->mas4 & MAS2_ATTRIB_MASK);
+ vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+ vcpu_e500->mas6 = (vcpu_e500->mas6 & MAS6_SPID1)
+ | (get_cur_pid(vcpu) << 16)
+ | (as ? MAS6_SAS : 0);
+ vcpu_e500->mas7 = 0;
+}
+
+static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe, int tlbsel, int esel)
+{
+ struct page *new_page;
+ struct tlbe *stlbe;
+ hpa_t hpaddr;
+
+ stlbe = &vcpu_e500->shadow_tlb[tlbsel][esel];
+
+ /* Get reference to new page. */
+ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn);
+ if (is_error_page(new_page)) {
+ printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
+ kvm_release_page_clean(new_page);
+ return;
+ }
+ hpaddr = page_to_phys(new_page);
+
+ /* Drop reference to old page. */
+ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel);
+
+ vcpu_e500->shadow_pages[tlbsel][esel] = new_page;
+
+ /* Force TS=1 IPROT=0 TSIZE=4KB for all guest mappings. */
+ stlbe->mas1 = MAS1_TSIZE(BOOKE_PAGESZ_4K)
+ | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID;
+ stlbe->mas2 = (gvaddr & MAS2_EPN)
+ | e500_shadow_mas2_attrib(gtlbe->mas2,
+ vcpu_e500->vcpu.arch.msr & MSR_PR);
+ stlbe->mas3 = (hpaddr & MAS3_RPN)
+ | e500_shadow_mas3_attrib(gtlbe->mas3,
+ vcpu_e500->vcpu.arch.msr & MSR_PR);
+ stlbe->mas7 = (hpaddr >> 32) & MAS7_RPN;
+
+ KVMTRACE_5D(STLB_WRITE, &vcpu_e500->vcpu, index_of(tlbsel, esel),
+ stlbe->mas1, stlbe->mas2, stlbe->mas3, stlbe->mas7,
+ handler);
+}
+
+/* XXX only map the one-one case, for now use TLB0 */
+static int kvmppc_e500_stlbe_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *gtlbe;
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_raddr(gtlbe) >> PAGE_SHIFT,
+ gtlbe, tlbsel, esel);
+
+ return esel;
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+/* XXX for both one-one and one-to-many , for now use TLB1 */
+static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500,
+ u64 gvaddr, gfn_t gfn, struct tlbe *gtlbe)
+{
+ unsigned int victim;
+
+ victim = vcpu_e500->guest_tlb_nv[1]++;
+
+ if (unlikely(vcpu_e500->guest_tlb_nv[1] >= tlb1_max_shadow_size()))
+ vcpu_e500->guest_tlb_nv[1] = 0;
+
+ kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, victim);
+
+ return victim;
+}
+
+/* Invalidate all guest kernel mappings when enter usermode,
+ * so that when they fault back in they will get the
+ * proper permission bits. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+ if (usermode) {
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int i;
+
+ /* XXX Replace loop with fancy data structures. */
+ for (i = 0; i < tlb1_max_shadow_size(); i++)
+ kvmppc_e500_stlbe_invalidate(vcpu_e500, 1, i);
+
+ _tlbil_all();
+ }
+}
+
+static int kvmppc_e500_gtlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel, int esel)
+{
+ struct tlbe *gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ if (unlikely(get_tlb_iprot(gtlbe)))
+ return -1;
+
+ if (tlbsel == 1) {
+ kvmppc_e500_tlb1_invalidate(vcpu_e500, get_tlb_eaddr(gtlbe),
+ get_tlb_end(gtlbe),
+ get_tlb_tid(gtlbe));
+ } else {
+ kvmppc_e500_stlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+
+ gtlbe->mas1 = 0;
+
+ return 0;
+}
+
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ unsigned int ia;
+ int esel, tlbsel;
+ gva_t ea;
+
+ ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+
+ ia = (ea >> 2) & 0x1;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = (ea >> 3) & 0x1;
+
+ if (ia) {
+ /* invalidate all entries */
+ for (esel = 0; esel < vcpu_e500->guest_tlb_size[tlbsel]; esel++)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ } else {
+ ea &= 0xfffff000;
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel,
+ get_cur_pid(vcpu), -1);
+ if (esel >= 0)
+ kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
+ }
+
+ _tlbil_all();
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel, esel;
+ struct tlbe *gtlbe;
+
+ tlbsel = get_tlb_tlbsel(vcpu_e500);
+ esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ vcpu_e500->mas0 &= MAS0_NV(0);
+ vcpu_e500->mas0 |= MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas1 = gtlbe->mas1;
+ vcpu_e500->mas2 = gtlbe->mas2;
+ vcpu_e500->mas3 = gtlbe->mas3;
+ vcpu_e500->mas7 = gtlbe->mas7;
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int as = !!get_cur_sas(vcpu_e500);
+ unsigned int pid = get_cur_spid(vcpu_e500);
+ int esel, tlbsel;
+ struct tlbe *gtlbe = NULL;
+ gva_t ea;
+
+ ea = vcpu->arch.gpr[rb];
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
+ if (esel >= 0) {
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+ break;
+ }
+ }
+
+ if (gtlbe) {
+ vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(esel)
+ | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas1 = gtlbe->mas1;
+ vcpu_e500->mas2 = gtlbe->mas2;
+ vcpu_e500->mas3 = gtlbe->mas3;
+ vcpu_e500->mas7 = gtlbe->mas7;
+ } else {
+ int victim;
+
+ /* since we only have two TLBs, only lower bit is used. */
+ tlbsel = vcpu_e500->mas4 >> 28 & 0x1;
+ victim = (tlbsel == 0) ? tlb0_get_next_victim(vcpu_e500) : 0;
+
+ vcpu_e500->mas0 = MAS0_TLBSEL(tlbsel) | MAS0_ESEL(victim)
+ | MAS0_NV(vcpu_e500->guest_tlb_nv[tlbsel]);
+ vcpu_e500->mas1 = (vcpu_e500->mas6 & MAS6_SPID0)
+ | (vcpu_e500->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+ | (vcpu_e500->mas4 & MAS4_TSIZED(~0));
+ vcpu_e500->mas2 &= MAS2_EPN;
+ vcpu_e500->mas2 |= vcpu_e500->mas4 & MAS2_ATTRIB_MASK;
+ vcpu_e500->mas3 &= MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3;
+ vcpu_e500->mas7 = 0;
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ u64 eaddr;
+ u64 raddr;
+ u32 tid;
+ struct tlbe *gtlbe;
+ int tlbsel, esel, stlbsel, sesel;
+
+ tlbsel = get_tlb_tlbsel(vcpu_e500);
+ esel = get_tlb_esel(vcpu_e500, tlbsel);
+
+ gtlbe = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ if (get_tlb_v(gtlbe) && tlbsel == 1) {
+ eaddr = get_tlb_eaddr(gtlbe);
+ tid = get_tlb_tid(gtlbe);
+ kvmppc_e500_tlb1_invalidate(vcpu_e500, eaddr,
+ get_tlb_end(gtlbe), tid);
+ }
+
+ gtlbe->mas1 = vcpu_e500->mas1;
+ gtlbe->mas2 = vcpu_e500->mas2;
+ gtlbe->mas3 = vcpu_e500->mas3;
+ gtlbe->mas7 = vcpu_e500->mas7;
+
+ KVMTRACE_5D(GTLB_WRITE, vcpu, vcpu_e500->mas0,
+ gtlbe->mas1, gtlbe->mas2, gtlbe->mas3, gtlbe->mas7,
+ handler);
+
+ /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+ if (tlbe_is_host_safe(vcpu, gtlbe)) {
+ switch (tlbsel) {
+ case 0:
+ /* TLB0 */
+ gtlbe->mas1 &= ~MAS1_TSIZE(~0);
+ gtlbe->mas1 |= MAS1_TSIZE(BOOKE_PAGESZ_4K);
+
+ stlbsel = 0;
+ sesel = kvmppc_e500_stlbe_map(vcpu_e500, 0, esel);
+
+ break;
+
+ case 1:
+ /* TLB1 */
+ eaddr = get_tlb_eaddr(gtlbe);
+ raddr = get_tlb_raddr(gtlbe);
+
+ /* Create a 4KB mapping on the host.
+ * If the guest wanted a large page,
+ * only the first 4KB is mapped here and the rest
+ * are mapped on the fly. */
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr,
+ raddr >> PAGE_SHIFT, gtlbe);
+ break;
+
+ default:
+ BUG();
+ }
+ write_host_tlbe(vcpu_e500, stlbsel, sesel);
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+ return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as);
+}
+
+void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+}
+
+void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
+{
+ unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as);
+}
+
+gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
+ gva_t eaddr)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ struct tlbe *gtlbe =
+ &vcpu_e500->guest_tlb[tlbsel_of(index)][esel_of(index)];
+ u64 pgmask = get_tlb_bytes(gtlbe) - 1;
+
+ return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel, i;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++)
+ for (i = 0; i < vcpu_e500->guest_tlb_size[tlbsel]; i++)
+ kvmppc_e500_shadow_release(vcpu_e500, tlbsel, i);
+
+ /* discard all guest mapping */
+ _tlbil_all();
+}
+
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
+ unsigned int index)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int tlbsel = tlbsel_of(index);
+ int esel = esel_of(index);
+ int stlbsel, sesel;
+
+ switch (tlbsel) {
+ case 0:
+ stlbsel = 0;
+ sesel = esel;
+ break;
+
+ case 1: {
+ gfn_t gfn = gpaddr >> PAGE_SHIFT;
+ struct tlbe *gtlbe
+ = &vcpu_e500->guest_tlb[tlbsel][esel];
+
+ stlbsel = 1;
+ sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe);
+ break;
+ }
+
+ default:
+ BUG();
+ break;
+ }
+ write_host_tlbe(vcpu_e500, stlbsel, sesel);
+}
+
+int kvmppc_e500_tlb_search(struct kvm_vcpu *vcpu,
+ gva_t eaddr, unsigned int pid, int as)
+{
+ struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+ int esel, tlbsel;
+
+ for (tlbsel = 0; tlbsel < 2; tlbsel++) {
+ esel = kvmppc_e500_tlb_index(vcpu_e500, eaddr, tlbsel, pid, as);
+ if (esel >= 0)
+ return index_of(tlbsel, esel);
+ }
+
+ return -1;
+}
+
+void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ struct tlbe *tlbe;
+
+ /* Insert large initial mapping for guest. */
+ tlbe = &vcpu_e500->guest_tlb[1][0];
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_256M);
+ tlbe->mas2 = 0;
+ tlbe->mas3 = E500_TLB_SUPER_PERM_MASK;
+ tlbe->mas7 = 0;
+
+ /* 4K map for serial output. Used by kernel wrapper. */
+ tlbe = &vcpu_e500->guest_tlb[1][1];
+ tlbe->mas1 = MAS1_VALID | MAS1_TSIZE(BOOKE_PAGESZ_4K);
+ tlbe->mas2 = (0xe0004500 & 0xFFFFF000) | MAS2_I | MAS2_G;
+ tlbe->mas3 = (0xe0004500 & 0xFFFFF000) | E500_TLB_SUPER_PERM_MASK;
+ tlbe->mas7 = 0;
+}
+
+int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ tlb1_entry_num = mfspr(SPRN_TLB1CFG) & 0xFFF;
+
+ vcpu_e500->guest_tlb_size[0] = KVM_E500_TLB0_SIZE;
+ vcpu_e500->guest_tlb[0] =
+ kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+ if (vcpu_e500->guest_tlb[0] == NULL)
+ goto err_out;
+
+ vcpu_e500->shadow_tlb_size[0] = KVM_E500_TLB0_SIZE;
+ vcpu_e500->shadow_tlb[0] =
+ kzalloc(sizeof(struct tlbe) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+ if (vcpu_e500->shadow_tlb[0] == NULL)
+ goto err_out_guest0;
+
+ vcpu_e500->guest_tlb_size[1] = KVM_E500_TLB1_SIZE;
+ vcpu_e500->guest_tlb[1] =
+ kzalloc(sizeof(struct tlbe) * KVM_E500_TLB1_SIZE, GFP_KERNEL);
+ if (vcpu_e500->guest_tlb[1] == NULL)
+ goto err_out_shadow0;
+
+ vcpu_e500->shadow_tlb_size[1] = tlb1_entry_num;
+ vcpu_e500->shadow_tlb[1] =
+ kzalloc(sizeof(struct tlbe) * tlb1_entry_num, GFP_KERNEL);
+ if (vcpu_e500->shadow_tlb[1] == NULL)
+ goto err_out_guest1;
+
+ vcpu_e500->shadow_pages[0] = (struct page **)
+ kzalloc(sizeof(struct page *) * KVM_E500_TLB0_SIZE, GFP_KERNEL);
+ if (vcpu_e500->shadow_pages[0] == NULL)
+ goto err_out_shadow1;
+
+ vcpu_e500->shadow_pages[1] = (struct page **)
+ kzalloc(sizeof(struct page *) * tlb1_entry_num, GFP_KERNEL);
+ if (vcpu_e500->shadow_pages[1] == NULL)
+ goto err_out_page0;
+
+ return 0;
+
+err_out_page0:
+ kfree(vcpu_e500->shadow_pages[0]);
+err_out_shadow1:
+ kfree(vcpu_e500->shadow_tlb[1]);
+err_out_guest1:
+ kfree(vcpu_e500->guest_tlb[1]);
+err_out_shadow0:
+ kfree(vcpu_e500->shadow_tlb[0]);
+err_out_guest0:
+ kfree(vcpu_e500->guest_tlb[0]);
+err_out:
+ return -1;
+}
+
+void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ kfree(vcpu_e500->shadow_pages[1]);
+ kfree(vcpu_e500->shadow_pages[0]);
+ kfree(vcpu_e500->shadow_tlb[1]);
+ kfree(vcpu_e500->guest_tlb[1]);
+ kfree(vcpu_e500->shadow_tlb[0]);
+ kfree(vcpu_e500->guest_tlb[0]);
+}
diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h
new file mode 100644
index 000000000000..ab49e9355185
--- /dev/null
+++ b/arch/powerpc/kvm/e500_tlb.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, yu.liu@freescale.com
+ *
+ * Description:
+ * This file is based on arch/powerpc/kvm/44x_tlb.h,
+ * by Hollis Blanchard <hollisb@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __KVM_E500_TLB_H__
+#define __KVM_E500_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-fsl-booke.h>
+#include <asm/tlb.h>
+#include <asm/kvm_e500.h>
+
+#define KVM_E500_TLB0_WAY_SIZE_BIT 7 /* Fixed */
+#define KVM_E500_TLB0_WAY_SIZE (1UL << KVM_E500_TLB0_WAY_SIZE_BIT)
+#define KVM_E500_TLB0_WAY_SIZE_MASK (KVM_E500_TLB0_WAY_SIZE - 1)
+
+#define KVM_E500_TLB0_WAY_NUM_BIT 1 /* No greater than 7 */
+#define KVM_E500_TLB0_WAY_NUM (1UL << KVM_E500_TLB0_WAY_NUM_BIT)
+#define KVM_E500_TLB0_WAY_NUM_MASK (KVM_E500_TLB0_WAY_NUM - 1)
+
+#define KVM_E500_TLB0_SIZE (KVM_E500_TLB0_WAY_SIZE * KVM_E500_TLB0_WAY_NUM)
+#define KVM_E500_TLB1_SIZE 16
+
+#define index_of(tlbsel, esel) (((tlbsel) << 16) | ((esel) & 0xFFFF))
+#define tlbsel_of(index) ((index) >> 16)
+#define esel_of(index) ((index) & 0xFFFF)
+
+#define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW)
+#define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW)
+#define MAS2_ATTRIB_MASK \
+ (MAS2_X0 | MAS2_X1 | MAS2_W | MAS2_I | MAS2_M | MAS2_G | MAS2_E)
+#define MAS3_ATTRIB_MASK \
+ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \
+ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK)
+
+extern void kvmppc_dump_tlbs(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbre(struct kvm_vcpu *);
+extern int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *, int, int);
+extern int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_search(struct kvm_vcpu *, gva_t, unsigned int, int);
+extern void kvmppc_e500_tlb_put(struct kvm_vcpu *);
+extern void kvmppc_e500_tlb_load(struct kvm_vcpu *, int);
+extern int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *);
+extern void kvmppc_e500_tlb_setup(struct kvmppc_vcpu_e500 *);
+
+/* TLB helper functions */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 8) & 0xf;
+}
+
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+ return tlbe->mas2 & 0xfffff000;
+}
+
+static inline u64 get_tlb_bytes(const struct tlbe *tlbe)
+{
+ unsigned int pgsize = get_tlb_size(tlbe);
+ return 1ULL << 10 << (pgsize << 1);
+}
+
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+ u64 bytes = get_tlb_bytes(tlbe);
+ return get_tlb_eaddr(tlbe) + bytes - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+ u64 rpn = tlbe->mas7;
+ return (rpn << 32) | (tlbe->mas3 & 0xfffff000);
+}
+
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 16) & 0xff;
+}
+
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 12) & 0x1;
+}
+
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 31) & 0x1;
+}
+
+static inline unsigned int get_tlb_iprot(const struct tlbe *tlbe)
+{
+ return (tlbe->mas1 >> 30) & 0x1;
+}
+
+static inline unsigned int get_cur_pid(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pid & 0xff;
+}
+
+static inline unsigned int get_cur_spid(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return (vcpu_e500->mas6 >> 16) & 0xff;
+}
+
+static inline unsigned int get_cur_sas(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return vcpu_e500->mas6 & 0x1;
+}
+
+static inline unsigned int get_tlb_tlbsel(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ /*
+ * Manual says that tlbsel has 2 bits wide.
+ * Since we only have two TLBs, only lower bit is used.
+ */
+ return (vcpu_e500->mas0 >> 28) & 0x1;
+}
+
+static inline unsigned int get_tlb_nv_bit(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return vcpu_e500->mas0 & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel_bit(
+ const struct kvmppc_vcpu_e500 *vcpu_e500)
+{
+ return (vcpu_e500->mas0 >> 16) & 0xfff;
+}
+
+static inline unsigned int get_tlb_esel(
+ const struct kvmppc_vcpu_e500 *vcpu_e500,
+ int tlbsel)
+{
+ unsigned int esel = get_tlb_esel_bit(vcpu_e500);
+
+ if (tlbsel == 0) {
+ esel &= KVM_E500_TLB0_WAY_NUM_MASK;
+ esel |= ((vcpu_e500->mas2 >> 12) & KVM_E500_TLB0_WAY_SIZE_MASK)
+ << KVM_E500_TLB0_WAY_NUM_BIT;
+ } else {
+ esel &= KVM_E500_TLB1_SIZE - 1;
+ }
+
+ return esel;
+}
+
+static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+ const struct tlbe *tlbe)
+{
+ gpa_t gpa;
+
+ if (!get_tlb_v(tlbe))
+ return 0;
+
+ /* Does it match current guest AS? */
+ /* XXX what about IS != DS? */
+ if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+ return 0;
+
+ gpa = get_tlb_raddr(tlbe);
+ if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+ /* Mapping is not for RAM. */
+ return 0;
+
+ return 1;
+}
+
+#endif /* __KVM_E500_TLB_H__ */
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index d1d38daa93fb..a561d6e8da1c 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -30,6 +30,39 @@
#include <asm/disassemble.h>
#include "timing.h"
+#define OP_TRAP 3
+
+#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_LBZX 87
+#define OP_31_XOP_STWX 151
+#define OP_31_XOP_STBX 215
+#define OP_31_XOP_STBUX 247
+#define OP_31_XOP_LHZX 279
+#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_STHX 407
+#define OP_31_XOP_STHUX 439
+#define OP_31_XOP_MTSPR 467
+#define OP_31_XOP_DCBI 470
+#define OP_31_XOP_LWBRX 534
+#define OP_31_XOP_TLBSYNC 566
+#define OP_31_XOP_STWBRX 662
+#define OP_31_XOP_LHBRX 790
+#define OP_31_XOP_STHBRX 918
+
+#define OP_LWZ 32
+#define OP_LWZU 33
+#define OP_LBZ 34
+#define OP_LBZU 35
+#define OP_STW 36
+#define OP_STWU 37
+#define OP_STB 38
+#define OP_STBU 39
+#define OP_LHZ 40
+#define OP_LHZU 41
+#define OP_STH 44
+#define OP_STHU 45
+
void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tcr & TCR_DIE) {
@@ -78,7 +111,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
switch (get_op(inst)) {
- case 3: /* trap */
+ case OP_TRAP:
vcpu->arch.esr |= ESR_PTR;
kvmppc_core_queue_program(vcpu);
advance = 0;
@@ -87,31 +120,31 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case 31:
switch (get_xop(inst)) {
- case 23: /* lwzx */
+ case OP_31_XOP_LWZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 87: /* lbzx */
+ case OP_31_XOP_LBZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 151: /* stwx */
+ case OP_31_XOP_STWX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
4, 1);
break;
- case 215: /* stbx */
+ case OP_31_XOP_STBX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
1, 1);
break;
- case 247: /* stbux */
+ case OP_31_XOP_STBUX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -126,12 +159,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[rs] = ea;
break;
- case 279: /* lhzx */
+ case OP_31_XOP_LHZX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
- case 311: /* lhzux */
+ case OP_31_XOP_LHZUX:
rt = get_rt(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -144,7 +177,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 339: /* mfspr */
+ case OP_31_XOP_MFSPR:
sprn = get_sprn(inst);
rt = get_rt(inst);
@@ -185,7 +218,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 407: /* sthx */
+ case OP_31_XOP_STHX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -195,7 +228,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
2, 1);
break;
- case 439: /* sthux */
+ case OP_31_XOP_STHUX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -210,7 +243,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = ea;
break;
- case 467: /* mtspr */
+ case OP_31_XOP_MTSPR:
sprn = get_sprn(inst);
rs = get_rs(inst);
switch (sprn) {
@@ -246,7 +279,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 470: /* dcbi */
+ case OP_31_XOP_DCBI:
/* Do nothing. The guest is performing dcbi because
* hardware DMA is not snooped by the dcache, but
* emulated DMA either goes through the dcache as
@@ -254,15 +287,15 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
* coherence. */
break;
- case 534: /* lwbrx */
+ case OP_31_XOP_LWBRX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
break;
- case 566: /* tlbsync */
+ case OP_31_XOP_TLBSYNC:
break;
- case 662: /* stwbrx */
+ case OP_31_XOP_STWBRX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -272,12 +305,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 0);
break;
- case 790: /* lhbrx */
+ case OP_31_XOP_LHBRX:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
- case 918: /* sthbrx */
+ case OP_31_XOP_STHBRX:
rs = get_rs(inst);
ra = get_ra(inst);
rb = get_rb(inst);
@@ -293,37 +326,37 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
- case 32: /* lwz */
+ case OP_LWZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- case 33: /* lwzu */
+ case OP_LWZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 34: /* lbz */
+ case OP_LBZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
- case 35: /* lbzu */
+ case OP_LBZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 36: /* stw */
+ case OP_STW:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
4, 1);
break;
- case 37: /* stwu */
+ case OP_STWU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -331,13 +364,13 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 38: /* stb */
+ case OP_STB:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
1, 1);
break;
- case 39: /* stbu */
+ case OP_STBU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
@@ -345,25 +378,25 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 40: /* lhz */
+ case OP_LHZ:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
- case 41: /* lhzu */
+ case OP_LHZU:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
break;
- case 44: /* sth */
+ case OP_STH:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
2, 1);
break;
- case 45: /* sthu */
+ case OP_STHU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2822c8ccfaaf..9057335fdc61 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvmppc_free_vcpus(kvm);
@@ -212,46 +216,23 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- kvmppc_core_destroy_mmu(vcpu);
+ kvmppc_mmu_destroy(vcpu);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- if (vcpu->guest_debug.enabled)
- kvmppc_core_load_guest_debugstate(vcpu);
-
kvmppc_core_vcpu_load(vcpu, cpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_debug.enabled)
- kvmppc_core_load_host_debugstate(vcpu);
-
- /* Don't leave guest TLB entries resident when being de-scheduled. */
- /* XXX It would be nice to differentiate between heavyweight exit and
- * sched_out here, since we could avoid the TLB flush for heavyweight
- * exits. */
- _tlbil_all();
kvmppc_core_vcpu_put(vcpu);
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int i;
-
- vcpu->guest_debug.enabled = dbg->enabled;
- if (vcpu->guest_debug.enabled) {
- for (i=0; i < ARRAY_SIZE(vcpu->guest_debug.bp); i++) {
- if (dbg->breakpoints[i].enabled)
- vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
- else
- vcpu->guest_debug.bp[i] = 0;
- }
- }
-
- return 0;
+ return -EINVAL;
}
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index e1f54654e3ae..0b2f829f6d50 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -42,4 +42,11 @@ struct kvm_fpu {
__u64 fprs[16];
};
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3c55e4107dcc..c6e674f5fca9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -21,9 +21,6 @@
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
-struct kvm_guest_debug {
-};
-
struct sca_entry {
atomic_t scn;
__u64 reserved;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index e051cad1f1e0..3e260b7e37b2 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -4,6 +4,9 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+
menuconfig VIRTUALIZATION
bool "Virtualization"
default y
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index be8497186b96..cbfe91e10120 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
}
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
@@ -418,8 +422,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
return -EINVAL; /* not implemented yet */
}
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index b95162af0bf6..a03110219082 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -205,7 +205,30 @@ struct kvm_pit_channel_state {
__s64 count_load_time;
};
+struct kvm_debug_exit_arch {
+ __u32 exception;
+ __u32 pad;
+ __u64 pc;
+ __u64 dr6;
+ __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+#define KVM_GUESTDBG_INJECT_DB 0x00040000
+#define KVM_GUESTDBG_INJECT_BP 0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u64 debugreg[8];
+};
+
struct kvm_pit_state {
struct kvm_pit_channel_state channels[3];
};
+
+struct kvm_reinject_control {
+ __u8 pit_reinject;
+ __u8 reserved[31];
+};
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 730843d1d2fb..55fd4c5fd388 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,7 @@
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
+#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MEMORY_SLOTS 32
@@ -134,11 +135,18 @@ enum {
#define KVM_NR_MEM_OBJS 40
-struct kvm_guest_debug {
- int enabled;
- unsigned long bp[4];
- int singlestep;
-};
+#define KVM_NR_DB_REGS 4
+
+#define DR6_BD (1 << 13)
+#define DR6_BS (1 << 14)
+#define DR6_FIXED_1 0xffff0ff0
+#define DR6_VOLATILE 0x0000e00f
+
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_GE (1 << 9)
+#define DR7_GD (1 << 13)
+#define DR7_FIXED_1 0x00000400
+#define DR7_VOLATILE 0xffff23ff
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -162,7 +170,8 @@ struct kvm_pte_chain {
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bit 16 - direct mapping of virtual to physical mapping at gfn
+ * used for real mode and two-dimensional paging
* bits 17:19 - common access permissions for all ptes in this shadow page
*/
union kvm_mmu_page_role {
@@ -172,9 +181,10 @@ union kvm_mmu_page_role {
unsigned level:4;
unsigned quadrant:2;
unsigned pad_for_nice_hex_output:6;
- unsigned metaphysical:1;
+ unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
+ unsigned cr4_pge:1;
};
};
@@ -218,6 +228,18 @@ struct kvm_pv_mmu_op_buffer {
char buf[512] __aligned(sizeof(long));
};
+struct kvm_pio_request {
+ unsigned long count;
+ int cur_count;
+ gva_t guest_gva;
+ int in;
+ int port;
+ int size;
+ int string;
+ int down;
+ int rep;
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -236,6 +258,7 @@ struct kvm_mmu {
hpa_t root_hpa;
int root_level;
int shadow_root_level;
+ union kvm_mmu_page_role base_role;
u64 *pae_root;
};
@@ -258,6 +281,7 @@ struct kvm_vcpu_arch {
unsigned long cr3;
unsigned long cr4;
unsigned long cr8;
+ u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
@@ -338,6 +362,15 @@ struct kvm_vcpu_arch {
struct mtrr_state_type mtrr_state;
u32 pat;
+
+ int switch_db_regs;
+ unsigned long host_db[KVM_NR_DB_REGS];
+ unsigned long host_dr6;
+ unsigned long host_dr7;
+ unsigned long db[KVM_NR_DB_REGS];
+ unsigned long dr6;
+ unsigned long dr7;
+ unsigned long eff_db[KVM_NR_DB_REGS];
};
struct kvm_mem_alias {
@@ -378,6 +411,7 @@ struct kvm_arch{
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
+ u64 vm_init_tsc;
};
struct kvm_vm_stat {
@@ -446,8 +480,7 @@ struct kvm_x86_ops {
void (*vcpu_put)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg);
- void (*guest_debug_pre)(struct kvm_vcpu *vcpu);
+ struct kvm_guest_debug *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
@@ -589,10 +622,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void fx_init(struct kvm_vcpu *vcpu);
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu);
int emulator_write_emulated(unsigned long addr,
const void *val,
unsigned int bytes,
@@ -737,6 +766,10 @@ enum {
TASK_SWITCH_GATE = 3,
};
+#define HF_GIF_MASK (1 << 0)
+#define HF_HIF_MASK (1 << 1)
+#define HF_VINTR_MASK (1 << 2)
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index cb58643947b9..1890032a1bee 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -18,11 +18,13 @@
#define _EFER_LME 8 /* Long mode enable */
#define _EFER_LMA 10 /* Long mode active (read-only) */
#define _EFER_NX 11 /* No execute enable */
+#define _EFER_SVME 12 /* Enable virtualization */
#define EFER_SCE (1<<_EFER_SCE)
#define EFER_LME (1<<_EFER_LME)
#define EFER_LMA (1<<_EFER_LMA)
#define EFER_NX (1<<_EFER_NX)
+#define EFER_SVME (1<<_EFER_SVME)
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
@@ -331,4 +333,9 @@
#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_HSAVE_PA 0xc0010117
+
#endif /* _ASM_X86_MSR_INDEX_H */
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1b8afa78e869..82ada75f3ebf 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -174,10 +174,6 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
-#define MSR_EFER_SVME_MASK (1ULL << 12)
-#define MSR_VM_CR 0xc0010114
-#define MSR_VM_HSAVE_PA 0xc0010117ULL
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index 593636275238..e0f9aa16358b 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -118,7 +118,7 @@ static inline void cpu_svm_disable(void)
wrmsrl(MSR_VM_HSAVE_PA, 0);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
}
/** Makes sure SVM is disabled, if it is supported on the CPU
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index d0238e6151d8..498f944010b9 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -270,8 +270,9 @@ enum vmcs_field {
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
-#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
@@ -311,7 +312,7 @@ enum vmcs_field {
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
-#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose reg. */
+#define DEBUG_REG_ACCESS_REG(eq) (((eq) >> 8) & 0xf) /* 11:8, general purpose reg. */
/* segment AR */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index c7da3683f4c5..a58504ea78cc 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -4,6 +4,10 @@
config HAVE_KVM
bool
+config HAVE_KVM_IRQCHIP
+ bool
+ default y
+
menuconfig VIRTUALIZATION
bool "Virtualization"
depends on HAVE_KVM || X86
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index e665d1c623ca..c13bb92d3157 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -201,13 +201,16 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+ if (!pt->reinject)
+ atomic_set(&pt->pending, 1);
+
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
- ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
+ ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
@@ -536,6 +539,16 @@ void kvm_pit_reset(struct kvm_pit *pit)
pit->pit_state.irq_ack = 1;
}
+static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
+{
+ struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
+
+ if (!mask) {
+ atomic_set(&pit->pit_state.pit_timer.pending, 0);
+ pit->pit_state.irq_ack = 1;
+ }
+}
+
struct kvm_pit *kvm_create_pit(struct kvm *kvm)
{
struct kvm_pit *pit;
@@ -545,9 +558,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
if (!pit)
return NULL;
- mutex_lock(&kvm->lock);
pit->irq_source_id = kvm_request_irq_source_id(kvm);
- mutex_unlock(&kvm->lock);
if (pit->irq_source_id < 0) {
kfree(pit);
return NULL;
@@ -580,10 +591,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
pit_state->irq_ack_notifier.gsi = 0;
pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+ pit_state->pit_timer.reinject = true;
mutex_unlock(&pit->pit_state.lock);
kvm_pit_reset(pit);
+ pit->mask_notifier.func = pit_mask_notifer;
+ kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+
return pit;
}
@@ -592,6 +607,8 @@ void kvm_free_pit(struct kvm *kvm)
struct hrtimer *timer;
if (kvm->arch.vpit) {
+ kvm_unregister_irq_mask_notifier(kvm, 0,
+ &kvm->arch.vpit->mask_notifier);
mutex_lock(&kvm->arch.vpit->pit_state.lock);
timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
hrtimer_cancel(timer);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 4178022b97aa..6acbe4b505d5 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -9,6 +9,7 @@ struct kvm_kpit_timer {
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
+ bool reinject;
};
struct kvm_kpit_channel_state {
@@ -45,6 +46,7 @@ struct kvm_pit {
struct kvm *kvm;
struct kvm_kpit_state pit_state;
int irq_source_id;
+ struct kvm_irq_mask_notifier mask_notifier;
};
#define KVM_PIT_BASE_ADDRESS 0x40
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index 8e5ee99551f6..ed66e4c078dc 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -18,7 +18,6 @@ static const u32 host_save_user_msrs[] = {
};
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
-#define NUM_DB_REGS 4
struct kvm_vcpu;
@@ -29,18 +28,23 @@ struct vcpu_svm {
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
- unsigned long db_regs[NUM_DB_REGS];
-
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
u64 host_gs_base;
unsigned long host_cr2;
- unsigned long host_db_regs[NUM_DB_REGS];
- unsigned long host_dr6;
- unsigned long host_dr7;
u32 *msrpm;
+ struct vmcb *hsave;
+ u64 hsave_msr;
+
+ u64 nested_vmcb;
+
+ /* These are the merged vectors */
+ u32 *nested_msrpm;
+
+ /* gpa pointers to the real vectors */
+ u64 nested_vmcb_msrpm;
};
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 83f11c7474a1..07986100149b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -145,11 +145,20 @@ struct kvm_rmap_desc {
struct kvm_rmap_desc *more;
};
-struct kvm_shadow_walk {
- int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
- u64 addr, u64 *spte, int level);
+struct kvm_shadow_walk_iterator {
+ u64 addr;
+ hpa_t shadow_addr;
+ int level;
+ u64 *sptep;
+ unsigned index;
};
+#define for_each_shadow_entry(_vcpu, _addr, _walker) \
+ for (shadow_walk_init(&(_walker), _vcpu, _addr); \
+ shadow_walk_okay(&(_walker)); \
+ shadow_walk_next(&(_walker)))
+
+
struct kvm_unsync_walk {
int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
};
@@ -343,7 +352,6 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
BUG_ON(!mc->nobjs);
p = mc->objects[--mc->nobjs];
- memset(p, 0, size);
return p;
}
@@ -797,7 +805,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
ASSERT(is_empty_shadow_page(sp->spt));
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
- sp->global = 1;
sp->parent_pte = parent_pte;
--vcpu->kvm->arch.n_free_mmu_pages;
return sp;
@@ -1059,7 +1066,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry(sp, node, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical
+ if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: found role %x\n",
__func__, sp->role.word);
@@ -1193,7 +1200,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
gfn_t gfn,
gva_t gaddr,
unsigned level,
- int metaphysical,
+ int direct,
unsigned access,
u64 *parent_pte)
{
@@ -1204,10 +1211,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp;
struct hlist_node *node, *tmp;
- role.word = 0;
- role.glevels = vcpu->arch.mmu.root_level;
+ role = vcpu->arch.mmu.base_role;
role.level = level;
- role.metaphysical = metaphysical;
+ role.direct = direct;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1242,8 +1248,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
sp->gfn = gfn;
sp->role = role;
+ sp->global = role.cr4_pge;
hlist_add_head(&sp->hash_link, bucket);
- if (!metaphysical) {
+ if (!direct) {
if (rmap_write_protect(vcpu->kvm, gfn))
kvm_flush_remote_tlbs(vcpu->kvm);
account_shadowed(vcpu->kvm, gfn);
@@ -1255,35 +1262,35 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
return sp;
}
-static int walk_shadow(struct kvm_shadow_walk *walker,
- struct kvm_vcpu *vcpu, u64 addr)
+static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
+ struct kvm_vcpu *vcpu, u64 addr)
{
- hpa_t shadow_addr;
- int level;
- int r;
- u64 *sptep;
- unsigned index;
-
- shadow_addr = vcpu->arch.mmu.root_hpa;
- level = vcpu->arch.mmu.shadow_root_level;
- if (level == PT32E_ROOT_LEVEL) {
- shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
- shadow_addr &= PT64_BASE_ADDR_MASK;
- if (!shadow_addr)
- return 1;
- --level;
+ iterator->addr = addr;
+ iterator->shadow_addr = vcpu->arch.mmu.root_hpa;
+ iterator->level = vcpu->arch.mmu.shadow_root_level;
+ if (iterator->level == PT32E_ROOT_LEVEL) {
+ iterator->shadow_addr
+ = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
+ --iterator->level;
+ if (!iterator->shadow_addr)
+ iterator->level = 0;
}
+}
- while (level >= PT_PAGE_TABLE_LEVEL) {
- index = SHADOW_PT_INDEX(addr, level);
- sptep = ((u64 *)__va(shadow_addr)) + index;
- r = walker->entry(walker, vcpu, addr, sptep, level);
- if (r)
- return r;
- shadow_addr = *sptep & PT64_BASE_ADDR_MASK;
- --level;
- }
- return 0;
+static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator)
+{
+ if (iterator->level < PT_PAGE_TABLE_LEVEL)
+ return false;
+ iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level);
+ iterator->sptep = ((u64 *)__va(iterator->shadow_addr)) + iterator->index;
+ return true;
+}
+
+static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator)
+{
+ iterator->shadow_addr = *iterator->sptep & PT64_BASE_ADDR_MASK;
+ --iterator->level;
}
static void kvm_mmu_page_unlink_children(struct kvm *kvm,
@@ -1388,7 +1395,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_page_unlink_children(kvm, sp);
kvm_mmu_unlink_parents(kvm, sp);
kvm_flush_remote_tlbs(kvm);
- if (!sp->role.invalid && !sp->role.metaphysical)
+ if (!sp->role.invalid && !sp->role.direct)
unaccount_shadowed(kvm, sp->gfn);
if (sp->unsync)
kvm_unlink_unsync_page(kvm, sp);
@@ -1451,7 +1458,7 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
- if (sp->gfn == gfn && !sp->role.metaphysical) {
+ if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
@@ -1463,11 +1470,20 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
{
+ unsigned index;
+ struct hlist_head *bucket;
struct kvm_mmu_page *sp;
+ struct hlist_node *node, *nn;
- while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
- pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ index = kvm_page_table_hashfn(gfn);
+ bucket = &kvm->arch.mmu_page_hash[index];
+ hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
+ if (sp->gfn == gfn && !sp->role.direct
+ && !sp->role.invalid) {
+ pgprintk("%s: zap %lx %x\n",
+ __func__, gfn, sp->role.word);
+ kvm_mmu_zap_page(kvm, sp);
+ }
}
}
@@ -1622,7 +1638,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
/* don't unsync if pagetable is shadowed with multiple roles */
hlist_for_each_entry_safe(s, node, n, bucket, hash_link) {
- if (s->gfn != sp->gfn || s->role.metaphysical)
+ if (s->gfn != sp->gfn || s->role.direct)
continue;
if (s->role.word != sp->role.word)
return 1;
@@ -1669,8 +1685,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
u64 mt_mask = shadow_mt_mask;
struct kvm_mmu_page *sp = page_header(__pa(shadow_pte));
- if (!(vcpu->arch.cr4 & X86_CR4_PGE))
- global = 0;
if (!global && sp->global) {
sp->global = 0;
if (sp->unsync) {
@@ -1815,67 +1829,42 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
-struct direct_shadow_walk {
- struct kvm_shadow_walk walker;
- pfn_t pfn;
- int write;
- int largepage;
- int pt_write;
-};
-
-static int direct_map_entry(struct kvm_shadow_walk *_walk,
- struct kvm_vcpu *vcpu,
- u64 addr, u64 *sptep, int level)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ int largepage, gfn_t gfn, pfn_t pfn)
{
- struct direct_shadow_walk *walk =
- container_of(_walk, struct direct_shadow_walk, walker);
+ struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
+ int pt_write = 0;
gfn_t pseudo_gfn;
- gfn_t gfn = addr >> PAGE_SHIFT;
-
- if (level == PT_PAGE_TABLE_LEVEL
- || (walk->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL,
- 0, walk->write, 1, &walk->pt_write,
- walk->largepage, 0, gfn, walk->pfn, false);
- ++vcpu->stat.pf_fixed;
- return 1;
- }
- if (*sptep == shadow_trap_nonpresent_pte) {
- pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
- 1, ACC_ALL, sptep);
- if (!sp) {
- pgprintk("nonpaging_map: ENOMEM\n");
- kvm_release_pfn_clean(walk->pfn);
- return -ENOMEM;
+ for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
+ if (iterator.level == PT_PAGE_TABLE_LEVEL
+ || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
+ 0, write, 1, &pt_write,
+ largepage, 0, gfn, pfn, false);
+ ++vcpu->stat.pf_fixed;
+ break;
}
- set_shadow_pte(sptep,
- __pa(sp->spt)
- | PT_PRESENT_MASK | PT_WRITABLE_MASK
- | shadow_user_mask | shadow_x_mask);
- }
- return 0;
-}
+ if (*iterator.sptep == shadow_trap_nonpresent_pte) {
+ pseudo_gfn = (iterator.addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
+ sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
+ iterator.level - 1,
+ 1, ACC_ALL, iterator.sptep);
+ if (!sp) {
+ pgprintk("nonpaging_map: ENOMEM\n");
+ kvm_release_pfn_clean(pfn);
+ return -ENOMEM;
+ }
-static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
- int largepage, gfn_t gfn, pfn_t pfn)
-{
- int r;
- struct direct_shadow_walk walker = {
- .walker = { .entry = direct_map_entry, },
- .pfn = pfn,
- .largepage = largepage,
- .write = write,
- .pt_write = 0,
- };
-
- r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
- if (r < 0)
- return r;
- return walker.pt_write;
+ set_shadow_pte(iterator.sptep,
+ __pa(sp->spt)
+ | PT_PRESENT_MASK | PT_WRITABLE_MASK
+ | shadow_user_mask | shadow_x_mask);
+ }
+ }
+ return pt_write;
}
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
@@ -1957,7 +1946,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
int i;
gfn_t root_gfn;
struct kvm_mmu_page *sp;
- int metaphysical = 0;
+ int direct = 0;
root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
@@ -1966,18 +1955,18 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
ASSERT(!VALID_PAGE(root));
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, metaphysical,
+ PT64_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
vcpu->arch.mmu.root_hpa = root;
return;
}
- metaphysical = !is_paging(vcpu);
+ direct = !is_paging(vcpu);
if (tdp_enabled)
- metaphysical = 1;
+ direct = 1;
for (i = 0; i < 4; ++i) {
hpa_t root = vcpu->arch.mmu.pae_root[i];
@@ -1991,7 +1980,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
} else if (vcpu->arch.mmu.root_level == 0)
root_gfn = 0;
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
- PT32_ROOT_LEVEL, metaphysical,
+ PT32_ROOT_LEVEL, direct,
ACC_ALL, NULL);
root = __pa(sp->spt);
++sp->root_count;
@@ -2246,17 +2235,23 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
{
+ int r;
+
ASSERT(vcpu);
ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
if (!is_paging(vcpu))
- return nonpaging_init_context(vcpu);
+ r = nonpaging_init_context(vcpu);
else if (is_long_mode(vcpu))
- return paging64_init_context(vcpu);
+ r = paging64_init_context(vcpu);
else if (is_pae(vcpu))
- return paging32E_init_context(vcpu);
+ r = paging32E_init_context(vcpu);
else
- return paging32_init_context(vcpu);
+ r = paging32_init_context(vcpu);
+
+ vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+
+ return r;
}
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -2487,7 +2482,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
- if (sp->gfn != gfn || sp->role.metaphysical || sp->role.invalid)
+ if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
continue;
pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
@@ -3125,7 +3120,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
gfn_t gfn;
list_for_each_entry(sp, &vcpu->kvm->arch.active_mmu_pages, link) {
- if (sp->role.metaphysical)
+ if (sp->role.direct)
continue;
gfn = unalias_gfn(vcpu->kvm, sp->gfn);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 9fd78b6e17ad..7314c0944c5f 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -25,7 +25,6 @@
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
- #define shadow_walker shadow_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
@@ -42,7 +41,6 @@
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
- #define shadow_walker shadow_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
@@ -73,18 +71,6 @@ struct guest_walker {
u32 error_code;
};
-struct shadow_walker {
- struct kvm_shadow_walk walker;
- struct guest_walker *guest_walker;
- int user_fault;
- int write_fault;
- int largepage;
- int *ptwrite;
- pfn_t pfn;
- u64 *sptep;
- gpa_t pte_gpa;
-};
-
static gfn_t gpte_to_gfn(pt_element_t gpte)
{
return (gpte & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -283,91 +269,79 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
-static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+ struct guest_walker *gw,
+ int user_fault, int write_fault, int largepage,
+ int *ptwrite, pfn_t pfn)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
- struct guest_walker *gw = sw->guest_walker;
unsigned access = gw->pt_access;
struct kvm_mmu_page *shadow_page;
- u64 spte;
- int metaphysical;
+ u64 spte, *sptep;
+ int direct;
gfn_t table_gfn;
int r;
+ int level;
pt_element_t curr_pte;
+ struct kvm_shadow_walk_iterator iterator;
- if (level == PT_PAGE_TABLE_LEVEL
- || (sw->largepage && level == PT_DIRECTORY_LEVEL)) {
- mmu_set_spte(vcpu, sptep, access, gw->pte_access & access,
- sw->user_fault, sw->write_fault,
- gw->ptes[gw->level-1] & PT_DIRTY_MASK,
- sw->ptwrite, sw->largepage,
- gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
- gw->gfn, sw->pfn, false);
- sw->sptep = sptep;
- return 1;
- }
+ if (!is_present_pte(gw->ptes[gw->level - 1]))
+ return NULL;
- if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
- return 0;
+ for_each_shadow_entry(vcpu, addr, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
+ if (level == PT_PAGE_TABLE_LEVEL
+ || (largepage && level == PT_DIRECTORY_LEVEL)) {
+ mmu_set_spte(vcpu, sptep, access,
+ gw->pte_access & access,
+ user_fault, write_fault,
+ gw->ptes[gw->level-1] & PT_DIRTY_MASK,
+ ptwrite, largepage,
+ gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
+ gw->gfn, pfn, false);
+ break;
+ }
- if (is_large_pte(*sptep)) {
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- kvm_flush_remote_tlbs(vcpu->kvm);
- rmap_remove(vcpu->kvm, sptep);
- }
+ if (is_shadow_present_pte(*sptep) && !is_large_pte(*sptep))
+ continue;
- if (level == PT_DIRECTORY_LEVEL && gw->level == PT_DIRECTORY_LEVEL) {
- metaphysical = 1;
- if (!is_dirty_pte(gw->ptes[level - 1]))
- access &= ~ACC_WRITE_MASK;
- table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
- } else {
- metaphysical = 0;
- table_gfn = gw->table_gfn[level - 2];
- }
- shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
- metaphysical, access, sptep);
- if (!metaphysical) {
- r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
- &curr_pte, sizeof(curr_pte));
- if (r || curr_pte != gw->ptes[level - 2]) {
- kvm_mmu_put_page(shadow_page, sptep);
- kvm_release_pfn_clean(sw->pfn);
- sw->sptep = NULL;
- return 1;
+ if (is_large_pte(*sptep)) {
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ kvm_flush_remote_tlbs(vcpu->kvm);
+ rmap_remove(vcpu->kvm, sptep);
}
- }
- spte = __pa(shadow_page->spt) | PT_PRESENT_MASK | PT_ACCESSED_MASK
- | PT_WRITABLE_MASK | PT_USER_MASK;
- *sptep = spte;
- return 0;
-}
-
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
- struct guest_walker *guest_walker,
- int user_fault, int write_fault, int largepage,
- int *ptwrite, pfn_t pfn)
-{
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_walk_entry), },
- .guest_walker = guest_walker,
- .user_fault = user_fault,
- .write_fault = write_fault,
- .largepage = largepage,
- .ptwrite = ptwrite,
- .pfn = pfn,
- };
-
- if (!is_present_pte(guest_walker->ptes[guest_walker->level - 1]))
- return NULL;
+ if (level == PT_DIRECTORY_LEVEL
+ && gw->level == PT_DIRECTORY_LEVEL) {
+ direct = 1;
+ if (!is_dirty_pte(gw->ptes[level - 1]))
+ access &= ~ACC_WRITE_MASK;
+ table_gfn = gpte_to_gfn(gw->ptes[level - 1]);
+ } else {
+ direct = 0;
+ table_gfn = gw->table_gfn[level - 2];
+ }
+ shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+ direct, access, sptep);
+ if (!direct) {
+ r = kvm_read_guest_atomic(vcpu->kvm,
+ gw->pte_gpa[level - 2],
+ &curr_pte, sizeof(curr_pte));
+ if (r || curr_pte != gw->ptes[level - 2]) {
+ kvm_mmu_put_page(shadow_page, sptep);
+ kvm_release_pfn_clean(pfn);
+ sptep = NULL;
+ break;
+ }
+ }
- walk_shadow(&walker.walker, vcpu, addr);
+ spte = __pa(shadow_page->spt)
+ | PT_PRESENT_MASK | PT_ACCESSED_MASK
+ | PT_WRITABLE_MASK | PT_USER_MASK;
+ *sptep = spte;
+ }
- return walker.sptep;
+ return sptep;
}
/*
@@ -465,54 +439,52 @@ out_unlock:
return 0;
}
-static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw,
- struct kvm_vcpu *vcpu, u64 addr,
- u64 *sptep, int level)
+static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
{
- struct shadow_walker *sw =
- container_of(_sw, struct shadow_walker, walker);
+ struct kvm_shadow_walk_iterator iterator;
+ pt_element_t gpte;
+ gpa_t pte_gpa = -1;
+ int level;
+ u64 *sptep;
+
+ spin_lock(&vcpu->kvm->mmu_lock);
- /* FIXME: properly handle invlpg on large guest pages */
- if (level == PT_PAGE_TABLE_LEVEL ||
- ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
+ for_each_shadow_entry(vcpu, gva, iterator) {
+ level = iterator.level;
+ sptep = iterator.sptep;
- sw->pte_gpa = (sp->gfn << PAGE_SHIFT);
- sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+ /* FIXME: properly handle invlpg on large guest pages */
+ if (level == PT_PAGE_TABLE_LEVEL ||
+ ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) {
+ struct kvm_mmu_page *sp = page_header(__pa(sptep));
- if (is_shadow_present_pte(*sptep)) {
- rmap_remove(vcpu->kvm, sptep);
- if (is_large_pte(*sptep))
- --vcpu->kvm->stat.lpages;
+ pte_gpa = (sp->gfn << PAGE_SHIFT);
+ pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t);
+
+ if (is_shadow_present_pte(*sptep)) {
+ rmap_remove(vcpu->kvm, sptep);
+ if (is_large_pte(*sptep))
+ --vcpu->kvm->stat.lpages;
+ }
+ set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
+ break;
}
- set_shadow_pte(sptep, shadow_trap_nonpresent_pte);
- return 1;
- }
- if (!is_shadow_present_pte(*sptep))
- return 1;
- return 0;
-}
-static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
-{
- pt_element_t gpte;
- struct shadow_walker walker = {
- .walker = { .entry = FNAME(shadow_invlpg_entry), },
- .pte_gpa = -1,
- };
+ if (!is_shadow_present_pte(*sptep))
+ break;
+ }
- spin_lock(&vcpu->kvm->mmu_lock);
- walk_shadow(&walker.walker, vcpu, gva);
spin_unlock(&vcpu->kvm->mmu_lock);
- if (walker.pte_gpa == -1)
+
+ if (pte_gpa == -1)
return;
- if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte,
+ if (kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &gpte,
sizeof(pt_element_t)))
return;
if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) {
if (mmu_topup_memory_caches(vcpu))
return;
- kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte,
+ kvm_mmu_pte_write(vcpu, pte_gpa, (const u8 *)&gpte,
sizeof(pt_element_t), 0);
}
}
@@ -540,7 +512,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
pt_element_t pt[256 / sizeof(pt_element_t)];
gpa_t pte_gpa;
- if (sp->role.metaphysical
+ if (sp->role.direct
|| (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) {
nonpaging_prefetch_page(vcpu, sp);
return;
@@ -619,7 +591,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef pt_element_t
#undef guest_walker
-#undef shadow_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1452851ae258..14e517ea00a2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -38,9 +38,6 @@ MODULE_LICENSE("GPL");
#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1
-#define DR7_GD_MASK (1 << 13)
-#define DR6_BD_MASK (1 << 13)
-
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
@@ -50,6 +47,15 @@ MODULE_LICENSE("GPL");
#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+/* Turn on to get debugging output*/
+/* #define NESTED_DEBUG */
+
+#ifdef NESTED_DEBUG
+#define nsvm_printk(fmt, args...) printk(KERN_INFO fmt, ## args)
+#else
+#define nsvm_printk(fmt, args...) do {} while(0)
+#endif
+
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool npt_enabled = true;
@@ -60,14 +66,29 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
+static int nested = 0;
+module_param(nested, int, S_IRUGO);
+
static void kvm_reput_irq(struct vcpu_svm *svm);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override);
+static int nested_svm_vmexit(struct vcpu_svm *svm);
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque);
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code);
+
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_svm, vcpu);
}
+static inline bool is_nested(struct vcpu_svm *svm)
+{
+ return svm->nested_vmcb;
+}
+
static unsigned long iopm_base;
struct kvm_ldttss_desc {
@@ -157,32 +178,6 @@ static inline void kvm_write_cr2(unsigned long val)
asm volatile ("mov %0, %%cr2" :: "r" (val));
}
-static inline unsigned long read_dr6(void)
-{
- unsigned long dr6;
-
- asm volatile ("mov %%dr6, %0" : "=r" (dr6));
- return dr6;
-}
-
-static inline void write_dr6(unsigned long val)
-{
- asm volatile ("mov %0, %%dr6" :: "r" (val));
-}
-
-static inline unsigned long read_dr7(void)
-{
- unsigned long dr7;
-
- asm volatile ("mov %%dr7, %0" : "=r" (dr7));
- return dr7;
-}
-
-static inline void write_dr7(unsigned long val)
-{
- asm volatile ("mov %0, %%dr7" :: "r" (val));
-}
-
static inline void force_new_asid(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->asid_generation--;
@@ -198,7 +193,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
if (!npt_enabled && !(efer & EFER_LMA))
efer &= ~EFER_LME;
- to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
+ to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
vcpu->arch.shadow_efer = efer;
}
@@ -207,6 +202,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
{
struct vcpu_svm *svm = to_svm(vcpu);
+ /* If we are within a nested VM we'd better #VMEXIT and let the
+ guest handle the exception */
+ if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+ return;
+
svm->vmcb->control.event_inj = nr
| SVM_EVTINJ_VALID
| (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
@@ -242,7 +242,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, svm->next_rip);
svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
- vcpu->arch.interrupt_window_open = 1;
+ vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static int has_svm(void)
@@ -250,7 +250,7 @@ static int has_svm(void)
const char *msg;
if (!cpu_has_svm(&msg)) {
- printk(KERN_INFO "has_svn: %s\n", msg);
+ printk(KERN_INFO "has_svm: %s\n", msg);
return 0;
}
@@ -292,7 +292,7 @@ static void svm_hardware_enable(void *garbage)
svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
rdmsrl(MSR_EFER, efer);
- wrmsrl(MSR_EFER, efer | MSR_EFER_SVME_MASK);
+ wrmsrl(MSR_EFER, efer | EFER_SVME);
wrmsrl(MSR_VM_HSAVE_PA,
page_to_pfn(svm_data->save_area) << PAGE_SHIFT);
@@ -417,6 +417,11 @@ static __init int svm_hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
+ if (nested) {
+ printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
+ kvm_enable_efer_bits(EFER_SVME);
+ }
+
for_each_online_cpu(cpu) {
r = svm_cpu_init(cpu);
if (r)
@@ -559,7 +564,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
- save->efer = MSR_EFER_SVME_MASK;
+ save->efer = EFER_SVME;
save->dr6 = 0xffff0ff0;
save->dr7 = 0x400;
save->rflags = 2;
@@ -591,6 +596,9 @@ static void init_vmcb(struct vcpu_svm *svm)
save->cr4 = 0;
}
force_new_asid(&svm->vcpu);
+
+ svm->nested_vmcb = 0;
+ svm->vcpu.arch.hflags = HF_GIF_MASK;
}
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
@@ -615,6 +623,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct vcpu_svm *svm;
struct page *page;
struct page *msrpm_pages;
+ struct page *hsave_page;
+ struct page *nested_msrpm_pages;
int err;
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -637,14 +647,25 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
if (!msrpm_pages)
goto uninit;
+
+ nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+ if (!nested_msrpm_pages)
+ goto uninit;
+
svm->msrpm = page_address(msrpm_pages);
svm_vcpu_init_msrpm(svm->msrpm);
+ hsave_page = alloc_page(GFP_KERNEL);
+ if (!hsave_page)
+ goto uninit;
+ svm->hsave = page_address(hsave_page);
+
+ svm->nested_msrpm = page_address(nested_msrpm_pages);
+
svm->vmcb = page_address(page);
clear_page(svm->vmcb);
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
- memset(svm->db_regs, 0, sizeof(svm->db_regs));
init_vmcb(svm);
fx_init(&svm->vcpu);
@@ -669,6 +690,8 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
+ __free_page(virt_to_page(svm->hsave));
+ __free_pages(virt_to_page(svm->nested_msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -718,6 +741,16 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
to_svm(vcpu)->vmcb->save.rflags = rflags;
}
+static void svm_set_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+}
+
+static void svm_clear_vintr(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+}
+
static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
{
struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
@@ -905,9 +938,37 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
}
-static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- return -EOPNOTSUPP;
+ int old_debug = vcpu->guest_debug;
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ vcpu->guest_debug = dbg->control;
+
+ svm->vmcb->control.intercept_exceptions &=
+ ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ svm->vmcb->control.intercept_exceptions |=
+ 1 << BP_VECTOR;
+ } else
+ vcpu->guest_debug = 0;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
+ else
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
+ svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
+
+ return 0;
}
static int svm_get_irq(struct kvm_vcpu *vcpu)
@@ -949,7 +1010,29 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
{
- unsigned long val = to_svm(vcpu)->db_regs[dr];
+ struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long val;
+
+ switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
+ case 6:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr6;
+ else
+ val = svm->vmcb->save.dr6;
+ break;
+ case 7:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ val = vcpu->arch.dr7;
+ else
+ val = svm->vmcb->save.dr7;
+ break;
+ default:
+ val = 0;
+ }
+
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
return val;
}
@@ -959,33 +1042,40 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
{
struct vcpu_svm *svm = to_svm(vcpu);
- *exception = 0;
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)value, handler);
- if (svm->vmcb->save.dr7 & DR7_GD_MASK) {
- svm->vmcb->save.dr7 &= ~DR7_GD_MASK;
- svm->vmcb->save.dr6 |= DR6_BD_MASK;
- *exception = DB_VECTOR;
- return;
- }
+ *exception = 0;
switch (dr) {
case 0 ... 3:
- svm->db_regs[dr] = value;
+ vcpu->arch.db[dr] = value;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = value;
return;
case 4 ... 5:
- if (vcpu->arch.cr4 & X86_CR4_DE) {
+ if (vcpu->arch.cr4 & X86_CR4_DE)
*exception = UD_VECTOR;
+ return;
+ case 6:
+ if (value & 0xffffffff00000000ULL) {
+ *exception = GP_VECTOR;
return;
}
- case 7: {
- if (value & ~((1ULL << 32) - 1)) {
+ vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
+ return;
+ case 7:
+ if (value & 0xffffffff00000000ULL) {
*exception = GP_VECTOR;
return;
}
- svm->vmcb->save.dr7 = value;
+ vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ svm->vmcb->save.dr7 = vcpu->arch.dr7;
+ vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
+ }
return;
- }
default:
+ /* FIXME: Possible case? */
printk(KERN_DEBUG "%s: unexpected dr %u\n",
__func__, dr);
*exception = UD_VECTOR;
@@ -1031,6 +1121,27 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
}
+static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (!(svm->vcpu.guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ kvm_queue_exception(&svm->vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ return 0;
+}
+
+static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
+ kvm_run->debug.arch.exception = BP_VECTOR;
+ return 0;
+}
+
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
{
int er;
@@ -1139,6 +1250,567 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
return 1;
}
+static int nested_svm_check_permissions(struct vcpu_svm *svm)
+{
+ if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
+ || !is_paging(&svm->vcpu)) {
+ kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ if (svm->vmcb->save.cpl) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
+ bool has_error_code, u32 error_code)
+{
+ if (is_nested(svm)) {
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = error_code;
+ svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+ if (nested_svm_exit_handled(svm, false)) {
+ nsvm_printk("VMexit -> EXCP 0x%x\n", nr);
+
+ nested_svm_vmexit(svm);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static inline int nested_svm_intr(struct vcpu_svm *svm)
+{
+ if (is_nested(svm)) {
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ return 0;
+
+ if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
+ return 0;
+
+ svm->vmcb->control.exit_code = SVM_EXIT_INTR;
+
+ if (nested_svm_exit_handled(svm, false)) {
+ nsvm_printk("VMexit -> INTR\n");
+ nested_svm_vmexit(svm);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static struct page *nested_svm_get_page(struct vcpu_svm *svm, u64 gpa)
+{
+ struct page *page;
+
+ down_read(&current->mm->mmap_sem);
+ page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
+ up_read(&current->mm->mmap_sem);
+
+ if (is_error_page(page)) {
+ printk(KERN_INFO "%s: could not find page at 0x%llx\n",
+ __func__, gpa);
+ kvm_release_page_clean(page);
+ kvm_inject_gp(&svm->vcpu, 0);
+ return NULL;
+ }
+ return page;
+}
+
+static int nested_svm_do(struct vcpu_svm *svm,
+ u64 arg1_gpa, u64 arg2_gpa, void *opaque,
+ int (*handler)(struct vcpu_svm *svm,
+ void *arg1,
+ void *arg2,
+ void *opaque))
+{
+ struct page *arg1_page;
+ struct page *arg2_page = NULL;
+ void *arg1;
+ void *arg2 = NULL;
+ int retval;
+
+ arg1_page = nested_svm_get_page(svm, arg1_gpa);
+ if(arg1_page == NULL)
+ return 1;
+
+ if (arg2_gpa) {
+ arg2_page = nested_svm_get_page(svm, arg2_gpa);
+ if(arg2_page == NULL) {
+ kvm_release_page_clean(arg1_page);
+ return 1;
+ }
+ }
+
+ arg1 = kmap_atomic(arg1_page, KM_USER0);
+ if (arg2_gpa)
+ arg2 = kmap_atomic(arg2_page, KM_USER1);
+
+ retval = handler(svm, arg1, arg2, opaque);
+
+ kunmap_atomic(arg1, KM_USER0);
+ if (arg2_gpa)
+ kunmap_atomic(arg2, KM_USER1);
+
+ kvm_release_page_dirty(arg1_page);
+ if (arg2_gpa)
+ kvm_release_page_dirty(arg2_page);
+
+ return retval;
+}
+
+static int nested_svm_exit_handled_real(struct vcpu_svm *svm,
+ void *arg1,
+ void *arg2,
+ void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ bool kvm_overrides = *(bool *)opaque;
+ u32 exit_code = svm->vmcb->control.exit_code;
+
+ if (kvm_overrides) {
+ switch (exit_code) {
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ return 0;
+ /* For now we are always handling NPFs when using them */
+ case SVM_EXIT_NPF:
+ if (npt_enabled)
+ return 0;
+ break;
+ /* When we're shadowing, trap PFs */
+ case SVM_EXIT_EXCP_BASE + PF_VECTOR:
+ if (!npt_enabled)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (exit_code) {
+ case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
+ u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
+ if (nested_vmcb->control.intercept_cr_read & cr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
+ u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
+ if (nested_vmcb->control.intercept_cr_write & cr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
+ u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
+ if (nested_vmcb->control.intercept_dr_read & dr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
+ u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
+ if (nested_vmcb->control.intercept_dr_write & dr_bits)
+ return 1;
+ break;
+ }
+ case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
+ u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
+ if (nested_vmcb->control.intercept_exceptions & excp_bits)
+ return 1;
+ break;
+ }
+ default: {
+ u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
+ nsvm_printk("exit code: 0x%x\n", exit_code);
+ if (nested_vmcb->control.intercept & exit_bits)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int nested_svm_exit_handled_msr(struct vcpu_svm *svm,
+ void *arg1, void *arg2,
+ void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ u8 *msrpm = (u8 *)arg2;
+ u32 t0, t1;
+ u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ u32 param = svm->vmcb->control.exit_info_1 & 1;
+
+ if (!(nested_vmcb->control.intercept & (1ULL << INTERCEPT_MSR_PROT)))
+ return 0;
+
+ switch(msr) {
+ case 0 ... 0x1fff:
+ t0 = (msr * 2) % 8;
+ t1 = msr / 8;
+ break;
+ case 0xc0000000 ... 0xc0001fff:
+ t0 = (8192 + msr - 0xc0000000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ case 0xc0010000 ... 0xc0011fff:
+ t0 = (16384 + msr - 0xc0010000) * 2;
+ t1 = (t0 / 8);
+ t0 %= 8;
+ break;
+ default:
+ return 1;
+ break;
+ }
+ if (msrpm[t1] & ((1 << param) << t0))
+ return 1;
+
+ return 0;
+}
+
+static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override)
+{
+ bool k = kvm_override;
+
+ switch (svm->vmcb->control.exit_code) {
+ case SVM_EXIT_MSR:
+ return nested_svm_do(svm, svm->nested_vmcb,
+ svm->nested_vmcb_msrpm, NULL,
+ nested_svm_exit_handled_msr);
+ default: break;
+ }
+
+ return nested_svm_do(svm, svm->nested_vmcb, 0, &k,
+ nested_svm_exit_handled_real);
+}
+
+static int nested_svm_vmexit_real(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+ u64 nested_save[] = { nested_vmcb->save.cr0,
+ nested_vmcb->save.cr3,
+ nested_vmcb->save.cr4,
+ nested_vmcb->save.efer,
+ nested_vmcb->control.intercept_cr_read,
+ nested_vmcb->control.intercept_cr_write,
+ nested_vmcb->control.intercept_dr_read,
+ nested_vmcb->control.intercept_dr_write,
+ nested_vmcb->control.intercept_exceptions,
+ nested_vmcb->control.intercept,
+ nested_vmcb->control.msrpm_base_pa,
+ nested_vmcb->control.iopm_base_pa,
+ nested_vmcb->control.tsc_offset };
+
+ /* Give the current vmcb to the guest */
+ memcpy(nested_vmcb, svm->vmcb, sizeof(struct vmcb));
+ nested_vmcb->save.cr0 = nested_save[0];
+ if (!npt_enabled)
+ nested_vmcb->save.cr3 = nested_save[1];
+ nested_vmcb->save.cr4 = nested_save[2];
+ nested_vmcb->save.efer = nested_save[3];
+ nested_vmcb->control.intercept_cr_read = nested_save[4];
+ nested_vmcb->control.intercept_cr_write = nested_save[5];
+ nested_vmcb->control.intercept_dr_read = nested_save[6];
+ nested_vmcb->control.intercept_dr_write = nested_save[7];
+ nested_vmcb->control.intercept_exceptions = nested_save[8];
+ nested_vmcb->control.intercept = nested_save[9];
+ nested_vmcb->control.msrpm_base_pa = nested_save[10];
+ nested_vmcb->control.iopm_base_pa = nested_save[11];
+ nested_vmcb->control.tsc_offset = nested_save[12];
+
+ /* We always set V_INTR_MASKING and remember the old value in hflags */
+ if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
+ nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ if ((nested_vmcb->control.int_ctl & V_IRQ_MASK) &&
+ (nested_vmcb->control.int_vector)) {
+ nsvm_printk("WARNING: IRQ 0x%x still enabled on #VMEXIT\n",
+ nested_vmcb->control.int_vector);
+ }
+
+ /* Restore the original control entries */
+ svm->vmcb->control = hsave->control;
+
+ /* Kill any pending exceptions */
+ if (svm->vcpu.arch.exception.pending == true)
+ nsvm_printk("WARNING: Pending Exception\n");
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Restore selected save entries */
+ svm->vmcb->save.es = hsave->save.es;
+ svm->vmcb->save.cs = hsave->save.cs;
+ svm->vmcb->save.ss = hsave->save.ss;
+ svm->vmcb->save.ds = hsave->save.ds;
+ svm->vmcb->save.gdtr = hsave->save.gdtr;
+ svm->vmcb->save.idtr = hsave->save.idtr;
+ svm->vmcb->save.rflags = hsave->save.rflags;
+ svm_set_efer(&svm->vcpu, hsave->save.efer);
+ svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
+ svm_set_cr4(&svm->vcpu, hsave->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = hsave->save.cr3;
+ svm->vcpu.arch.cr3 = hsave->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
+ }
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ svm->vmcb->save.dr7 = 0;
+ svm->vmcb->save.cpl = 0;
+ svm->vmcb->control.exit_int_info = 0;
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+ /* Exit nested SVM mode */
+ svm->nested_vmcb = 0;
+
+ return 0;
+}
+
+static int nested_svm_vmexit(struct vcpu_svm *svm)
+{
+ nsvm_printk("VMexit\n");
+ if (nested_svm_do(svm, svm->nested_vmcb, 0,
+ NULL, nested_svm_vmexit_real))
+ return 1;
+
+ kvm_mmu_reset_context(&svm->vcpu);
+ kvm_mmu_load(&svm->vcpu);
+
+ return 0;
+}
+
+static int nested_svm_vmrun_msrpm(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ int i;
+ u32 *nested_msrpm = (u32*)arg1;
+ for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++)
+ svm->nested_msrpm[i] = svm->msrpm[i] | nested_msrpm[i];
+ svm->vmcb->control.msrpm_base_pa = __pa(svm->nested_msrpm);
+
+ return 0;
+}
+
+static int nested_svm_vmrun(struct vcpu_svm *svm, void *arg1,
+ void *arg2, void *opaque)
+{
+ struct vmcb *nested_vmcb = (struct vmcb *)arg1;
+ struct vmcb *hsave = svm->hsave;
+
+ /* nested_vmcb is our indicator if nested SVM is activated */
+ svm->nested_vmcb = svm->vmcb->save.rax;
+
+ /* Clear internal status */
+ svm->vcpu.arch.exception.pending = false;
+
+ /* Save the old vmcb, so we don't need to pick what we save, but
+ can restore everything when a VMEXIT occurs */
+ memcpy(hsave, svm->vmcb, sizeof(struct vmcb));
+ /* We need to remember the original CR3 in the SPT case */
+ if (!npt_enabled)
+ hsave->save.cr3 = svm->vcpu.arch.cr3;
+ hsave->save.cr4 = svm->vcpu.arch.cr4;
+ hsave->save.rip = svm->next_rip;
+
+ if (svm->vmcb->save.rflags & X86_EFLAGS_IF)
+ svm->vcpu.arch.hflags |= HF_HIF_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
+
+ /* Load the nested guest state */
+ svm->vmcb->save.es = nested_vmcb->save.es;
+ svm->vmcb->save.cs = nested_vmcb->save.cs;
+ svm->vmcb->save.ss = nested_vmcb->save.ss;
+ svm->vmcb->save.ds = nested_vmcb->save.ds;
+ svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
+ svm->vmcb->save.idtr = nested_vmcb->save.idtr;
+ svm->vmcb->save.rflags = nested_vmcb->save.rflags;
+ svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
+ svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
+ svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
+ if (npt_enabled) {
+ svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
+ svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
+ } else {
+ kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
+ kvm_mmu_reset_context(&svm->vcpu);
+ }
+ svm->vmcb->save.cr2 = nested_vmcb->save.cr2;
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ /* In case we don't even reach vcpu_run, the fields are not updated */
+ svm->vmcb->save.rax = nested_vmcb->save.rax;
+ svm->vmcb->save.rsp = nested_vmcb->save.rsp;
+ svm->vmcb->save.rip = nested_vmcb->save.rip;
+ svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
+ svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
+ svm->vmcb->save.cpl = nested_vmcb->save.cpl;
+
+ /* We don't want a nested guest to be more powerful than the guest,
+ so all intercepts are ORed */
+ svm->vmcb->control.intercept_cr_read |=
+ nested_vmcb->control.intercept_cr_read;
+ svm->vmcb->control.intercept_cr_write |=
+ nested_vmcb->control.intercept_cr_write;
+ svm->vmcb->control.intercept_dr_read |=
+ nested_vmcb->control.intercept_dr_read;
+ svm->vmcb->control.intercept_dr_write |=
+ nested_vmcb->control.intercept_dr_write;
+ svm->vmcb->control.intercept_exceptions |=
+ nested_vmcb->control.intercept_exceptions;
+
+ svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+
+ svm->nested_vmcb_msrpm = nested_vmcb->control.msrpm_base_pa;
+
+ force_new_asid(&svm->vcpu);
+ svm->vmcb->control.exit_int_info = nested_vmcb->control.exit_int_info;
+ svm->vmcb->control.exit_int_info_err = nested_vmcb->control.exit_int_info_err;
+ svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
+ if (nested_vmcb->control.int_ctl & V_IRQ_MASK) {
+ nsvm_printk("nSVM Injecting Interrupt: 0x%x\n",
+ nested_vmcb->control.int_ctl);
+ }
+ if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
+ svm->vcpu.arch.hflags |= HF_VINTR_MASK;
+ else
+ svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
+
+ nsvm_printk("nSVM exit_int_info: 0x%x | int_state: 0x%x\n",
+ nested_vmcb->control.exit_int_info,
+ nested_vmcb->control.int_state);
+
+ svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
+ svm->vmcb->control.int_state = nested_vmcb->control.int_state;
+ svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
+ if (nested_vmcb->control.event_inj & SVM_EVTINJ_VALID)
+ nsvm_printk("Injecting Event: 0x%x\n",
+ nested_vmcb->control.event_inj);
+ svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
+ svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 0;
+}
+
+static int nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+{
+ to_vmcb->save.fs = from_vmcb->save.fs;
+ to_vmcb->save.gs = from_vmcb->save.gs;
+ to_vmcb->save.tr = from_vmcb->save.tr;
+ to_vmcb->save.ldtr = from_vmcb->save.ldtr;
+ to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
+ to_vmcb->save.star = from_vmcb->save.star;
+ to_vmcb->save.lstar = from_vmcb->save.lstar;
+ to_vmcb->save.cstar = from_vmcb->save.cstar;
+ to_vmcb->save.sfmask = from_vmcb->save.sfmask;
+ to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
+ to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
+ to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
+
+ return 1;
+}
+
+static int nested_svm_vmload(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque)
+{
+ return nested_svm_vmloadsave((struct vmcb *)nested_vmcb, svm->vmcb);
+}
+
+static int nested_svm_vmsave(struct vcpu_svm *svm, void *nested_vmcb,
+ void *arg2, void *opaque)
+{
+ return nested_svm_vmloadsave(svm->vmcb, (struct vmcb *)nested_vmcb);
+}
+
+static int vmload_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmload);
+
+ return 1;
+}
+
+static int vmsave_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ nested_svm_do(svm, svm->vmcb->save.rax, 0, NULL, nested_svm_vmsave);
+
+ return 1;
+}
+
+static int vmrun_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ nsvm_printk("VMrun\n");
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ if (nested_svm_do(svm, svm->vmcb->save.rax, 0,
+ NULL, nested_svm_vmrun))
+ return 1;
+
+ if (nested_svm_do(svm, svm->nested_vmcb_msrpm, 0,
+ NULL, nested_svm_vmrun_msrpm))
+ return 1;
+
+ return 1;
+}
+
+static int stgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags |= HF_GIF_MASK;
+
+ return 1;
+}
+
+static int clgi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+ if (nested_svm_check_permissions(svm))
+ return 1;
+
+ svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+ skip_emulated_instruction(&svm->vcpu);
+
+ svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
+
+ /* After a CLGI no interrupts should come */
+ svm_clear_vintr(svm);
+ svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+
+ return 1;
+}
+
static int invalid_op_interception(struct vcpu_svm *svm,
struct kvm_run *kvm_run)
{
@@ -1250,6 +1922,15 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
case MSR_IA32_LASTINTTOIP:
*data = svm->vmcb->save.last_excp_to;
break;
+ case MSR_VM_HSAVE_PA:
+ *data = svm->hsave_msr;
+ break;
+ case MSR_VM_CR:
+ *data = 0;
+ break;
+ case MSR_IA32_UCODE_REV:
+ *data = 0x01000065;
+ break;
default:
return kvm_get_msr_common(vcpu, ecx, data);
}
@@ -1344,6 +2025,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_VM_HSAVE_PA:
+ svm->hsave_msr = data;
+ break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
}
@@ -1380,7 +2064,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm,
{
KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler);
- svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_clear_vintr(svm);
svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
/*
* If the user space waits to inject interrupts, exit as soon as
@@ -1417,6 +2101,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
+ [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
+ [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
@@ -1436,12 +2122,12 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
[SVM_EXIT_MSR] = msr_interception,
[SVM_EXIT_TASK_SWITCH] = task_switch_interception,
[SVM_EXIT_SHUTDOWN] = shutdown_interception,
- [SVM_EXIT_VMRUN] = invalid_op_interception,
+ [SVM_EXIT_VMRUN] = vmrun_interception,
[SVM_EXIT_VMMCALL] = vmmcall_interception,
- [SVM_EXIT_VMLOAD] = invalid_op_interception,
- [SVM_EXIT_VMSAVE] = invalid_op_interception,
- [SVM_EXIT_STGI] = invalid_op_interception,
- [SVM_EXIT_CLGI] = invalid_op_interception,
+ [SVM_EXIT_VMLOAD] = vmload_interception,
+ [SVM_EXIT_VMSAVE] = vmsave_interception,
+ [SVM_EXIT_STGI] = stgi_interception,
+ [SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = invalid_op_interception,
[SVM_EXIT_WBINVD] = emulate_on_interception,
[SVM_EXIT_MONITOR] = invalid_op_interception,
@@ -1457,6 +2143,17 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip,
(u32)((u64)svm->vmcb->save.rip >> 32), entryexit);
+ if (is_nested(svm)) {
+ nsvm_printk("nested handle_exit: 0x%x | 0x%lx | 0x%lx | 0x%lx\n",
+ exit_code, svm->vmcb->control.exit_info_1,
+ svm->vmcb->control.exit_info_2, svm->vmcb->save.rip);
+ if (nested_svm_exit_handled(svm, true)) {
+ nested_svm_vmexit(svm);
+ nsvm_printk("-> #VMEXIT\n");
+ return 1;
+ }
+ }
+
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
@@ -1544,6 +2241,8 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ nested_svm_intr(svm);
+
svm_inject_irq(svm, irq);
}
@@ -1589,11 +2288,17 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
if (!kvm_cpu_has_interrupt(vcpu))
goto out;
+ if (nested_svm_intr(svm))
+ goto out;
+
+ if (!(svm->vcpu.arch.hflags & HF_GIF_MASK))
+ goto out;
+
if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
(vmcb->control.event_inj & SVM_EVTINJ_VALID)) {
/* unable to deliver irq, set pending irq */
- vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
goto out;
}
@@ -1616,7 +2321,8 @@ static void kvm_reput_irq(struct vcpu_svm *svm)
}
svm->vcpu.arch.interrupt_window_open =
- !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
+ !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK);
}
static void svm_do_inject_vector(struct vcpu_svm *svm)
@@ -1638,9 +2344,13 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
+ if (nested_svm_intr(svm))
+ return;
+
svm->vcpu.arch.interrupt_window_open =
(!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
- (svm->vmcb->save.rflags & X86_EFLAGS_IF));
+ (svm->vmcb->save.rflags & X86_EFLAGS_IF) &&
+ (svm->vcpu.arch.hflags & HF_GIF_MASK));
if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary)
/*
@@ -1653,9 +2363,9 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
*/
if (!svm->vcpu.arch.interrupt_window_open &&
(svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window))
- control->intercept |= 1ULL << INTERCEPT_VINTR;
- else
- control->intercept &= ~(1ULL << INTERCEPT_VINTR);
+ svm_set_vintr(svm);
+ else
+ svm_clear_vintr(svm);
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -1663,22 +2373,6 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void save_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
- asm volatile ("mov %%dr1, %0" : "=r"(db_regs[1]));
- asm volatile ("mov %%dr2, %0" : "=r"(db_regs[2]));
- asm volatile ("mov %%dr3, %0" : "=r"(db_regs[3]));
-}
-
-static void load_db_regs(unsigned long *db_regs)
-{
- asm volatile ("mov %0, %%dr0" : : "r"(db_regs[0]));
- asm volatile ("mov %0, %%dr1" : : "r"(db_regs[1]));
- asm volatile ("mov %0, %%dr2" : : "r"(db_regs[2]));
- asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
-}
-
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
@@ -1737,19 +2431,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gs_selector = kvm_read_gs();
ldt_selector = kvm_read_ldt();
svm->host_cr2 = kvm_read_cr2();
- svm->host_dr6 = read_dr6();
- svm->host_dr7 = read_dr7();
- svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ if (!is_nested(svm))
+ svm->vmcb->save.cr2 = vcpu->arch.cr2;
/* required for live migration with NPT */
if (npt_enabled)
svm->vmcb->save.cr3 = vcpu->arch.cr3;
- if (svm->vmcb->save.dr7 & 0xff) {
- write_dr7(0);
- save_db_regs(svm->host_db_regs);
- load_db_regs(svm->db_regs);
- }
-
clgi();
local_irq_enable();
@@ -1825,16 +2512,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
#endif
);
- if ((svm->vmcb->save.dr7 & 0xff))
- load_db_regs(svm->host_db_regs);
-
vcpu->arch.cr2 = svm->vmcb->save.cr2;
vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- write_dr6(svm->host_dr6);
- write_dr7(svm->host_dr7);
kvm_write_cr2(svm->host_cr2);
kvm_load_fs(fs_selector);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6259d7467648..9b56d2138589 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -91,6 +91,7 @@ struct vcpu_vmx {
} rmode;
int vpid;
bool emulation_required;
+ enum emulation_result invalid_state_emulation_result;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
@@ -189,21 +190,21 @@ static inline int is_page_fault(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_no_device(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_invalid_opcode(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
INTR_INFO_VALID_MASK)) ==
- (INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
+ (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
}
static inline int is_external_interrupt(u32 intr_info)
@@ -480,8 +481,13 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
- if (vcpu->guest_debug.enabled)
- eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ eb |= 1u << DB_VECTOR;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ eb |= 1u << BP_VECTOR;
+ }
if (vcpu->arch.rmode.active)
eb = ~0;
if (vm_need_ept())
@@ -747,29 +753,33 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u32 intr_info = nr | INTR_INFO_VALID_MASK;
- if (has_error_code)
+ if (has_error_code) {
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ intr_info |= INTR_INFO_DELIVER_CODE_MASK;
+ }
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = nr;
vmx->rmode.irq.rip = kvm_rip_read(vcpu);
- if (nr == BP_VECTOR)
+ if (nr == BP_VECTOR || nr == OF_VECTOR)
vmx->rmode.irq.rip++;
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_SOFT_INTR
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ intr_info |= INTR_TYPE_SOFT_INTR;
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
return;
}
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- nr | INTR_TYPE_EXCEPTION
- | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
- | INTR_INFO_VALID_MASK);
+ if (nr == BP_VECTOR || nr == OF_VECTOR) {
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+ intr_info |= INTR_TYPE_SOFT_EXCEPTION;
+ } else
+ intr_info |= INTR_TYPE_HARD_EXCEPTION;
+
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
}
static bool vmx_exception_injected(struct kvm_vcpu *vcpu)
@@ -856,11 +866,8 @@ static u64 guest_read_tsc(void)
* writes 'guest_tsc' into guest's timestamp counter "register"
* guest_tsc = host_tsc + tsc_offset ==> tsc_offset = guest_tsc - host_tsc
*/
-static void guest_write_tsc(u64 guest_tsc)
+static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
{
- u64 host_tsc;
-
- rdtscll(host_tsc);
vmcs_write64(TSC_OFFSET, guest_tsc - host_tsc);
}
@@ -924,6 +931,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_msr_entry *msr;
+ u64 host_tsc;
int ret = 0;
switch (msr_index) {
@@ -949,7 +957,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TIME_STAMP_COUNTER:
- guest_write_tsc(data);
+ rdtscll(host_tsc);
+ guest_write_tsc(data, host_tsc);
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
@@ -998,40 +1007,28 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
}
}
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
+static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
{
- unsigned long dr7 = 0x400;
- int old_singlestep;
-
- old_singlestep = vcpu->guest_debug.singlestep;
-
- vcpu->guest_debug.enabled = dbg->enabled;
- if (vcpu->guest_debug.enabled) {
- int i;
-
- dr7 |= 0x200; /* exact */
- for (i = 0; i < 4; ++i) {
- if (!dbg->breakpoints[i].enabled)
- continue;
- vcpu->guest_debug.bp[i] = dbg->breakpoints[i].address;
- dr7 |= 2 << (i*2); /* global enable */
- dr7 |= 0 << (i*4+16); /* execution breakpoint */
- }
+ int old_debug = vcpu->guest_debug;
+ unsigned long flags;
- vcpu->guest_debug.singlestep = dbg->singlestep;
- } else
- vcpu->guest_debug.singlestep = 0;
+ vcpu->guest_debug = dbg->control;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
+ vcpu->guest_debug = 0;
- if (old_singlestep && !vcpu->guest_debug.singlestep) {
- unsigned long flags;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+ vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
+ else
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
- flags = vmcs_readl(GUEST_RFLAGS);
+ flags = vmcs_readl(GUEST_RFLAGS);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
+ else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
- vmcs_writel(GUEST_RFLAGS, flags);
- }
+ vmcs_writel(GUEST_RFLAGS, flags);
update_exception_bitmap(vcpu);
- vmcs_writel(GUEST_DR7, dr7);
return 0;
}
@@ -1652,7 +1649,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
var->limit = vmcs_read32(sf->limit);
var->selector = vmcs_read16(sf->selector);
ar = vmcs_read32(sf->ar_bytes);
- if (ar & AR_UNUSABLE_MASK)
+ if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
ar = 0;
var->type = ar & 15;
var->s = (ar >> 4) & 1;
@@ -1787,14 +1784,16 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+ if (cs.unusable)
+ return false;
if (~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_ACCESSES_MASK))
return false;
if (!cs.s)
return false;
- if (!(~cs.type & (AR_TYPE_CODE_MASK|AR_TYPE_WRITEABLE_MASK))) {
+ if (cs.type & AR_TYPE_WRITEABLE_MASK) {
if (cs.dpl > cs_rpl)
return false;
- } else if (cs.type & AR_TYPE_CODE_MASK) {
+ } else {
if (cs.dpl != cs_rpl)
return false;
}
@@ -1813,7 +1812,9 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
ss_rpl = ss.selector & SELECTOR_RPL_MASK;
- if ((ss.type != 3) || (ss.type != 7))
+ if (ss.unusable)
+ return true;
+ if (ss.type != 3 && ss.type != 7)
return false;
if (!ss.s)
return false;
@@ -1833,6 +1834,8 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
vmx_get_segment(vcpu, &var, seg);
rpl = var.selector & SELECTOR_RPL_MASK;
+ if (var.unusable)
+ return true;
if (!var.s)
return false;
if (!var.present)
@@ -1854,9 +1857,11 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &tr, VCPU_SREG_TR);
+ if (tr.unusable)
+ return false;
if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
- if ((tr.type != 3) || (tr.type != 11)) /* TODO: Check if guest is in IA32e mode */
+ if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
return false;
if (!tr.present)
return false;
@@ -1870,6 +1875,8 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &ldtr, VCPU_SREG_LDTR);
+ if (ldtr.unusable)
+ return true;
if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
return false;
if (ldtr.type != 2)
@@ -2111,7 +2118,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
{
u32 host_sysenter_cs, msr_low, msr_high;
u32 junk;
- u64 host_pat;
+ u64 host_pat, tsc_this, tsc_base;
unsigned long a;
struct descriptor_table dt;
int i;
@@ -2239,6 +2246,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
+ tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
+ rdtscll(tsc_this);
+ if (tsc_this < vmx->vcpu.kvm->arch.vm_init_tsc)
+ tsc_base = tsc_this;
+
+ guest_write_tsc(0, tsc_base);
return 0;
}
@@ -2318,7 +2331,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0);
kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
- /* todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 */
vmcs_writel(GUEST_DR7, 0x400);
vmcs_writel(GUEST_GDTR_BASE, 0);
@@ -2331,8 +2343,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
- guest_write_tsc(0);
-
/* Special registers */
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
@@ -2485,6 +2495,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
{
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -2535,24 +2550,6 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
-static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
-{
- struct kvm_guest_debug *dbg = &vcpu->guest_debug;
-
- set_debugreg(dbg->bp[0], 0);
- set_debugreg(dbg->bp[1], 1);
- set_debugreg(dbg->bp[2], 2);
- set_debugreg(dbg->bp[3], 3);
-
- if (dbg->singlestep) {
- unsigned long flags;
-
- flags = vmcs_readl(GUEST_RFLAGS);
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
- vmcs_writel(GUEST_RFLAGS, flags);
- }
-}
-
static int handle_rmode_exception(struct kvm_vcpu *vcpu,
int vec, u32 err_code)
{
@@ -2569,9 +2566,17 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* the required debugging infrastructure rework.
*/
switch (vec) {
- case DE_VECTOR:
case DB_VECTOR:
+ if (vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
+ return 0;
+ kvm_queue_exception(vcpu, vec);
+ return 1;
case BP_VECTOR:
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
+ return 0;
+ /* fall through */
+ case DE_VECTOR:
case OF_VECTOR:
case BR_VECTOR:
case UD_VECTOR:
@@ -2588,8 +2593,8 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 intr_info, error_code;
- unsigned long cr2, rip;
+ u32 intr_info, ex_no, error_code;
+ unsigned long cr2, rip, dr6;
u32 vect_info;
enum emulation_result er;
@@ -2648,14 +2653,30 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 1;
}
- if ((intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK)) ==
- (INTR_TYPE_EXCEPTION | 1)) {
+ ex_no = intr_info & INTR_INFO_VECTOR_MASK;
+ switch (ex_no) {
+ case DB_VECTOR:
+ dr6 = vmcs_readl(EXIT_QUALIFICATION);
+ if (!(vcpu->guest_debug &
+ (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
+ vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
+ /* fall through */
+ case BP_VECTOR:
kvm_run->exit_reason = KVM_EXIT_DEBUG;
- return 0;
+ kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+ kvm_run->debug.arch.exception = ex_no;
+ break;
+ default:
+ kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
+ kvm_run->ex.exception = ex_no;
+ kvm_run->ex.error_code = error_code;
+ break;
}
- kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
- kvm_run->ex.exception = intr_info & INTR_INFO_VECTOR_MASK;
- kvm_run->ex.error_code = error_code;
return 0;
}
@@ -2794,21 +2815,44 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
unsigned long val;
int dr, reg;
- /*
- * FIXME: this code assumes the host is debugging the guest.
- * need to deal with guest debugging itself too.
- */
+ dr = vmcs_readl(GUEST_DR7);
+ if (dr & DR7_GD) {
+ /*
+ * As the vm-exit takes precedence over the debug trap, we
+ * need to emulate the latter, either for the host or the
+ * guest debugging itself.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
+ kvm_run->debug.arch.dr7 = dr;
+ kvm_run->debug.arch.pc =
+ vmcs_readl(GUEST_CS_BASE) +
+ vmcs_readl(GUEST_RIP);
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ return 0;
+ } else {
+ vcpu->arch.dr7 &= ~DR7_GD;
+ vcpu->arch.dr6 |= DR6_BD;
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ return 1;
+ }
+ }
+
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- dr = exit_qualification & 7;
- reg = (exit_qualification >> 8) & 15;
- if (exit_qualification & 16) {
- /* mov from dr */
+ dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+ reg = DEBUG_REG_ACCESS_REG(exit_qualification);
+ if (exit_qualification & TYPE_MOV_FROM_DR) {
switch (dr) {
+ case 0 ... 3:
+ val = vcpu->arch.db[dr];
+ break;
case 6:
- val = 0xffff0ff0;
+ val = vcpu->arch.dr6;
break;
case 7:
- val = 0x400;
+ val = vcpu->arch.dr7;
break;
default:
val = 0;
@@ -2816,7 +2860,38 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_register_write(vcpu, reg, val);
KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
} else {
- /* mov to dr */
+ val = vcpu->arch.regs[reg];
+ switch (dr) {
+ case 0 ... 3:
+ vcpu->arch.db[dr] = val;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+ vcpu->arch.eff_db[dr] = val;
+ break;
+ case 4 ... 5:
+ if (vcpu->arch.cr4 & X86_CR4_DE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ break;
+ case 6:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ break;
+ case 7:
+ if (val & 0xffffffff00000000ULL) {
+ kvm_queue_exception(vcpu, GP_VECTOR);
+ break;
+ }
+ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
+ vcpu->arch.switch_db_regs =
+ (val & DR7_BP_EN_MASK);
+ }
+ break;
+ }
+ KVMTRACE_2D(DR_WRITE, vcpu, (u32)dr, (u32)val, handler);
}
skip_emulated_instruction(vcpu);
return 1;
@@ -2967,7 +3042,18 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
tss_selector = exit_qualification;
- return kvm_task_switch(vcpu, tss_selector, reason);
+ if (!kvm_task_switch(vcpu, tss_selector, reason))
+ return 0;
+
+ /* clear all local breakpoint enable flags */
+ vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55);
+
+ /*
+ * TODO: What about debug traps on tss switch?
+ * Are we supposed to inject them and update dr6?
+ */
+
+ return 1;
}
static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3045,7 +3131,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int err;
+ enum emulation_result err = EMULATE_DONE;
preempt_enable();
local_irq_enable();
@@ -3070,10 +3156,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
local_irq_disable();
preempt_disable();
- /* Guest state should be valid now except if we need to
- * emulate an MMIO */
- if (guest_state_valid(vcpu))
- vmx->emulation_required = 0;
+ vmx->invalid_state_emulation_result = err;
}
/*
@@ -3122,8 +3205,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* If we need to emulate an MMIO from handle_invalid_guest_state
* we just return 0 */
- if (vmx->emulation_required && emulate_invalid_guest_state)
- return 0;
+ if (vmx->emulation_required && emulate_invalid_guest_state) {
+ if (guest_state_valid(vcpu))
+ vmx->emulation_required = 0;
+ return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
+ }
/* Access CR3 don't cause VMExit in paging mode, so we need
* to sync with guest real CR3. */
@@ -3237,7 +3323,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
vmx->vcpu.arch.nmi_injected = false;
}
kvm_clear_exception_queue(&vmx->vcpu);
- if (idtv_info_valid && type == INTR_TYPE_EXCEPTION) {
+ if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION ||
+ type == INTR_TYPE_SOFT_EXCEPTION)) {
if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
error = vmcs_read32(IDT_VECTORING_ERROR_CODE);
kvm_queue_exception_e(&vmx->vcpu, vector, error);
@@ -3258,6 +3345,11 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
vmx_update_window_states(vcpu);
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_STI |
+ GUEST_INTR_STATE_MOV_SS);
+
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vcpu->arch.interrupt.pending) {
enable_nmi_window(vcpu);
@@ -3347,6 +3439,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
*/
vmcs_writel(HOST_CR0, read_cr0());
+ set_debugreg(vcpu->arch.dr6, 6);
+
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@@ -3441,6 +3535,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
vcpu->arch.regs_dirty = 0;
+ get_debugreg(vcpu->arch.dr6, 6);
+
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
@@ -3595,7 +3691,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.vcpu_put = vmx_vcpu_put,
.set_guest_debug = set_guest_debug,
- .guest_debug_pre = kvm_guest_debug_pre,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cc17546a2406..2509362a5475 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -69,6 +69,8 @@ static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries);
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -173,6 +175,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
u32 error_code)
{
++vcpu->stat.pf_guest;
+
if (vcpu->arch.exception.pending) {
if (vcpu->arch.exception.nr == PF_VECTOR) {
printk(KERN_DEBUG "kvm: inject_page_fault:"
@@ -361,6 +364,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
}
kvm_x86_ops->set_cr4(vcpu, cr4);
vcpu->arch.cr4 = cr4;
+ vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled;
kvm_mmu_sync_global(vcpu);
kvm_mmu_reset_context(vcpu);
}
@@ -442,6 +446,11 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static inline u32 bit(int bitno)
+{
+ return 1 << (bitno & 31);
+}
+
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
@@ -456,7 +465,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
- MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT
+ MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -481,6 +490,17 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
return;
}
+ if (efer & EFER_SVME) {
+ struct kvm_cpuid_entry2 *feat;
+
+ feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) {
+ printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n");
+ kvm_inject_gp(vcpu, 0);
+ return;
+ }
+ }
+
kvm_x86_ops->set_efer(vcpu, efer);
efer &= ~EFER_LMA;
@@ -722,6 +742,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
+ case MSR_VM_HSAVE_PA:
break;
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
@@ -843,6 +864,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
case MSR_IA32_LASTINTTOIP:
+ case MSR_VM_HSAVE_PA:
data = 0;
break;
case MSR_MTRRcap:
@@ -972,6 +994,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_REINJECT_CONTROL:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1042,7 +1065,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
@@ -1140,8 +1163,8 @@ out:
}
static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1160,8 +1183,8 @@ out:
}
static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
- struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 __user *entries)
{
int r;
@@ -1170,7 +1193,7 @@ static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
goto out;
r = -EFAULT;
if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
- vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
+ vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
@@ -1179,18 +1202,13 @@ out:
return r;
}
-static inline u32 bit(int bitno)
-{
- return 1 << (bitno & 31);
-}
-
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
- u32 index)
+ u32 index)
{
entry->function = function;
entry->index = index;
cpuid_count(entry->function, entry->index,
- &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
+ &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
entry->flags = 0;
}
@@ -1226,9 +1244,10 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word3_x86_features =
bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
const u32 kvm_supported_word6_x86_features =
- bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);
+ bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) |
+ bit(X86_FEATURE_SVM);
- /* all func 2 cpuid_count() should be called on the same cpu */
+ /* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
do_cpuid_1_ent(entry, function, index);
++*nent;
@@ -1302,7 +1321,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
- struct kvm_cpuid_entry2 __user *entries)
+ struct kvm_cpuid_entry2 __user *entries)
{
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG;
@@ -1319,7 +1338,7 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[0].eax;
for (func = 1; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -E2BIG;
if (nent >= cpuid->nent)
goto out_free;
@@ -1328,10 +1347,10 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
limit = cpuid_entries[nent - 1].eax;
for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
do_cpuid_ent(&cpuid_entries[nent], func, 0,
- &nent, cpuid->nent);
+ &nent, cpuid->nent);
r = -EFAULT;
if (copy_to_user(entries, cpuid_entries,
- nent * sizeof(struct kvm_cpuid_entry2)))
+ nent * sizeof(struct kvm_cpuid_entry2)))
goto out_free;
cpuid->nent = nent;
r = 0;
@@ -1475,7 +1494,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
break;
@@ -1488,7 +1507,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
- cpuid_arg->entries);
+ cpuid_arg->entries);
if (r)
goto out;
r = -EFAULT;
@@ -1708,6 +1727,15 @@ static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
return r;
}
+static int kvm_vm_ioctl_reinject(struct kvm *kvm,
+ struct kvm_reinject_control *control)
+{
+ if (!kvm->arch.vpit)
+ return -ENXIO;
+ kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+ return 0;
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1807,10 +1835,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
break;
case KVM_CREATE_PIT:
+ mutex_lock(&kvm->lock);
+ r = -EEXIST;
+ if (kvm->arch.vpit)
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm);
if (kvm->arch.vpit)
r = 0;
+ create_pit_unlock:
+ mutex_unlock(&kvm->lock);
break;
case KVM_IRQ_LINE: {
struct kvm_irq_level irq_event;
@@ -1905,6 +1939,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+ case KVM_REINJECT_CONTROL: {
+ struct kvm_reinject_control control;
+ r = -EFAULT;
+ if (copy_from_user(&control, argp, sizeof(control)))
+ goto out;
+ r = kvm_vm_ioctl_reinject(kvm, &control);
+ if (r)
+ goto out;
+ r = 0;
+ break;
+ }
default:
;
}
@@ -1958,10 +2003,38 @@ static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
return dev;
}
-int emulator_read_std(unsigned long addr,
- void *val,
- unsigned int bytes,
- struct kvm_vcpu *vcpu)
+int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
+{
+ void *data = val;
+ int r = X86EMUL_CONTINUE;
+
+ while (bytes) {
+ gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ unsigned offset = addr & (PAGE_SIZE-1);
+ unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
+ int ret;
+
+ if (gpa == UNMAPPED_GVA) {
+ r = X86EMUL_PROPAGATE_FAULT;
+ goto out;
+ }
+ ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+ if (ret < 0) {
+ r = X86EMUL_UNHANDLEABLE;
+ goto out;
+ }
+
+ bytes -= toread;
+ data += toread;
+ addr += toread;
+ }
+out:
+ return r;
+}
+
+int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu)
{
void *data = val;
int r = X86EMUL_CONTINUE;
@@ -1969,27 +2042,27 @@ int emulator_read_std(unsigned long addr,
while (bytes) {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
unsigned offset = addr & (PAGE_SIZE-1);
- unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
+ unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
if (gpa == UNMAPPED_GVA) {
r = X86EMUL_PROPAGATE_FAULT;
goto out;
}
- ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
+ ret = kvm_write_guest(vcpu->kvm, gpa, data, towrite);
if (ret < 0) {
r = X86EMUL_UNHANDLEABLE;
goto out;
}
- bytes -= tocopy;
- data += tocopy;
- addr += tocopy;
+ bytes -= towrite;
+ data += towrite;
+ addr += towrite;
}
out:
return r;
}
-EXPORT_SYMBOL_GPL(emulator_read_std);
+
static int emulator_read_emulated(unsigned long addr,
void *val,
@@ -2011,8 +2084,8 @@ static int emulator_read_emulated(unsigned long addr,
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
- if (emulator_read_std(addr, val, bytes, vcpu)
- == X86EMUL_CONTINUE)
+ if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+ == X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
@@ -2215,7 +2288,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
- emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);
+ kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2223,7 +2296,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
- .read_std = emulator_read_std,
+ .read_std = kvm_read_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2325,40 +2398,19 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(emulate_instruction);
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
- if (vcpu->arch.pio.guest_pages[i]) {
- kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
- vcpu->arch.pio.guest_pages[i] = NULL;
- }
-}
-
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
void *p = vcpu->arch.pio_data;
- void *q;
+ gva_t q = vcpu->arch.pio.guest_gva;
unsigned bytes;
- int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;
+ int ret;
- q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
- PAGE_KERNEL);
- if (!q) {
- free_pio_guest_pages(vcpu);
- return -ENOMEM;
- }
- q += vcpu->arch.pio.guest_page_offset;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
- memcpy(q, p, bytes);
+ ret = kvm_write_guest_virt(q, p, bytes, vcpu);
else
- memcpy(p, q, bytes);
- q -= vcpu->arch.pio.guest_page_offset;
- vunmap(q);
- free_pio_guest_pages(vcpu);
- return 0;
+ ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+ return ret;
}
int complete_pio(struct kvm_vcpu *vcpu)
@@ -2469,7 +2521,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 0;
vcpu->arch.pio.down = 0;
- vcpu->arch.pio.guest_page_offset = 0;
vcpu->arch.pio.rep = 0;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2497,9 +2548,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
gva_t address, int rep, unsigned port)
{
unsigned now, in_page;
- int i, ret = 0;
- int nr_pages = 1;
- struct page *page;
+ int ret = 0;
struct kvm_io_device *pio_dev;
vcpu->run->exit_reason = KVM_EXIT_IO;
@@ -2511,7 +2560,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.in = in;
vcpu->arch.pio.string = 1;
vcpu->arch.pio.down = down;
- vcpu->arch.pio.guest_page_offset = offset_in_page(address);
vcpu->arch.pio.rep = rep;
if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
@@ -2531,15 +2579,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
else
in_page = offset_in_page(address) + size;
now = min(count, (unsigned long)in_page / size);
- if (!now) {
- /*
- * String I/O straddles page boundary. Pin two guest pages
- * so that we satisfy atomicity constraints. Do just one
- * transaction to avoid complexity.
- */
- nr_pages = 2;
+ if (!now)
now = 1;
- }
if (down) {
/*
* String I/O in reverse. Yuck. Kill the guest, fix later.
@@ -2554,15 +2595,7 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
kvm_x86_ops->skip_emulated_instruction(vcpu);
- for (i = 0; i < nr_pages; ++i) {
- page = gva_to_page(vcpu, address + i * PAGE_SIZE);
- vcpu->arch.pio.guest_pages[i] = page;
- if (!page) {
- kvm_inject_gp(vcpu, 0);
- free_pio_guest_pages(vcpu);
- return 1;
- }
- }
+ vcpu->arch.pio.guest_gva = address;
pio_dev = vcpu_find_pio_dev(vcpu, port,
vcpu->arch.pio.cur_count,
@@ -2570,7 +2603,11 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (!vcpu->arch.pio.in) {
/* string PIO write */
ret = pio_copy_data(vcpu);
- if (ret >= 0 && pio_dev) {
+ if (ret == X86EMUL_PROPAGATE_FAULT) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ if (ret == 0 && pio_dev) {
pio_string_write(pio_dev, vcpu);
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
@@ -2825,25 +2862,20 @@ static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
return 0;
if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
- !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
+ !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
return 0;
return 1;
}
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
+ u32 function, u32 index)
{
int i;
- u32 function, index;
- struct kvm_cpuid_entry2 *e, *best;
+ struct kvm_cpuid_entry2 *best = NULL;
- function = kvm_register_read(vcpu, VCPU_REGS_RAX);
- index = kvm_register_read(vcpu, VCPU_REGS_RCX);
- kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
- kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
- best = NULL;
for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
+ struct kvm_cpuid_entry2 *e;
+
e = &vcpu->arch.cpuid_entries[i];
if (is_matching_cpuid_entry(e, function, index)) {
if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
@@ -2858,6 +2890,21 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (!best || e->function > best->function)
best = e;
}
+ return best;
+}
+
+void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+{
+ u32 function, index;
+ struct kvm_cpuid_entry2 *best;
+
+ function = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ index = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
+ best = kvm_find_cpuid_entry(vcpu, function, index);
if (best) {
kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
@@ -2977,9 +3024,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
- if (vcpu->guest_debug.enabled)
- kvm_x86_ops->guest_debug_pre(vcpu);
-
vcpu->guest_mode = 1;
/*
* Make sure that guest_mode assignment won't happen after
@@ -3000,10 +3044,34 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_guest_enter();
+ get_debugreg(vcpu->arch.host_dr6, 6);
+ get_debugreg(vcpu->arch.host_dr7, 7);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ get_debugreg(vcpu->arch.host_db[0], 0);
+ get_debugreg(vcpu->arch.host_db[1], 1);
+ get_debugreg(vcpu->arch.host_db[2], 2);
+ get_debugreg(vcpu->arch.host_db[3], 3);
+
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.eff_db[0], 0);
+ set_debugreg(vcpu->arch.eff_db[1], 1);
+ set_debugreg(vcpu->arch.eff_db[2], 2);
+ set_debugreg(vcpu->arch.eff_db[3], 3);
+ }
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
+ if (unlikely(vcpu->arch.switch_db_regs)) {
+ set_debugreg(0, 7);
+ set_debugreg(vcpu->arch.host_db[0], 0);
+ set_debugreg(vcpu->arch.host_db[1], 1);
+ set_debugreg(vcpu->arch.host_db[2], 2);
+ set_debugreg(vcpu->arch.host_db[3], 3);
+ }
+ set_debugreg(vcpu->arch.host_dr6, 6);
+ set_debugreg(vcpu->arch.host_dr7, 7);
+
vcpu->guest_mode = 0;
local_irq_enable();
@@ -3190,7 +3258,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
/*
* Don't leak debug flags in case they were set for guest debugging
*/
- if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
vcpu_put(vcpu);
@@ -3809,15 +3877,32 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg)
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
- int r;
+ int i, r;
vcpu_load(vcpu);
+ if ((dbg->control & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) ==
+ (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; ++i)
+ vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
+ vcpu->arch.switch_db_regs =
+ (dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+ } else {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
+ }
+
r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
+ if (dbg->control & KVM_GUESTDBG_INJECT_DB)
+ kvm_queue_exception(vcpu, DB_VECTOR);
+ else if (dbg->control & KVM_GUESTDBG_INJECT_BP)
+ kvm_queue_exception(vcpu, BP_VECTOR);
+
vcpu_put(vcpu);
return r;
@@ -4005,6 +4090,11 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
vcpu->arch.nmi_pending = false;
vcpu->arch.nmi_injected = false;
+ vcpu->arch.switch_db_regs = 0;
+ memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr7 = DR7_FIXED_1;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -4098,6 +4188,8 @@ struct kvm *kvm_arch_create_vm(void)
/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+ rdtscll(kvm->arch.vm_init_tsc);
+
return kvm;
}
@@ -4127,9 +4219,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
-void kvm_arch_destroy_vm(struct kvm *kvm)
+void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index d174db7a3370..ca91749d2083 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -178,7 +178,7 @@ static u32 opcode_table[256] = {
0, ImplicitOps | Stack, 0, 0,
ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */
- 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0,
/* 0xD0 - 0xD7 */
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
@@ -1136,18 +1136,19 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
- struct x86_emulate_ops *ops)
+ struct x86_emulate_ops *ops,
+ void *dest, int len)
{
struct decode_cache *c = &ctxt->decode;
int rc;
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
- &c->src.val, c->src.bytes, ctxt->vcpu);
+ dest, len, ctxt->vcpu);
if (rc != 0)
return rc;
- register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
return rc;
}
@@ -1157,11 +1158,9 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct decode_cache *c = &ctxt->decode;
int rc;
- c->src.bytes = c->dst.bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
if (rc != 0)
return rc;
- c->dst.val = c->src.val;
return 0;
}
@@ -1279,6 +1278,25 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
return 0;
}
+static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ unsigned long cs;
+
+ rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
+ if (rc)
+ return rc;
+ if (c->op_bytes == 4)
+ c->eip = (u32)c->eip;
+ rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
+ if (rc)
+ return rc;
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+ return rc;
+}
+
static inline int writeback(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1467,11 +1485,9 @@ special_insn:
break;
case 0x58 ... 0x5f: /* pop reg */
pop_instruction:
- c->src.bytes = c->op_bytes;
- rc = emulate_pop(ctxt, ops);
+ rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
if (rc != 0)
goto done;
- c->dst.val = c->src.val;
break;
case 0x63: /* movsxd */
if (ctxt->mode != X86EMUL_MODE_PROT64)
@@ -1738,6 +1754,11 @@ special_insn:
mov:
c->dst.val = c->src.val;
break;
+ case 0xcb: /* ret far */
+ rc = emulate_ret_far(ctxt, ops);
+ if (rc)
+ goto done;
+ break;
case 0xd0 ... 0xd1: /* Grp2 */
c->src.val = 1;
emulate_grp2(ctxt);
@@ -1908,11 +1929,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 3: /* lidt/vmmcall */
- if (c->modrm_mod == 3 && c->modrm_rm == 1) {
- rc = kvm_fix_hypercall(ctxt->vcpu);
- if (rc)
- goto done;
- kvm_emulate_hypercall(ctxt->vcpu);
+ if (c->modrm_mod == 3) {
+ switch (c->modrm_rm) {
+ case 1:
+ rc = kvm_fix_hypercall(ctxt->vcpu);
+ if (rc)
+ goto done;
+ break;
+ default:
+ goto cannot_emulate;
+ }
} else {
rc = read_descriptor(ctxt, ops, c->src.ptr,
&size, &address,
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 35525ac63337..71c150f25954 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -126,6 +126,7 @@ struct kvm_run {
__u64 data_offset; /* relative to kvm_run start */
} io;
struct {
+ struct kvm_debug_exit_arch arch;
} debug;
/* KVM_EXIT_MMIO */
struct {
@@ -217,21 +218,6 @@ struct kvm_interrupt {
__u32 irq;
};
-struct kvm_breakpoint {
- __u32 enabled;
- __u32 padding;
- __u64 address;
-};
-
-/* for KVM_DEBUG_GUEST */
-struct kvm_debug_guest {
- /* int */
- __u32 enabled;
- __u32 pad;
- struct kvm_breakpoint breakpoints[4];
- __u32 singlestep;
-};
-
/* for KVM_GET_DIRTY_LOG */
struct kvm_dirty_log {
__u32 slot;
@@ -292,6 +278,17 @@ struct kvm_s390_interrupt {
__u64 parm64;
};
+/* for KVM_SET_GUEST_DEBUG */
+
+#define KVM_GUESTDBG_ENABLE 0x00000001
+#define KVM_GUESTDBG_SINGLESTEP 0x00000002
+
+struct kvm_guest_debug {
+ __u32 control;
+ __u32 pad;
+ struct kvm_guest_debug_arch arch;
+};
+
#define KVM_TRC_SHIFT 16
/*
* kvm trace categories
@@ -396,6 +393,12 @@ struct kvm_trace_rec {
#if defined(CONFIG_X86)
#define KVM_CAP_USER_NMI 22
#endif
+#if defined(CONFIG_X86)
+#define KVM_CAP_SET_GUEST_DEBUG 23
+#endif
+#if defined(CONFIG_X86)
+#define KVM_CAP_REINJECT_CONTROL 24
+#endif
/*
* ioctls for VM fds
@@ -429,6 +432,7 @@ struct kvm_trace_rec {
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
/*
* ioctls for vcpu fds
@@ -440,7 +444,8 @@ struct kvm_trace_rec {
#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
-#define KVM_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
+#define KVM_DEBUG_GUEST __KVM_DEPRECATED_DEBUG_GUEST
#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
@@ -469,6 +474,29 @@ struct kvm_trace_rec {
#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
/* Available with KVM_CAP_NMI */
#define KVM_NMI _IO(KVMIO, 0x9a)
+/* Available with KVM_CAP_SET_GUEST_DEBUG */
+#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
+
+/*
+ * Deprecated interfaces
+ */
+struct kvm_breakpoint {
+ __u32 enabled;
+ __u32 padding;
+ __u64 address;
+};
+
+struct kvm_debug_guest {
+ __u32 enabled;
+ __u32 pad;
+ struct kvm_breakpoint breakpoints[4];
+ __u32 singlestep;
+};
+
+#define __KVM_DEPRECATED_DEBUG_GUEST _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+
+#define KVM_IA64_VCPU_GET_STACK _IOR(KVMIO, 0x9a, void *)
+#define KVM_IA64_VCPU_SET_STACK _IOW(KVMIO, 0x9b, void *)
#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
@@ -522,6 +550,7 @@ struct kvm_assigned_irq {
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
#endif
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec49d0be7f52..99963f36a6db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -73,7 +73,7 @@ struct kvm_vcpu {
struct kvm_run *run;
int guest_mode;
unsigned long requests;
- struct kvm_guest_debug guest_debug;
+ unsigned long guest_debug;
int fpu_active;
int guest_fpu_loaded;
wait_queue_head_t wq;
@@ -127,6 +127,10 @@ struct kvm {
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
#endif
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ struct hlist_head mask_notifier_list;
+#endif
+
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
@@ -237,7 +241,6 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
int user_alloc);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
-void kvm_arch_destroy_vm(struct kvm *kvm);
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
@@ -255,8 +258,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state);
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state);
-int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
- struct kvm_debug_guest *dbg);
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
int kvm_arch_init(void *opaque);
@@ -285,6 +288,7 @@ void kvm_free_physmem(struct kvm *kvm);
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
+void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
@@ -320,6 +324,19 @@ struct kvm_assigned_dev_kernel {
struct pci_dev *dev;
struct kvm *kvm;
};
+
+struct kvm_irq_mask_notifier {
+ void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+ int irq;
+ struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask);
+
void kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
void kvm_register_irq_ack_notifier(struct kvm *kvm,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 9b6f395c9625..2b8318c83e53 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -40,17 +40,4 @@ typedef unsigned long hfn_t;
typedef hfn_t pfn_t;
-struct kvm_pio_request {
- unsigned long count;
- int cur_count;
- struct page *guest_pages[2];
- unsigned guest_page_offset;
- int in;
- int port;
- int size;
- int string;
- int down;
- int rep;
-};
-
#endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 23b81cf242af..e85a2bcd2db1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -101,6 +101,7 @@ static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
+ bool mask_before, mask_after;
switch (ioapic->ioregsel) {
case IOAPIC_REG_VERSION:
@@ -120,6 +121,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
+ mask_before = ioapic->redirtbl[index].fields.mask;
if (ioapic->ioregsel & 1) {
ioapic->redirtbl[index].bits &= 0xffffffff;
ioapic->redirtbl[index].bits |= (u64) val << 32;
@@ -128,6 +130,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
ioapic->redirtbl[index].bits |= (u32) val;
ioapic->redirtbl[index].fields.remote_irr = 0;
}
+ mask_after = ioapic->redirtbl[index].fields.mask;
+ if (mask_before != mask_after)
+ kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
if (ioapic->irr & (1 << index))
ioapic_service(ioapic, index);
break;
@@ -426,3 +431,4 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
return 0;
}
+
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index aa5d1e5c497e..5162a411e4d2 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -99,3 +99,27 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
clear_bit(irq_source_id, &kvm->arch.irq_states[i]);
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
}
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn)
+{
+ kimn->irq = irq;
+ hlist_add_head(&kimn->link, &kvm->mask_notifier_list);
+}
+
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+ struct kvm_irq_mask_notifier *kimn)
+{
+ hlist_del(&kimn->link);
+}
+
+void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask)
+{
+ struct kvm_irq_mask_notifier *kimn;
+ struct hlist_node *n;
+
+ hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link)
+ if (kimn->irq == irq)
+ kimn->func(kimn, mask);
+}
+
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a5a08298aab..103bc08a2dc3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
- kvm_put_kvm(assigned_dev->kvm);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
- kvm_get_kvm(assigned_dev->kvm);
-
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
@@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
}
}
+/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
@@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev->irq_requested_type)
return;
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
- kvm_put_kvm(kvm);
+ /*
+ * In kvm_free_device_irq, cancel_work_sync return true if:
+ * 1. work is scheduled, and then cancelled.
+ * 2. work callback is executed.
+ *
+ * The first one ensured that the irq is disabled and no more events
+ * would happen. But for the second one, the irq may be enabled (e.g.
+ * for MSI). So we disable irq here to prevent further events.
+ *
+ * Notice this maybe result in nested disable if the interrupt type is
+ * INTx, but it's OK for we are going to free it.
+ *
+ * If this function is a part of VM destroy, please ensure that till
+ * now, the kvm state is still legal for probably we also have to wait
+ * interrupt_work done.
+ */
+ disable_irq_nosync(assigned_dev->host_irq);
+ cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
@@ -285,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
- adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
- free_irq(adev->host_irq, (void *)kvm);
+ (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
+ free_irq(adev->host_irq, (void *)adev);
pci_disable_msi(adev->dev);
}
@@ -332,6 +343,14 @@ static int assigned_device_update_msi(struct kvm *kvm,
adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
adev->guest_irq = airq->guest_irq;
adev->ack_notifier.gsi = airq->guest_irq;
+ } else {
+ /*
+ * Guest require to disable device MSI, we disable MSI and
+ * re-enable INTx by default again. Notice it's only for
+ * non-msi2intx.
+ */
+ assigned_device_update_intx(kvm, adev, airq);
+ return 0;
}
if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
@@ -368,6 +387,7 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
+ u32 current_flags = 0, changed_flags;
mutex_lock(&kvm->lock);
@@ -405,8 +425,13 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
}
}
- if ((!msi2intx &&
- (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+ if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) &&
+ (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI))
+ current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI;
+
+ changed_flags = assigned_irq->flags ^ current_flags;
+
+ if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) ||
(msi2intx && match->dev->msi_enabled)) {
#ifdef CONFIG_X86
r = assigned_device_update_msi(kvm, match, assigned_irq);
@@ -789,11 +814,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
+static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ kvm_arch_flush_shadow(kvm);
+}
+
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
+ .release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
@@ -806,6 +839,9 @@ static struct kvm *kvm_create_vm(void)
if (IS_ERR(kvm))
goto out;
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ INIT_HLIST_HEAD(&kvm->mask_notifier_list);
+#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -883,6 +919,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
{
struct mm_struct *mm = kvm->mm;
+ kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
@@ -1732,13 +1769,13 @@ out_free2:
r = 0;
break;
}
- case KVM_DEBUG_GUEST: {
- struct kvm_debug_guest dbg;
+ case KVM_SET_GUEST_DEBUG: {
+ struct kvm_guest_debug dbg;
r = -EFAULT;
if (copy_from_user(&dbg, argp, sizeof dbg))
goto out;
- r = kvm_arch_vcpu_ioctl_debug_guest(vcpu, &dbg);
+ r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
if (r)
goto out;
r = 0;